/// Rotate a SH representation Qlm around the z-axis by angle alpha (in radians). Result is stored in Rlm.
void SH_Zrotate(shtns_cfg shtns, cplx *Qlm, double alpha, cplx *Rlm)
{
	const long lmax = shtns->lmax;
	const int mmax = shtns->mmax;
	const int mres = shtns->mres;

	long l=0;
	do { Rlm[l] = Qlm[l]; } while(++l <= lmax);	// copy m=0 coefficients (invariant under rotation around Z)

	const double alpha_half_turns = alpha * (-2./M_PI);	// rotation angle counted in (negative) half-turns
	for (int im=1; im<=mmax; im++) {
		const long lm0 = LiM(shtns, 0, im);
		const double k = im*mres;		// m
		double n = round(k*alpha_half_turns);
		double k_alpha = fma(k, alpha_half_turns, -n) * (0.5*M_PI);	// accurately reduced angle
		const long quadrant = (long) n;		// integer quarter-turn count [restored: this line is elided in the excerpt]
		cplx eima = cos(k_alpha) + I*sin(k_alpha);	// e^(-i.m.alpha), up to quarter turns...
		if (quadrant & 2) eima = -eima;		// ...apply the half turn...
		if (quadrant & 1) eima = I*eima;	// ...and the quarter turn
		for (long l=im*mres; l<=lmax; ++l)  Rlm[lm0+l] = Qlm[lm0+l] * eima;
	}
}
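
/* Example (not part of sht_func.c): a minimal usage sketch for SH_Zrotate, assuming an
   orthonormal config with mres=1; grid setup and error checking omitted. */
#include <shtns.h>
#include <stdlib.h>
#include <math.h>

static void example_zrotate(void)
{
	const int lmax = 15;
	shtns_cfg shtns = shtns_create(lmax, lmax, 1, sht_orthonormal);
	cplx *Qlm = (cplx*) calloc(shtns->nlm, sizeof(cplx));
	cplx *Rlm = (cplx*) malloc(shtns->nlm * sizeof(cplx));
	Qlm[LM(shtns, 2, 1)] = 1.0;			// put energy in mode (l=2, m=1)
	SH_Zrotate(shtns, Qlm, M_PI/3, Rlm);		// rotate by pi/3 around Z
	// only the phase of m != 0 modes changes: cabs(Rlm[lm]) == cabs(Qlm[lm]) for every mode
	free(Rlm);  free(Qlm);
	shtns_destroy(shtns);
}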
static void SH_rotK90_init(shtns_cfg shtns)
{
	double *q0;  cplx *q;		// [restored declarations: elided in the excerpt]
	int nfft, nrembed, ncembed;
	// ...
	const int lmax = shtns->lmax;
	const int fac = 2*VSIZE2*NWAY;
	const int ntheta = fft_int( ((lmax+fac)/fac), 7 ) * fac;	// round up to a multiple of fac with small prime factors
	// ...
	shtns->ct_rot = malloc( sizeof(double)*ntheta );
	shtns->st_rot = shtns->ct_rot + (ntheta/2);
	for (int k=0; k<ntheta/2; ++k) {
		double cost = cos(((0.5*M_PI)*(2*k+1))/ntheta);
		double sint = sqrt((1.0-cost)*(1.0+cost));
		shtns->ct_rot[k] = cost;
		shtns->st_rot[k] = sint;
	}
	// ...
	size_t sze = sizeof(double)*(2*ntheta+2)*lmax;
	// ...
	int k = (lmax < 63) ? 1 : shtns->nthreads;
	fftw_plan_with_nthreads(k);
	// ...
	nfft = 2*ntheta;	nrembed = nfft+2;	ncembed = nrembed/2;
	shtns->fft_rot = fftw_plan_many_dft_r2c(1, &nfft, lmax, q0, &nrembed, lmax, 1, q, &ncembed, lmax, 1, FFTW_ESTIMATE);
	// ...
	shtns->npts_rot = ntheta;
}
static void SH_rotK90(shtns_cfg shtns, cplx *Qlm, cplx *Rlm, double dphi0, double dphi1)
{
	// ...
	const int lmax = shtns->lmax;
	const int ntheta = shtns->npts_rot;
	size_t sze = sizeof(double)*(2*ntheta+2)*lmax;
	double* const q0 = VMALLOC(sze);
	// ...

	// pre-rotation around Z (angle dphi0), while copying into Rlm:
	long l=0;
	do { Rlm[l] = Qlm[l]; } while(++l <= lmax);	// m=0
	for (int m=1; m<=lmax; m++) {
		cplx eima = cos(m*dphi0) - I*sin(m*dphi0);	// e^(-i.m.dphi0)
		long lm = LiM(shtns, m, m);
		// ...
		for (long l=m; l<=lmax; ++l) {
			cplx qrot = Qlm[lm] * eima;
			((double*)Rlm)[2*lm]   = creal(qrot);
			((double*)Rlm)[2*lm+1] = cimag(qrot) * em;	// 'em' defined in elided lines
			// ...
		}
	}

	// synthesis on the 2*ntheta points of an equator-symmetric grid:
	rnd* const qve = (rnd*) VMALLOC( sizeof(rnd)*NWAY*4*lmax );	// even part
	rnd* const qvo = qve + NWAY*2*lmax;				// odd part
	double* const ct = shtns->ct_rot;
	double* const st = shtns->st_rot;
	double* const alm = shtns->alm;
	const long nk = ntheta/(2*VSIZE2);
	// ... (loop over blocks of theta follows)
	rnd cost[NWAY], y0[NWAY], y1[NWAY];
	// ... (m=1 initialization)
	for (int j=0; j<NWAY; ++j) {
		y0[j] = vall(al[0]) / vread(st, j+k);
		cost[j] = vread(ct, j+k);
	}
	for (int j=0; j<NWAY; ++j) {
		y1[j] = vall(al[1]) * y0[j] * cost[j];
	}
	// ...
	for (int j=0; j<NWAY; ++j) {
		qve[(l-2)*2*NWAY + 2*j]   = y1[j] * vall(creal(Qlm[l-1]));
		qve[(l-2)*2*NWAY + 2*j+1] = vall(0.0);
		qvo[(l-2)*2*NWAY + 2*j]   = vall(0.0);
		qvo[(l-2)*2*NWAY + 2*j+1] = vall(0.0);
		y0[j] = vall(al[1])*(cost[j]*y1[j]) + vall(al[0])*y0[j];
	}
	for (int j=0; j<NWAY; ++j) {
		qve[(l-1)*2*NWAY + 2*j]   = y0[j] * vall(creal(Qlm[l]));
		qve[(l-1)*2*NWAY + 2*j+1] = vall(0.0);
		qvo[(l-1)*2*NWAY + 2*j]   = vall(0.0);
		qvo[(l-1)*2*NWAY + 2*j+1] = vall(0.0);
		y1[j] = vall(al[3])*(cost[j]*y0[j]) + vall(al[2])*y1[j];
	}
	// ...
	for (int j=0; j<NWAY; ++j) {
		qve[(l-2)*2*NWAY + 2*j]   = y1[j] * vall(creal(Qlm[l-1]));
		qve[(l-2)*2*NWAY + 2*j+1] = vall(0.0);
		qvo[(l-2)*2*NWAY + 2*j]   = vall(0.0);
		qvo[(l-2)*2*NWAY + 2*j+1] = vall(0.0);
	}
	// ...
	for (long m=1; m<=lmax; ++m) {
		// ...
		double* al = shtns->alm + m*(2*(lmax+1) - m + 1);
		cplx* Ql = &Qlm[LiM(shtns, 0, m)];
		rnd cost[NWAY], y0[NWAY], y1[NWAY];
		for (int j=0; j<NWAY; ++j) {
			cost[j] = vread(st, k+j);
			// ...
		}
		// compute sin(theta)^m on the fly, rescaling to avoid underflow:
		if ((int)lmax <= SHT_L_RESCALE_FLY) {
			// ...
			if (l&1)  for (int j=0; j<NWAY; ++j) y0[j] *= cost[j];
			for (int j=0; j<NWAY; ++j) cost[j] *= cost[j];
			// ...
		} else {
			// ...
			for (int j=0; j<NWAY; ++j) y0[j] *= cost[j];
			// ...
			if (vlo(y0[0]) < (SHT_ACCURACY+1.0/SHT_SCALE_FACTOR)) {
				// ...
				for (int j=0; j<NWAY; ++j) y0[j] *= vall(SHT_SCALE_FACTOR);
			}
			// ...
			for (int j=0; j<NWAY; ++j) cost[j] *= cost[j];
			// ...
			if (vlo(cost[0]) < 1.0/SHT_SCALE_FACTOR) {
				// ...
				for (int j=0; j<NWAY; ++j) cost[j] *= vall(SHT_SCALE_FACTOR);
			}
		}
		// ...
		for (int j=0; j<NWAY; ++j) {
			y0[j] *= vall(al[0]);
			cost[j] = vread(ct, j+k);
		}
		for (int j=0; j<NWAY; ++j) {
			y1[j] = (vall(al[1])*y0[j]) * cost[j];
		}
		// ...
		while ((ny<0) && (l<lmax)) {	// values not yet in double range: recurse without accumulating
			for (int j=0; j<NWAY; ++j) {
				y0[j] = vall(al[1])*(cost[j]*y1[j]) + vall(al[0])*y0[j];
			}
			// ...
			for (int j=0; j<NWAY; ++j) {
				y1[j] = vall(al[3])*(cost[j]*y0[j]) + vall(al[2])*y1[j];
			}
			// ...
			if (fabs(vlo(y0[NWAY-1])) > SHT_ACCURACY*SHT_SCALE_FACTOR + 1.0) {
				// ...
				for (int j=0; j<NWAY; ++j) {
					y0[j] *= vall(1.0/SHT_SCALE_FACTOR);	y1[j] *= vall(1.0/SHT_SCALE_FACTOR);
				}
				// ...
			}
		}
		// ... (regular recurrence, accumulating two degrees at a time:)
		rnd qr = vall(creal(Ql[l]));	rnd qi = vall(cimag(Ql[l]));
		for (int j=0; j<NWAY; ++j) {
			qv[(l-1)*2*NWAY + 2*j]   += y0[j] * qr;
			qv[(l-1)*2*NWAY + 2*j+1] += y0[j] * qi;
		}
		qr = vall(creal(Ql[l+1]));	qi = vall(cimag(Ql[l+1]));
		for (int j=0; j<NWAY; ++j) {
			qv[(l)*2*NWAY + 2*j]   += y1[j] * qr;
			qv[(l)*2*NWAY + 2*j+1] += y1[j] * qi;
		}
		for (int j=0; j<NWAY; ++j) {
			y0[j] = vall(al[1])*(cost[j]*y1[j]) + vall(al[0])*y0[j];
		}
		// ...
		for (int j=0; j<NWAY; ++j) {
			y1[j] = vall(al[3])*(cost[j]*y0[j]) + vall(al[2])*y1[j];
		}
		// ... (last degree:)
		rnd qr = vall(creal(Ql[l]));	rnd qi = vall(cimag(Ql[l]));
		for (int j=0; j<NWAY; ++j) {
			qv[(l-1)*2*NWAY + 2*j]   += y0[j] * qr;
			qv[(l-1)*2*NWAY + 2*j+1] += y0[j] * qi;
		}
	}

	// combine even and odd parts, and fill the 4 quadrants of the extended grid:
	double* qse = (double*) qve;
	double* qso = (double*) qvo;
	for (long l=1; l<=lmax; ++l) {
		for (int j=0; j<NWAY; j++) {
			for (int i=0; i<VSIZE2; i++) {
				double qre = qse[(l-1)*2*NWAY*VSIZE2 + 2*j*VSIZE2 + i];
				double qie = qse[(l-1)*2*NWAY*VSIZE2 + (2*j+1)*VSIZE2 + i];
				double qro = qso[(l-1)*2*NWAY*VSIZE2 + 2*j*VSIZE2 + i];
				double qio = qso[(l-1)*2*NWAY*VSIZE2 + (2*j+1)*VSIZE2 + i];
				long ijk = (k+j)*VSIZE2 + i;
				qre *= st[ijk];		qro *= st[ijk];
				// ...
				q0[ijk*lmax + (l-1)]              = (qre + qro) - (qie + qio);
				q0[(ntheta+ijk)*lmax + (l-1)]     = ((qre + qro) + (qie + qio)) * signl;
				q0[(2*ntheta-1-ijk)*lmax + (l-1)] = (qre - qro) + (qie - qio);
				q0[(ntheta-1-ijk)*lmax + (l-1)]   = ((qre - qro) - (qie - qio)) * signl;
			}
		}
		// ...
	}
	// ...
	fftw_execute_dft_r2c(shtns->fft_rot, q0, q);

	// analysis of the FFT output, with post-rotation around Z (angle dphi1):
	const int nphi = 2*ntheta;
	// ... (m=0)
	legendre_sphPlm_deriv_array_equ(shtns, lmax, m, ydyl+m);
	for (l=1; l<lmax; l+=2) {
		Rlm[lm]   = -creal(q[m*lmax + (l-1)])/(ydyl[l]*nphi);
		Rlm[lm+1] =  creal(q[m*lmax + l])/(ydyl[l+1]*nphi);
		// ...
	}
	// ...
	Rlm[lm] = -creal(q[m*lmax + (l-1)])/(ydyl[l]*nphi);
	// ...
	for (m=1; m<=lmax; ++m) {
		legendre_sphPlm_deriv_array_equ(shtns, lmax, m, ydyl+m);
		cplx eimdp = (cos(m*dphi1) - I*sin(m*dphi1))/nphi;	// e^(-i.m.dphi1)
		for (l=m; l<lmax; l+=2) {
			Rlm[lm]   = eimdp*q[m*lmax + (l-1)]*( 1./ydyl[l]);
			Rlm[lm+1] = eimdp*q[m*lmax + l]*(-1./ydyl[l+1]);
			// ...
		}
		// ...
		Rlm[lm] = eimdp*q[m*lmax + (l-1)]*(1./ydyl[l]);
		// ...
	}
	// ...
}
/// rotate Qlm by 90 degrees around X axis and store the result in Rlm.
void SH_Xrotate90(shtns_cfg shtns, cplx *Qlm, cplx *Rlm)
{
	int lmax = shtns->lmax;
	if ((shtns->mres != 1) || (shtns->mmax < lmax)) shtns_runerr("truncation makes rotation not closed.");
	// ...
	double q0 = creal(Qlm[LiM(shtns, l, 0)]);
	Rlm[LiM(shtns, l, 0)] = sqrt(2.0) * cimag(Qlm[LiM(shtns, l, 1)]);
	Rlm[LiM(shtns, l, 1)] = creal(Qlm[LiM(shtns, l, 1)]) - I*(sqrt(0.5)*q0);
	// ...
	SH_rotK90(shtns, Qlm, Rlm, 0.0, -M_PI/2);
}
/// rotate Qlm by 90 degrees around Y axis and store the result in Rlm.
void SH_Yrotate90(shtns_cfg shtns, cplx *Qlm, cplx *Rlm)
{
	int lmax = shtns->lmax;
	if ((shtns->mres != 1) || (shtns->mmax < lmax)) shtns_runerr("truncation makes rotation not closed.");
	// ...
	double q0 = creal(Qlm[LiM(shtns, l, 0)]);
	Rlm[LiM(shtns, l, 0)] = sqrt(2.0) * creal(Qlm[LiM(shtns, l, 1)]);
	Rlm[LiM(shtns, l, 1)] = I*cimag(Qlm[LiM(shtns, l, 1)]) - sqrt(0.5) * q0;
	// ...
	SH_rotK90(shtns, Qlm, Rlm, -M_PI/2, 0.0);
}
/// rotate Qlm around Y axis by arbitrary angle, using composition of rotations. Store the result in Rlm.
void SH_Yrotate(shtns_cfg shtns, cplx *Qlm, double alpha, cplx *Rlm)
{
	if ((shtns->mres != 1) || (shtns->mmax < shtns->lmax)) shtns_runerr("truncation makes rotation not closed.");
	// ...
	SH_rotK90(shtns, Qlm, Rlm, 0.0, M_PI/2 + alpha);	// Y-rotation by alpha as a composition of two 90-degree rotations plus Z-rotations
	SH_rotK90(shtns, Rlm, Rlm, 0.0, M_PI/2);
}
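
/* Example (not part of sht_func.c): the rotation helpers above require mres == 1 and
   mmax == lmax (a "closed" truncation). With shtns, Qlm, Rlm set up as in the
   SH_Zrotate example: */
SH_Yrotate(shtns, Qlm, 0.25, Rlm);	// rotate around Y by 0.25 radians
SH_Yrotate90(shtns, Qlm, Rlm);		// rotate around Y by exactly 90 degrees
SH_Xrotate90(shtns, Qlm, Rlm);		// rotate around X by exactly 90 degrees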
static void mul_ct_matrix_shifted(shtns_cfg shtns, double* mx)
{
	// ...
	for (im=0; im<=MMAX; im++) {
		double* al = alm_im(shtns, im);
		// ...
		mx[2*lm] = -a_1*al[0];
		// ...
		mx[2*lm] = sqrt((l+m)*(l-m))/(2*l+1);
		// ...
	}
	// ...
	for (im=0; im<=MMAX; im++) {
		double* al = alm_im(shtns, im);
		// ...
		while(++l <= LMAX+1) {
			// ...
		}
	}
}

static void st_dt_matrix_shifted(shtns_cfg shtns, double* mx)
{
	mul_ct_matrix_shifted(shtns, mx);
	for (int lm=0; lm<NLM; lm++) {
		mx[2*lm]   *= -(shtns->li[lm] + 2);
		mx[2*lm+1] *=   shtns->li[lm];
	}
}

/// fill mx with the coefficients for multiplication by cos(theta)
void mul_ct_matrix(shtns_cfg shtns, double* mx)
{
	// ...
	mul_ct_matrix_shifted(shtns, mx);
	for (int lm=2*NLM-1; lm>0; lm--)  mx[lm] = mx[lm-1];	// shift the coefficients to the standard layout
	// ...
	for (int im=1; im<=MMAX; im++) {
		int lm = LiM(shtns, im*MRES, im);
		mx[2*lm-1] = 0.0;	mx[2*lm] = 0.0;		// zero out coefficients at the m-block boundaries
	}
}

/// fill mx with the coefficients of operator sin(theta).d/dtheta
void st_dt_matrix(shtns_cfg shtns, double* mx)
{
	// ...
	for (int lm=0; lm<NLM; lm++) {
		mx[2*lm]   *= shtns->li[lm] - 1;
		mx[2*lm+1] *= -(shtns->li[lm] + 2);
	}
}
/// Multiplication of Qlm by a matrix involving l+1 and l-1 only.
void SH_mul_mx(shtns_cfg shtns, double* mx, cplx* Qlm, cplx* Rlm)
{
	// ...
	v2d* vq = (v2d*) Qlm;
	v2d* vr = (v2d*) Rlm;
	// ...
	s2d mxu = vdup(mx[1]);
	// ...
	for (lm=1; lm<nlmlim; lm++) {
		s2d mxl = vdup(mx[2*lm]);	s2d mxu = vdup(mx[2*lm+1]);
		vr[lm] = mxl*vq[lm-1] + mxu*vq[lm+1];
	}
	// ... (last mode: no l+1 contribution)
	s2d mxl = vdup(mx[2*lm]);
	vr[lm] = mxl*vq[lm-1];
}
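
/* Example (not part of sht_func.c): applying the cos(theta) operator in spectral space.
   mx holds 2*nlm coefficients (one (l-1) and one (l+1) coupling per mode). */
double *mx = (double*) malloc(2 * shtns->nlm * sizeof(double));
mul_ct_matrix(shtns, mx);		// coefficients of multiplication by cos(theta)
SH_mul_mx(shtns, mx, Qlm, Rlm);		// Rlm = spectral representation of cos(theta) * q
free(mx);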
/// Evaluate scalar SH representation of a complex field alm at the physical point defined by cost = cos(theta) and phi.
void SH_to_point_cplx(shtns_cfg shtns, cplx *alm, double cost, double phi, cplx *z_out)
{
	// ...
	v2d vr0 = vdup(0.0);	v2d vr1 = vdup(0.0);
	// ... (m=0, unrolled by 2)
	for (l=0; l<LTR; l+=2) {
		ll += (l<=MMAX) ? 2*l : 2*MMAX+1;	// stride from mode (l-1,m) to (l,m) in the centered complex layout
		vr0 += ((v2d*)alm)[ll] * vdup(yl[l]);
		ll += (l<MMAX) ? 2*l+2 : 2*MMAX+1;
		vr1 += ((v2d*)alm)[ll] * vdup(yl[l+1]);
	}
	// ... (last degree:)
	ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
	vr0 += ((v2d*)alm)[ll] * vdup(yl[l]);
	// ...
	z = vcplx_real(vr0) + I*vcplx_imag(vr0);
	// ...
	cplx eip = cos(MRES*phi) + I*sin(MRES*phi);
	// ...
	for (long im=1; im<=MTR; im++) {
		const long m = im*MRES;
		// ...
		if (lnz > LTR) break;	// no more non-zero Plm values
		// ...
		v2d vrm = vdup(0.0);	v2d vim = vdup(0.0);
		// ...
		cplx* almm = alm - 2*m;
		// ...
		int lx = (lnz <= MMAX) ? lnz : MMAX+1;
		ll += ((m+lx-1)*(lx-m));
		// ...
		ll += (2*MMAX+1)*(lnz-(MMAX+1));
		// ...
		for (; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			vim += vdup(yl[l]) * ((v2d*)almm)[ll];
			vrm += vdup(yl[l]) * ((v2d*)alm)[ll];
		}
		// ...
		vrc += vdup(creal(eimp)) * (vrm+vim);
		vrs += vdup(cimag(eimp)) * (vrm-vim);
		// ...
	}
	z += vcplx_real(vrc) - vcplx_imag(vrs) + I*(vcplx_imag(vrc) + vcplx_real(vrs));
	// ...
}
/// Evaluate scalar SH representation Qlm at the physical point defined by cost = cos(theta) and phi.
double SH_to_point(shtns_cfg shtns, cplx *Qlm, double cost, double phi)
{
	// ... (m=0, unrolled by 2)
	vr0 = 0.0;	vr1 = 0.0;
	// ...
	for (l=m; l<LTR; l+=2) {
		vr0 += yl[l]   * creal( Qlm[l] );
		vr1 += yl[l+1] * creal( Qlm[l+1] );
	}
	// ...
	vr0 += yl[l] * creal( Qlm[l] );
	// ...
	for (im=1; im<=MTR; im++) {
		// ...
		if (lnz > LTR) break;	// no more non-zero Plm values
		// ...
		v2d* Ql = (v2d*) &Qlm[LiM(shtns, 0, im)];
		v2d vrm0 = vdup(0.0);	v2d vrm1 = vdup(0.0);
		for (l=lnz; l<LTR; l+=2) {
			vrm0 += vdup(yl[l])   * Ql[l];
			vrm1 += vdup(yl[l+1]) * Ql[l+1];
		}
		cplx eimp = 2.*(cos(m*phi) + I*sin(m*phi));
		// ...
		vrm0 += vdup(yl[l]) * Ql[l];
		// ...
		vr0 += vcplx_real(vrm0)*creal(eimp) - vcplx_imag(vrm0)*cimag(eimp);
	}
	// ...
}
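
/* Example (not part of sht_func.c): evaluating a real scalar field at a single point,
   here theta = 60 degrees (cost = 0.5) and phi = 0, given its coefficients Qlm. */
double val = SH_to_point(shtns, Qlm, 0.5, 0.0);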
void SH_to_grad_point(shtns_cfg shtns, cplx *DrSlm, cplx *Slm, double cost, double phi,
	double *gr, double *gt, double *gp)
{
	// ...
	double vtt, vpp, vr0, vrm;
	// ...
	const double sint = sqrt((1.-cost)*(1.+cost));
	vtt = 0.;  vpp = 0.;  vr0 = 0.;  vrm = 0.;
	// ... (m=0)
	legendre_sphPlm_deriv_array(shtns, LTR, im, cost, sint, &yl[m], &dtyl[m]);
	for (l=m; l<=LTR; ++l) {
		vr0 += yl[l]   * creal( DrSlm[l] );
		vtt += dtyl[l] * creal( Slm[l] );
	}
	// ... (m > 0; do-loop structure restored from the closing while below)
	do {
		long lnz = legendre_sphPlm_deriv_array(shtns, LTR, im, cost, sint, &yl[m], &dtyl[m]);
		if (lnz > LTR) break;	// no more non-zero Plm values
		// ...
		cplx eimp = 2.*(cos(m*phi) + I*sin(m*phi));
		cplx imeimp = eimp*m*I;
		l = LiM(shtns, 0, im);
		v2d* Ql = (v2d*) &DrSlm[l];	v2d* Sl = (v2d*) &Slm[l];
		v2d qm = vdup(0.0);		// [restored: declaration elided in the excerpt]
		v2d dsdt = vdup(0.0);	v2d dsdp = vdup(0.0);
		for (l=lnz; l<=LTR; ++l) {
			qm   += vdup(yl[l])   * Ql[l];
			dsdt += vdup(dtyl[l]) * Sl[l];
			dsdp += vdup(yl[l])   * Sl[l];
		}
		vrm += vcplx_real(qm)*creal(eimp)     - vcplx_imag(qm)*cimag(eimp);
		vtt += vcplx_real(dsdt)*creal(eimp)   - vcplx_imag(dsdt)*cimag(eimp);
		vpp += vcplx_real(dsdp)*creal(imeimp) - vcplx_imag(dsdp)*cimag(imeimp);
	} while (++im <= MTR);
	// ...
}
/// Evaluate vector SH representation (Qlm, Slm, Tlm) at the physical point defined by cost = cos(theta) and phi.
void SHqst_to_point(shtns_cfg shtns, cplx *Qlm, cplx *Slm, cplx *Tlm, double cost, double phi,
	double *vr, double *vt, double *vp)
{
	// ...
	double vtt, vpp, vr0, vrm;
	// ...
	const double sint = sqrt((1.-cost)*(1.+cost));
	vtt = 0.;  vpp = 0.;  vr0 = 0.;  vrm = 0.;
	// ... (m=0)
	legendre_sphPlm_deriv_array(shtns, LTR, im, cost, sint, &yl[m], &dtyl[m]);
	for (l=m; l<=LTR; ++l) {
		vr0 += yl[l]   * creal( Qlm[l] );
		vtt += dtyl[l] * creal( Slm[l] );
		vpp -= dtyl[l] * creal( Tlm[l] );
	}
	// ... (m > 0; do-loop structure restored from the closing while below)
	do {
		legendre_sphPlm_deriv_array(shtns, LTR, im, cost, sint, &yl[m], &dtyl[m]);
		cplx eimp = 2.*(cos(m*phi) + I*sin(m*phi));
		cplx imeimp = eimp*m*I;
		l = LiM(shtns, 0, im);
		v2d* Ql = (v2d*) &Qlm[l];	v2d* Sl = (v2d*) &Slm[l];	v2d* Tl = (v2d*) &Tlm[l];
		v2d qm = vdup(0.0);		// [restored: declaration elided in the excerpt]
		v2d dsdt = vdup(0.0);	v2d dtdt = vdup(0.0);
		v2d dsdp = vdup(0.0);	v2d dtdp = vdup(0.0);
		for (l=m; l<=LTR; ++l) {
			qm   += vdup(yl[l])   * Ql[l];
			dsdt += vdup(dtyl[l]) * Sl[l];
			dtdt += vdup(dtyl[l]) * Tl[l];
			dsdp += vdup(yl[l])   * Sl[l];
			dtdp += vdup(yl[l])   * Tl[l];
		}
		vrm += vcplx_real(qm)*creal(eimp) - vcplx_imag(qm)*cimag(eimp);
		vtt += (vcplx_real(dtdp)*creal(imeimp) - vcplx_imag(dtdp)*cimag(imeimp))
		     + (vcplx_real(dsdt)*creal(eimp)   - vcplx_imag(dsdt)*cimag(eimp));
		vpp += (vcplx_real(dsdp)*creal(imeimp) - vcplx_imag(dsdp)*cimag(imeimp))
		     - (vcplx_real(dtdt)*creal(eimp)   - vcplx_imag(dtdt)*cimag(eimp));
	} while (++im <= MTR);
	// ...
}
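
/* Example (not part of sht_func.c): evaluating a 3D vector field given by its radial,
   spheroidal and toroidal expansions (Qlm, Slm, Tlm) at one point. */
double vr, vt, vp;
SHqst_to_point(shtns, Qlm, Slm, Tlm, 0.5, 0.0, &vr, &vt, &vp);	// cost = 0.5, phi = 0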
/// synthesis at a given latitude, on nphi equispaced longitude points (vector field).
void SHqst_to_lat(shtns_cfg shtns, cplx *Qlm, cplx *Slm, cplx *Tlm, double cost,
	double *vr, double *vt, double *vp, int nphi, int ltr, int mtr)
{
	cplx vst, vtt, vsp, vtp, vrr;
	cplx *vrc, *vtc, *vpc;
	// ...
	if (ltr > LMAX) ltr = LMAX;
	if (mtr > MMAX) mtr = MMAX;
	if (mtr*MRES > ltr) mtr = ltr/MRES;
	if (mtr*2*MRES >= nphi) mtr = (nphi-1)/(2*MRES);	// Nyquist limit of the nphi-point grid
	// ...
	ylm_lat = shtns->ylm_lat;
	if (ylm_lat == NULL) {		// allocate memory for the Legendre functions at this latitude
		ylm_lat = (double*) malloc(sizeof(double) * 2*NLM);
		shtns->ylm_lat = ylm_lat;
	}
	dylm_lat = ylm_lat + NLM;
	// ...
	st_lat = sqrt((1.-cost)*(1.+cost));	// sin(theta)
	if (cost != shtns->ct_lat) {		// recompute the Legendre functions only if the latitude changed
		shtns->ct_lat = cost;
		for (int m=0, j=0; m<=mtr; ++m) {
			legendre_sphPlm_deriv_array(shtns, ltr, m, cost, st_lat, &ylm_lat[j], &dylm_lat[j]);
			j += LMAX - m*MRES + 1;
		}
	}
	// ...
	vrc = (cplx*) fftw_malloc(sizeof(double) * 3*(nphi+2));
	vtc = vrc + (nphi/2+1);
	vpc = vtc + (nphi/2+1);
	// ...
	if (nphi != shtns->nphi_lat) {		// replace the FFTW plan if nphi changed
		if (shtns->ifft_lat) fftw_destroy_plan(shtns->ifft_lat);
		// ...
		fftw_plan_with_nthreads(1);
		// ...
		shtns->ifft_lat = fftw_plan_dft_c2r_1d(nphi, vrc, vr, FFTW_ESTIMATE);
		shtns->nphi_lat = nphi;
	}
	// ...
	for (int m = 0; m<nphi/2+1; ++m) {	// zero out the Fourier coefficients
		vrc[m] = 0.0;	vtc[m] = 0.0;	vpc[m] = 0.0;
	}
	// ... (m=0)
	for (int l=m; l<=ltr; ++l, ++j) {
		vrr += ylm_lat[j]  * creal(Qlm[j]);
		vst += dylm_lat[j] * creal(Slm[j]);
		vtt += dylm_lat[j] * creal(Tlm[j]);
	}
	// ...
	for (int m=MRES; m<=mtr*MRES; m+=MRES) {
		vrr=0;	vtt=0;	vst=0;	vsp=0;	vtp=0;
		for (int l=m; l<=ltr; ++l, ++j) {
			vrr += ylm_lat[j]  * Qlm[j];
			vst += dylm_lat[j] * Slm[j];
			vtt += dylm_lat[j] * Tlm[j];
			vsp += ylm_lat[j]  * Slm[j];
			vtp += ylm_lat[j]  * Tlm[j];
		}
		// ...
		vtc[m] = I*m*vtp + vst;
		vpc[m] = I*m*vsp - vtt;
	}
	// ...
	fftw_execute_dft_c2r(shtns->ifft_lat, vrc, vr);
	fftw_execute_dft_c2r(shtns->ifft_lat, vtc, vt);
	fftw_execute_dft_c2r(shtns->ifft_lat, vpc, vp);
	// ...
}
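
/* Example (not part of sht_func.c): vector synthesis on one latitude circle
   (cost = 0.5) with 64 equispaced longitude points, without extra truncation. */
double vr[64], vt[64], vp[64];
SHqst_to_lat(shtns, Qlm, Slm, Tlm, 0.5, vr, vt, vp, 64, shtns->lmax, shtns->mmax);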
/// synthesis at a given latitude, on nphi equispaced longitude points (scalar field).
void SH_to_lat(shtns_cfg shtns, cplx *Qlm, double cost,
	double *vr, int nphi, int ltr, int mtr)
{
	// ...
	if (ltr > LMAX) ltr = LMAX;
	if (mtr > MMAX) mtr = MMAX;
	if (mtr*MRES > ltr) mtr = ltr/MRES;
	if (mtr*2*MRES >= nphi) mtr = (nphi-1)/(2*MRES);	// Nyquist limit of the nphi-point grid
	// ...
	ylm_lat = shtns->ylm_lat;
	if (ylm_lat == NULL) {
		ylm_lat = (double*) malloc(sizeof(double) * 2*NLM);
		shtns->ylm_lat = ylm_lat;
	}
	dylm_lat = ylm_lat + NLM;
	// ...
	st_lat = sqrt((1.-cost)*(1.+cost));	// sin(theta)
	if (cost != shtns->ct_lat) {
		shtns->ct_lat = cost;
		for (int m=0, j=0; m<=mtr; ++m) {
			legendre_sphPlm_deriv_array(shtns, ltr, m, cost, st_lat, &ylm_lat[j], &dylm_lat[j]);
			j += LMAX - m*MRES + 1;
		}
	}
	// ...
	vrc = (cplx*) fftw_malloc(sizeof(double) * (nphi+2));
	// ...
	if (nphi != shtns->nphi_lat) {
		if (shtns->ifft_lat) fftw_destroy_plan(shtns->ifft_lat);
		// ...
		fftw_plan_with_nthreads(1);
		// ...
		shtns->ifft_lat = fftw_plan_dft_c2r_1d(nphi, vrc, vr, FFTW_ESTIMATE);
		shtns->nphi_lat = nphi;
	}
	// ...
	for (int m = 0; m<nphi/2+1; ++m) {
		// ...
	}
	// ... (m=0)
	for (int l=m; l<=ltr; ++l, ++j) {
		vrr += ylm_lat[j] * creal(Qlm[j]);
	}
	// ...
	for (int m=MRES; m<=mtr*MRES; m+=MRES) {
		// ...
		for (int l=m; l<=ltr; ++l, ++j) {
			vrr += ylm_lat[j] * Qlm[j];
		}
		// ...
	}
	// ...
	fftw_execute_dft_c2r(shtns->ifft_lat, vrc, vr);
	// ...
}
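
/* Example (not part of sht_func.c): same as above for a scalar field. */
double vr[64];
SH_to_lat(shtns, Qlm, 0.5, vr, 64, shtns->lmax, shtns->mmax);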
static void SH_2real_to_cplx(shtns_cfg shtns, cplx* Rlm, cplx* Ilm, cplx* Zlm)
{
	// ... (m=0)
	for (unsigned l=0; l<=LMAX; l++) {
		ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
		Zlm[ll] = creal(Rlm[lm]) + I*creal(Ilm[lm]);
		// ...
	}
	// ...
	for (unsigned m=1; m<=MMAX; m++) {
		// ...
		for (unsigned l=m; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			// ...
			Zlm[ll+m] = rr + I*ii;
			rr = conj(rr) + I*conj(ii);
			// ...
		}
	}
}

static void SH_cplx_to_2real(shtns_cfg shtns, cplx* Zlm, cplx* Rlm, cplx* Ilm)
{
	// ... (m=0)
	for (unsigned l=0; l<=LMAX; l++) {
		ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
		Rlm[lm] = creal(Zlm[ll]);
		Ilm[lm] = cimag(Zlm[ll]);
		// ...
	}
	// ...
	double half_parity = 0.5;
	for (unsigned m=1; m<=MMAX; m++) {
		// ...
		half_parity = -half_parity;		// half_parity = (-1)^m * 0.5
		for (unsigned l=m; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			cplx b = Zlm[ll-m] * half_parity;
			cplx a = Zlm[ll+m] * 0.5;
			Rlm[lm] = (conj(b) + a);
			Ilm[lm] = (conj(b) - a)*I;
			// ...
		}
	}
}
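
/* The two helpers above implement the 1-to-1 mapping between the expansion Z of a
   complex field z = x + i.y and the expansions R, I of the two real fields x and y:
	Z(l,m)  = R(l,m) + i.I(l,m)
	Z(l,-m) = (-1)^m * ( conj(R(l,m)) + i.conj(I(l,m)) )
   which inverts to what SH_cplx_to_2real computes above:
	R(l,m) = ( Z(l,m) + (-1)^m.conj(Z(l,-m)) ) / 2
	I(l,m) = i.( (-1)^m.conj(Z(l,-m)) - Z(l,m) ) / 2  */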
/// complex scalar transform (spatial to spectral).
void spat_cplx_to_SH(shtns_cfg shtns, cplx *z, cplx *alm)
{
	const long int nspat = shtns->nspat;
	cplx *rlm, *ilm, *Q, *mem;
	// ...
	if (MRES != 1) shtns_runerr("complex SH requires mres=1.");
	// ...
	mem = (cplx*) VMALLOC( (nspat + 2*NLM)*sizeof(cplx) );
	// ...
	if (shtns->fft_mode & FFT_OOP)  Q = mem;	// out-of-place FFT
	fftw_execute_dft(shtns->fft_cplx, z, Q);
	// ...
	const double norm = 1.0/NPHI;
	#pragma omp parallel for schedule(static,1) num_threads(shtns->nthreads)
	for (int m=0; m<=MMAX; m++) {
		// ... (m=0: treat real and imaginary parts as two real fields)
		spat_to_SH_ml(shtns, 0, Q, rlm, LMAX);
		spat_to_SH_ml(shtns, 0, (cplx*)(((double*)Q)+1), ilm, LMAX);
		// ...
		for (int l=0; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			alm[ll] = (creal(rlm[lm]) + I*creal(ilm[lm]))*norm;
			// ...
		}
		// ... (m > 0)
		long lm = LM(shtns, m, m);
		spat_to_SH_ml(shtns, m, Q + (NPHI-m)*NLAT, rlm + lm, LMAX);
		spat_to_SH_ml(shtns, m, Q + m*NLAT, ilm + lm, LMAX);
		// ...
		for (int l=m; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			// ...
			alm[ll+m] = rr*norm;
			// ...
			alm[ll-m] = ii*norm;
		}
	}
	// ...
}
/// complex scalar transform (spectral to spatial).
void SH_to_spat_cplx(shtns_cfg shtns, cplx *alm, cplx *z)
{
	const long int nspat = shtns->nspat;
	cplx *rlm, *ilm, *Q, *mem;
	// ...
	if (MRES != 1) shtns_runerr("complex SH requires mres=1.");
	// ...
	mem = (cplx*) VMALLOC( 2*(nspat + NLM*2)*sizeof(double) );
	// ...
	if ((NPHI>1) && (shtns->fft_mode & FFT_OOP))  Q = mem;
	// ...
	#pragma omp parallel for schedule(static,1) num_threads(shtns->nthreads)
	for (int m=0; m<=MMAX; m++) {
		// ... (m=0)
		for (int l=0; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			rlm[lm] = creal(alm[ll]);
			ilm[lm] = cimag(alm[ll]);
			// ...
		}
		// ...
		SH_to_spat_ml(shtns, 0, rlm, Q, LMAX);
		SH_to_spat_ml(shtns, 0, ilm, tmp, LMAX);
		for (int it=0; it<NLAT; it++) {
			((double*)Q)[2*it+1] = creal(tmp[it]);	// interleave the imaginary part
		}
		// ...
		for (long i=(MMAX+1)*NLAT; i<(NPHI-MMAX)*NLAT; i++)  Q[i] = 0.0;	// zero out Fourier modes beyond MMAX
		// ... (m > 0)
		long lm = LM(shtns, m, m);
		// ...
		cplx* almm = alm - 2*m;
		for (int l=m; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			// ...
		}
		// ...
		SH_to_spat_ml(shtns, m, rlm + lm, Q + m*NLAT, LMAX);
		SH_to_spat_ml(shtns, m, ilm + lm, Q + (NPHI-m)*NLAT, LMAX);
	}
	// ...
	if (NPHI>1)  fftw_execute_dft(shtns->ifft_cplx, Q, z);
	// ...
}
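
/* Example (not part of sht_func.c): round trip of the complex scalar transform.
   Assumes mres == 1 (required above) and mmax == lmax, so that the coefficient array
   holds (lmax+1)^2 entries, with mode (l,m) at index l*(l+1)+m. */
cplx *z   = (cplx*) malloc(shtns->nspat * sizeof(cplx));
cplx *alm = (cplx*) calloc((shtns->lmax+1)*(shtns->lmax+1), sizeof(cplx));
// ... fill z with a complex spatial field ...
spat_cplx_to_SH(shtns, z, alm);		// analysis
SH_to_spat_cplx(shtns, alm, z);		// synthesis, back onto the grid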
/// complex vector transform (2D, spectral to spatial).
void SHsphtor_to_spat_cplx(shtns_cfg shtns, cplx *slm, cplx *tlm, cplx *zt, cplx *zp)
{
	const long int nspat = shtns->nspat;
	cplx *zzt, *zzp, *mem, *stlm;
	// ...
	if (MRES != 1) shtns_runerr("complex SH requires mres=1.");
	// ...
	mem = (cplx*) VMALLOC( 4*(nspat + NLM*2)*sizeof(double) );
	stlm = mem + 2*nspat;
	// ...
	if ((NPHI>1) && (shtns->fft_mode & FFT_OOP)) { zzt = mem;  zzp = mem + nspat; }
	// ...
	#pragma omp parallel for schedule(static,1) num_threads(shtns->nthreads)
	for (int m=0; m<=MMAX; m++) {
		const long stride = LMAX+1-m;
		// ... (m=0)
		for (int l=0; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			stlm[lm]          = creal(slm[ll]);
			stlm[lm+2*stride] = cimag(slm[ll]);
			stlm[lm+stride]   = creal(tlm[ll]);
			stlm[lm+3*stride] = cimag(tlm[ll]);
			// ...
		}
		// ...
		SHsphtor_to_spat_ml(shtns, 0, stlm, stlm+stride, zzt, zzp, LMAX);
		SHsphtor_to_spat_ml(shtns, 0, stlm+2*stride, stlm+3*stride, tt, pp, LMAX);
		for (int it=0; it<NLAT; it++) {
			((double*)zzt)[2*it+1] = creal(tt[it]);
			((double*)zzp)[2*it+1] = creal(pp[it]);
		}
		// ...
		for (long i=(MMAX+1)*NLAT; i<(NPHI-MMAX)*NLAT; i++) {
			zzt[i] = 0.0;	zzp[i] = 0.0;
		}
		// ... (m > 0)
		long lm = 4 * LM(shtns, m, m);
		// ...
		for (int l=m; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			cplx sr = slm[ll+m];
			cplx si = slm[ll-m];
			cplx tr = tlm[ll+m];
			cplx ti = tlm[ll-m];
			// ...
			stlm[lm]        = sr;	stlm[lm+2*stride] = si;
			stlm[lm+stride] = tr;	stlm[lm+3*stride] = ti;
			// ...
		}
		lm = 4 * LM(shtns, m, m);
		SHsphtor_to_spat_ml(shtns, m, stlm+lm, stlm+stride+lm, zzt + m*NLAT, zzp + m*NLAT, LMAX);
		SHsphtor_to_spat_ml(shtns, -m, stlm+2*stride+lm, stlm+3*stride+lm, zzt + (NPHI-m)*NLAT, zzp + (NPHI-m)*NLAT, LMAX);
	}
	// ...
	fftw_execute_dft(shtns->ifft_cplx, zzt, zt);
	fftw_execute_dft(shtns->ifft_cplx, zzp, zp);
	// ...
}
/// complex vector transform (2D, spatial to spectral).
void spat_cplx_to_SHsphtor(shtns_cfg shtns, cplx *zt, cplx *zp, cplx *slm, cplx *tlm)
{
	const long int nspat = shtns->nspat;
	cplx *zzt, *zzp, *mem, *stlm;
	// ...
	if (MRES != 1) shtns_runerr("complex SH requires mres=1.");
	// ...
	mem = (cplx*) VMALLOC( (2*nspat + NLM*4)*sizeof(cplx) );
	stlm = mem + 2*nspat;
	// ...
	if (shtns->fft_mode & FFT_OOP) {
		zzt = mem;	zzp = mem + nspat;
		// ...
		fftw_execute_dft(shtns->fft_cplx, zt, zzt);
		fftw_execute_dft(shtns->fft_cplx, zp, zzp);
	}
	// ...
	const double norm = 1.0/NPHI;
	#pragma omp parallel for schedule(static,1) num_threads(shtns->nthreads)
	for (int m=0; m<=MMAX; m++) {
		const long stride = LMAX+1-m;
		// ... (m=0: real and imaginary parts as two real fields)
		spat_to_SHsphtor_ml(shtns, 0, zzt, zzp, stlm, stlm+stride, LMAX);
		spat_to_SHsphtor_ml(shtns, 0, (cplx*)(((double*)zzt)+1), (cplx*)(((double*)zzp)+1), stlm+2*stride, stlm+3*stride, LMAX);
		// ...
		for (int l=0; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			slm[ll] = (creal(stlm[lm]) + I*creal(stlm[lm+2*stride]))*norm;
			tlm[ll] = (creal(stlm[lm+stride]) + I*creal(stlm[lm+3*stride]))*norm;
			// ...
		}
		// ... (m > 0)
		long lm = 4 * LM(shtns, m, m);
		spat_to_SHsphtor_ml(shtns, m, zzt + (NPHI-m)*NLAT, zzp + (NPHI-m)*NLAT, stlm+lm, stlm+lm+stride, LMAX);
		spat_to_SHsphtor_ml(shtns, -m, zzt + m*NLAT, zzp + m*NLAT, stlm+lm+2*stride, stlm+lm+3*stride, LMAX);
		// ...
		for (int l=m; l<=LMAX; l++) {
			ll += (l<=MMAX) ? 2*l : 2*MMAX+1;
			// ...
			cplx sr = stlm[lm];		// [restored: line elided in the excerpt]
			cplx tr = stlm[lm+stride];
			cplx si = stlm[lm+2*stride];
			cplx ti = stlm[lm+3*stride];
			slm[ll+m] = sr*norm;
			tlm[ll+m] = tr*norm;
			if (m&1) { si = -si;  ti = -ti; }	// (-1)^m parity for negative m
			slm[ll-m] = si*norm;
			tlm[ll-m] = ti*norm;
			// ...
		}
	}
	// ...
}
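
/* Example (not part of sht_func.c): 2D complex vector round trip, with the tangential
   field described by spheroidal (slm) and toroidal (tlm) scalars; same storage
   conventions as the complex scalar example above. */
spat_cplx_to_SHsphtor(shtns, zt, zp, slm, tlm);		// analysis: (zt,zp) -> (slm,tlm)
SHsphtor_to_spat_cplx(shtns, slm, tlm, zt, zp);		// synthesis: (slm,tlm) -> (zt,zp)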
/* The same skeleton appears four times here (source lines 1241-1324), once per rotation
   wrapper operating on the expansion of a complex field: split into two real-field
   expansions, rotate each with the real rotation routines, then recombine. */
	if (MRES != 1) shtns_runerr("complex SH requires mres=1.");
	// ...
	cplx* ilm = rlm + NLM;
	// ...
	SH_cplx_to_2real(shtns, Qlm, rlm, ilm);
	// ... (rotate rlm and ilm as two real fields)
	SH_2real_to_cplx(shtns, rlm, ilm, Rlm);
	// ...
void SH_cplx_Zrotate(shtns_cfg shtns, cplx *Qlm, double alpha, cplx *Rlm)
{
	if (MRES != 1) shtns_runerr("complex SH requires mres=1.");
	// ...
	cplx* eima = (cplx*) VMALLOC( (2*MMAX+1)*sizeof(cplx) );
	eima += MMAX;		// [restored] center the table so that eima[-MMAX..MMAX] are valid
	eima[0] = 1.0;		// [restored]
	for (int m=1; m<=MMAX; m++) {	// precompute the complex numbers
		double cma = cos(m*alpha);
		double sma = sin(m*alpha);
		eima[m]  = cma - I*sma;		// e^(-i.m.alpha)
		eima[-m] = cma + I*sma;		// e^(+i.m.alpha)
	}
	// ...
	long ll = 0;			// [restored] mode (l,m) stored at index l*(l+1)+m
	for (int l=0; l<=MMAX; l++) {	// note: int (not unsigned), so that m <= l compares correctly for m < 0
		for (int m=-l; m<=l; m++) {
			Rlm[ll] = Qlm[ll] * eima[m];
			ll++;
		}
	}
	for (int l=MMAX+1; l<=LMAX; l++) {	// here only |m| <= MMAX is stored
		for (int m=-MMAX; m<=MMAX; m++) {
			Rlm[ll] = Qlm[ll] * eima[m];
			ll++;
		}
	}
	// ...
}
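
/* Example (not part of sht_func.c): Z-rotation of a complex-field expansion; as the
   loops above show, each mode (l,m) is simply multiplied by e^(-i.m.alpha). */
SH_cplx_Zrotate(shtns, Qlm, M_PI/4, Rlm);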
typedef double rndu __attribute__ ((vector_size (VSIZE2*8), aligned (8)));	// possibly-unaligned vector
typedef double v2du __attribute__ ((vector_size (16), aligned (8)));		// possibly-unaligned vector
/// Allocate memory and precompute some recurrence coefficients for rotation (independent of angle).
shtns_rot shtns_rotation_create(const int lmax, const int mmax, int norm)
{
	// ...
	r->plm_beta = (double*) malloc( sizeof(double) * nlm_calc(lmax+1, lmax+1, 1) );
	// ...
	r->eia = 0;	r->eig = 0;
	r->flag_alpha_gamma = 0;
	// ...
}

/// Release memory allocated for the rotation object.
void shtns_rotation_destroy(shtns_rot r)
{
	// ...
	if (r->plm_beta) free(r->plm_beta);
	// ...
}
static cplx special_eiphi(const double phi)	// e^(i.phi), exact for special angles [elided branch bodies restored from the obvious exact values]
{
	cplx eip;
	if (phi == 0.0) {
		eip = 1.0;
	} else if (phi == M_PI) {
		eip = -1.0;
	} else if (phi == M_PI/2) {
		eip = I;
	} else if ((phi == 3*M_PI/2) || (phi == -M_PI/2)) {
		eip = -I;
	} else if (phi == M_PI/4) {
		eip = sqrt(0.5) + I*sqrt(0.5);
	} else if (phi == M_PI/3) {
		eip = 0.5 + I*sqrt(3)*0.5;
	} else {
		eip = cos(phi) + I*sin(phi);
	}
	return eip;
}
/// Set the rotation angles and compute the associated Legendre functions, given the 3 intrinsic Euler angles (ZYZ convention).
void shtns_rotation_set_angles_ZYZ(shtns_rot r, double alpha, double beta, double gamma)
{
	beta *= r->no_cs_phase;
	if UNLIKELY(fabs(beta) > M_PI) {
		printf("ERROR: angle 'beta' must be between -pi and pi\n");
		// ...
	}
	if (beta < 0.0) {	// [condition restored] flip the sign of beta, shifting alpha and gamma by pi
		// ...
		alpha = (alpha>0) ? alpha-M_PI : alpha+M_PI;
		// ...
		gamma = (gamma>0) ? gamma-M_PI : gamma+M_PI;
	} else if (beta == 0.0) {
		// ...
	}
	const cplx eib = special_eiphi(beta);
	r->cos_beta = creal(eib);
	r->sin_beta = cimag(eib);
	r->eia = special_eiphi(-alpha);		// e^(-i.alpha)
	r->eig = special_eiphi(-gamma);		// e^(-i.gamma)
	// ...
	r->flag_alpha_gamma = (alpha != 0) + 2*(gamma != 0);
	// ...
	const int lmax = r->lmax + 1;
	#pragma omp parallel for schedule(dynamic) firstprivate(lmax)
	for (int m=0; m<=lmax; m++) {		// compute the Legendre functions at cos(beta), for all m
		const long ofs = m*(lmax+2) - (m*(m+1))/2;
		// ...
	}
}
/// Sets a rotation by angle theta around the axis of cartesian coordinates (Vx,Vy,Vz), and computes the associated Legendre functions.
void shtns_rotation_set_angle_axis(shtns_rot r, double theta, double Vx, double Vy, double Vz)
{
	// ...
	if ((Vx==0) && (Vy==0)) {	// rotation axis along Z
		if (Vz<0) theta = -theta;
		// ...
	}
	// ...
	double s = sin(0.5*theta);
	double c = cos(0.5*theta);
	double n = s / sqrt(Vx*Vx + Vy*Vy + Vz*Vz);
	Vx *= n;	Vy *= n;	Vz *= n;	// (c, Vx, Vy, Vz) is now a unit quaternion
	// ...
	double beta = acos( 1.0 - 2.0*(Vx*Vx + Vy*Vy) );
	// ...
	double alpha = atan2( Vyz - cVx, cVy + Vxz );	// (Vyz, cVx, cVy, Vxz: products computed in elided lines)
	double gamma = atan2( Vyz + cVx, cVy - Vxz );
	// ...
}
int quarter_wigner_d_matrix(shtns_rot r, const int l, double* mx, const int compressed)
{
	if ((l > r->lmax) || (l<0)) {
		printf("ERROR: 0 <= l <= lmax not satisfied.\n");
		// ...
	}
	// ...
	const int lmax = r->lmax + 1;
	const double cos_beta = r->cos_beta;
	const double sin_beta = r->sin_beta;
	const double* const plm_beta = r->plm_beta;
	// ...
	const int lw = (compressed) ? l+1 : 2*l+1;	// line width of the matrix
	// ...
	mx += l*lw + l*(compressed==0);			// shift so that mx[0] is the (m'=0, m=0) element
	// ...
	mx[0] = plm_beta[l];	// d(l; 0,0) = P_l(cos(beta))
	// ...
	double cb_p1 = (1. + cos_beta)*0.5;
	double cb_m1 = (1. - cos_beta)*0.5;
	const double d_1 = 1.0/((l)*(l+1.));
	for (int m=1; m<=l; m++) {	// lines m'=0 and m'=1, from the Legendre functions
		const long ofs_m   = m*(lmax+2) - (m*(m+1))/2;
		const long ofs_mm1 = (m-1)*(lmax+2) - ((m-1)*m)/2;
		const long ofs_mp1 = (m+1)*(lmax+2) - ((m+1)*(m+2))/2;
		mx[m] = plm_beta[ofs_m + (l-m)];		// m'=0
		// ...
		double a  = sqrt( ((l+1+m)*(l+1-m)) * d_1 ) * sin_beta;
		double b1 = sqrt( ((l-m+1)*(l-m+2)) * d_1 ) * cb_p1;
		double b2 = sqrt( ((l+m+1)*(l+m+2)) * d_1 ) * cb_m1;
		mx[lw + m] = b2*plm_beta[ofs_mp1 + (l+1)-(m+1)] + b1*plm_beta[ofs_mm1 + (l+1)-(m-1)] + a*plm_beta[ofs_m + (l+1)-m];	// m'=1
	}
	// ...
	for (int m=0; m<l; m++)  clm[m] = sqrt( (l-m)*(l+m+1) );	// recurrence coefficients
	// ... (first recurrence step; mp set in elided code)
	const double c_1 = 1.0/clm[0];
	for (int m=-mp; m<l; m++) {
		double H = mx[lw*(mp+2) + m] + c_1*( -clm[m-1] * mx[lw*(mp+1) + (m-1)] + clm[m] * mx[lw*(mp+1) + (m+1)]);
		// ...
	}
	// ... (last point m=l:)
	double H = mx[lw*(mp+2) + m] - c_1*clm[m-1] * mx[lw*(mp+1) + (m-1)];
	// ...
	for (int mp=2; mp<=l; mp++) {	// recurrence on m', for the lines +m' and -m' simultaneously
		const double c_1 = 1.0/clm[mp-1];
		const double cmp2 = clm[mp-2];
		for (int m=mp; m<l; m++) {
			double H  = cmp2 * mx[lw*(mp-2) + m] + clm[m-1] * mx[lw*(mp-1) + (m-1)] - clm[m] * mx[lw*(mp-1) + (m+1)];
			double H_ = cmp2 * mx[-lw*(mp-2) + m] - clm[m-1] * mx[-lw*(mp-1) + (m-1)] + clm[m] * mx[-lw*(mp-1) + (m+1)];
			mx[lw*mp + m]  = H * c_1;
			mx[-lw*mp + m] = H_ * c_1;
		}
		// ... (last point m=l:)
		double H  = cmp2 * mx[lw*(mp-2) + m] + clm[m-1] * mx[lw*(mp-1) + (m-1)];
		double H_ = cmp2 * mx[-lw*(mp-2) + m] - clm[m-1] * mx[-lw*(mp-1) + (m-1)];
		mx[lw*mp + m]  = H * c_1;
		mx[-lw*mp + m] = H_ * c_1;
		// ...
	}
	// ...
}
/// Generate the spherical-harmonic rotation matrix for given degree l and orthonormal convention (Wigner-d matrix).
int shtns_rotation_wigner_d_matrix(shtns_rot r, const int l, double* mx)
{
	// ...
	const int lw = quarter_wigner_d_matrix(r, l, mx, 0);	// compute a quarter of the matrix; the rest follows by symmetry
	if (lw <= 0)  return 0;
	// ...
	mx += l*(2*l+1) + l;	// allow indexing by negative m and m'
	// ...
	for (int m=1; m<=l; m++) {
		mx[(2*l+1)*m + -m]   = mx[(2*l+1)*(-m) + m];
		mx[(2*l+1)*(-m) - m] = mx[(2*l+1)*m + m];
	}
	for (int mp=-l+1; mp<l; mp++) {
		for (int m=abs(mp)+1; m<=l; m++) {
			double x = mx[(2*l+1)*mp + m];
			double parity = (1-2*((m-mp)&1));	// (-1)^(m-m')
			mx[(2*l+1)*(-m) - mp] = x;
			mx[(2*l+1)*m + mp]    = x * parity;
			mx[(2*l+1)*(-mp) - m] = x * parity;
		}
	}
	// ...
}
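
/* Example (not part of sht_func.c): retrieving the full Wigner-d matrix for degree l.
   After the pointer shift above, element d(m',m) lives at mx[(2*l+1)*(l+mp) + (l+m)]. */
shtns_rot r = shtns_rotation_create(lmax, lmax, sht_orthonormal);
shtns_rotation_set_angles_ZYZ(r, 0.0, 0.3, 0.0);	// pure rotation around Y by beta = 0.3
const int l = 2;
double *mx = (double*) calloc((2*l+1)*(2*l+1), sizeof(double));
shtns_rotation_wigner_d_matrix(r, l, mx);		// fills the (2l+1) x (2l+1) matrix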
void shtns_rotation_apply_cplx(shtns_rot r, cplx* Zlm, cplx* Rlm)
{
	const int lmax = r->lmax;
	// ...
	Rlm[0] = Zlm[0];	// copy l=0
	// ...
	#pragma omp for schedule(dynamic)
	for (int l=1; l<=lmax; l++) {
		const int lw = l+1;		// line width of compressed matrix.
		double* mx0 = (double*) malloc( sizeof(double) * (2*l+1)*lw );
		memset(mx0, 0, sizeof(double)*(2*l+1)*lw);
		// ...
		quarter_wigner_d_matrix(r, l, mx0, 1);	// 1 : compressed matrix (saves memory and time)
		const double* mx = mx0 + l*lw;		// allow indexing by negative m and m'
		// ...
		const cplx* zl = Zlm + l*(l+1);
		cplx* rl = Rlm + l*(l+1);
		// ...
		for (int m=-l; m<=l; m++) {
			cplx rm = 0.0;		// [restored: declaration elided in the excerpt]
			// only 1/4 of the matrix is read (4 times).
			for (int mp=l; mp>=abs(m); mp--) {	// third domain (right) : populated.
				rm += zl[mp] * mx[m*lw + mp];
			}
			// ...
			for (int mp=-m-1; mp>=m; mp--) {	// second domain (top)
				rm += zl[mp] * mx[-mp*lw - m];	// exchange m and m' and change signs => no parity factor
			}
			// ...
			for (int mp=m-1; mp>=-m; mp-=2) {	// second domain (bottom) => always even number of elements
				rm -= zl[mp] * mx[mp*lw + m];		// exchange m and m' (negative parity)
				rm += zl[mp-1] * mx[(mp-1)*lw + m];	// exchange m and m' (positive parity)
			}
			// ...
			for (int mp=-abs(m)-1; mp >-l; mp-=2) {	// first domain (left)
				rm -= zl[mp] * mx[-m*lw - mp];		// change sign of m and m'
				rm += zl[mp-1] * mx[-m*lw - (mp-1)];	// change sign of m and m'
			}
			if ((l+m)&1) {	int mp = -l;		// boundary condition.
				rm -= zl[mp] * mx[-m*lw - mp];	// change sign of m and m'
			}
			// ... (store rm into rl[m])
		}
		// ...
	}
	// ...
}
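
/* Example (not part of sht_func.c): rotating the expansion Zlm of a complex field.
   Zlm and Rlm hold (lmax+1)^2 coefficients, mode (l,m) at index l*(l+1)+m. */
shtns_rot r = shtns_rotation_create(lmax, lmax, sht_orthonormal);
shtns_rotation_set_angles_ZYZ(r, 0.0, M_PI/2, 0.0);
shtns_rotation_apply_cplx(r, Zlm, Rlm);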
/// apply rotation to the orthonormal spherical harmonic expansion Qlm of a real field, storing the result in Rlm.
void shtns_rotation_apply_real(shtns_rot r, cplx* Qlm, cplx* Rlm)
{
	const int lmax = r->lmax + 1;
	const int mmax = r->mmax;
	// ...
	if (r->beta == 0.0) {
		// ... (pure rotation around Z, handled separately)
	}
	// ...
	#pragma omp parallel firstprivate(lmax,mmax)
	{
		const double cos_beta = r->cos_beta;
		const double sin_beta = r->sin_beta;
		const double* const plm_beta = r->plm_beta;
		const int flag_ag = r->flag_alpha_gamma;
		// ...
		const double cb_p1 = (1. + cos_beta)*0.5;
		const double cb_m1 = (1. - cos_beta)*0.5;
		// ...
		const double m0_renorm = r->m0_renorm;
		const double m0_renorm_1 = 1.0 / m0_renorm;
		// ...
		const cplx eia = r->eia;
		const cplx eig = r->eig;
		// ...
		cplx* const rl = (cplx*) malloc( sizeof(double) * (9*lmax + 2) );	// work buffer
		// ...
		#pragma omp for schedule(dynamic,1)
		for (int l=lmax-1; l>0; l--) {
			// ...
			const int mlim = (l <= mmax) ? l : mmax;
			cplx* const ql = rl + (l+1);
			double* const m0 = (double*) (rl + 2*(l+1));
			memset(rl, 0, sizeof(cplx)*(2*l+1));
			// ...
			ql[0] = creal(Qlm[lm]) * m0_renorm;
			// ... (apply the alpha rotation, around Z, while gathering the coefficients:)
			for (int m=1; m<=mlim; m++) {
				// ...
				ql[m] = Qlm[lm] * eima;
			}
			// ... (or, without alpha rotation, plain copy:)
			for (int m=1; m<=mlim; m++) {
				// ...
				((v2d*)ql)[m] = ((v2d*)Qlm)[lm];
			}
			// ...
			for (int m=mlim+1; m<=l; m++)  ql[m] = 0;
			// ...
			m0[0] = plm_beta[l];	// d(l; 0,0) = P_l(cos(beta))
			// ...
			double r0 = 0.5*creal(ql[0])*m0[0];
			// ...
			const double d_1 = 1.0/((l)*(l+1.));
			// ...
			long ofs_m = (lmax+2) - 1;
			// ... (lines m'=0 and m'=1 of the matrix, from the Legendre functions:)
			for (int m=1; m<=l; m++) {
				double H = plm_beta[ofs_m + (l-m)];	// m'=0
				// ...
				r0 += creal(ql[m]) * H;
				const double a = sqrt( ((l+1+m)*(l+1-m)) * d_1 ) * sin_beta;
				rl[m] += creal(ql[0]) * H*parity;
				const double b1 = sqrt( ((l-m+1)*(l-m+2)) * d_1 ) * cb_p1;
				long ofs_mp1 = (m+1)*(lmax+2) - ((m+1)*(m+2))/2;
				const double b2 = sqrt( ((l+m+1)*(l+m+2)) * d_1 ) * cb_m1;
				// ...
				H = b2*plm_beta[ofs_mp1 + (l+1)-(m+1)] + b1*plm_beta[ofs_mm1 + (l+1)-(m-1)] + a*plm_beta[ofs_m + (l+1)-m];	// m'=1
				// ...
				rl[m] += ql[1] * H*parity;
				// ...
			}
			// ...
			double* clm = m0 + 4*lw + 1;
			// ...
			for (int m=0; m<l; m++)  clm[m] = sqrt( (l-m)*(l+m+1) );	// recurrence coefficients
			// ...
			double* mx1_ = m0 + 2*lw;
			double* mx0_ = m0 + 3*lw;
			memcpy(mx1_, m0, sizeof(double)*2*lw);
			// ...
			s2d conj_parity = {-1.0, 1.0};
			// ...
			{	const int mp = -1;	// first step of the recurrence on m'
				double clm_1 = clm[0];
				const double c_1 = 1.0/clm_1;
				double mx1_1 = mx1_[-mp-1];
				double mx1_0 = mx1_[-mp];
				v2d rmp = vdup(0.0);
				// (two variants follow, selected by elided preprocessor conditionals:)
				v2d zlmp = ((v2d*)ql)[-mp] * conj_parity;
				// ...
				cplx zlmp = conj(ql[-mp]) * (1-2*(mp&1));
				// ... (loop over m, two points at a time:)
				double clm0 = clm[m];
				double clm1 = clm[m+1];
				double mx11 = mx1_[m+1];
				double mx12 = mx1_[m+2];
				double H0 = mx0_[m]   + c_1*( -clm_1 * mx1_1 + clm0 * mx11 );
				double H1 = mx0_[m+1] + c_1*( -clm0  * mx1_0 + clm1 * mx12 );
				clm_1 = clm1;	mx1_1 = mx11;	mx1_0 = mx12;
				// ...
				rmp += ((v2d*)ql)[m] * vdup(H0) + ((v2d*)ql)[m+1] * vdup(H1);
				// ...
				((v2d*)rl)[m] += zlmp * vdup(H0);
				// ...
				((v2d*)rl)[m+1] -= zlmp * vdup(H1);
				// ... (last point:)
				double H0 = mx0_[m] - c_1 * clm_1 * mx1_1;
				// ...
				rmp += ((v2d*)ql)[m] * vdup(H0);
				// ...
				((v2d*)rl)[m] += zlmp * vdup(H0);
				// ...
				((v2d*)rl)[-mp] += rmp * conj_parity;
				conj_parity = - conj_parity;
				// ... (scalar variant:)
				rl[-mp] += conj(rmp)*(1-2*(mp&1));
				// ...
				double* t = mx0_;  mx0_ = mx1_;  mx1_ = t;	// swap row buffers
			}
			// ...
			double* mx1 = m0 + lw;
			for (int mp=2; mp<=l; mp++) {	// recurrence on m'
				const double cmp2 = clm[mp-2];
				double clm_1 = clm[mp-1];
				const double c_1 = 1.0/clm_1;
				// ...
				v2d rmp = vdup(0.0);
				v2d zlmp = ((v2d*)ql)[mp];
				v2d rmp_ = vdup(0.0);
				// ...
				v2d zlmp_ = zlmp * conj_parity;
				// ... (scalar variant:)
				cplx zlmp_ = conj(zlmp) * (1-2*(mp&1));
				// ... (vectorized recurrence for the two matrix lines +m' and -m':)
				for (; m<=l-(VSIZE2-1); m+=VSIZE2) {
					rnd clm_1 = *((rndu*)(clm+m-1));
					rnd clm0  = *((rndu*)(clm+m));
					// ...
					rnd mx1_1 = *((rndu*)(mx1+m-1));
					rnd mx11  = *((rndu*)(mx1+m+1));
					// ...
					rnd mx1__1 = *((rndu*)(mx1_+m-1));
					rnd mx11_  = *((rndu*)(mx1_+m+1));
					// ...
					rnd mx00  = *((rndu*)(mx0+m));
					rnd mx00_ = *((rndu*)(mx0_+m));
					// ...
					rnd H  = (vall(cmp2) * mx00  + clm_1 * mx1_1  - clm0 * mx11 ) * vall(c_1);
					rnd H_ = (vall(cmp2) * mx00_ - clm_1 * mx1__1 + clm0 * mx11_) * vall(c_1);
					*((rndu*)(mx0+m))  = H;
					*((rndu*)(mx0_+m)) = H_;
					// ...
				}
				// ... (scalar tail:)
				double clm_1 = clm[m-1];
				double clm0 = clm[m];
				double mx1_1 = mx1[m-1];
				double mx1__1 = mx1_[m-1];
				double mx11 = mx1[m+1];
				double mx11_ = mx1_[m+1];
				double H0  = cmp2 * mx0[m]  + clm_1 * mx1_1  - clm0 * mx11;
				double H0_ = cmp2 * mx0_[m] - clm_1 * mx1__1 + clm0 * mx11_;
				// ... (accumulation, two points at a time:)
				double H0 = mx0[m];	double H1 = mx0[m+1];
				double H0_ = mx0_[m];	double H1_ = mx0_[m+1];
				rmp  += ((v2d*)ql)[m] * vdup(H0)  + ((v2d*)ql)[m+1] * vdup(H1);
				rmp_ += ((v2d*)ql)[m] * vdup(H0_) + ((v2d*)ql)[m+1] * vdup(H1_);
				// ...
				((v2d*)rl)[m] += zlmp * vdup(H0) + zlmp_ * vdup(H0_);
				// ...
				((v2d*)rl)[m+1] -= zlmp * vdup(H1) + zlmp_ * vdup(H1_);
				// ... (last point:)
				double H0_ = mx0_[m];
				rmp  += ((v2d*)ql)[m] * vdup(H0);
				rmp_ += ((v2d*)ql)[m] * vdup(H0_);
				// ...
				((v2d*)rl)[m] += zlmp * vdup(H0) + zlmp_ * vdup(H0_);
				// ...
				((v2d*)rl)[mp] += rmp + rmp_ * conj_parity;
				conj_parity = - conj_parity;
				// ... (scalar variant:)
				rl[mp] += rmp + conj(rmp_)*(1-2*(mp&1));
				// ...
				double* t = mx0;  mx0 = mx1;  mx1 = t;		// swap row buffers
				double* t_ = mx0_;  mx0_ = mx1_;  mx1_ = t_;
			}
			// ...
			Rlm[lm] = creal(rl[0]) * m0_renorm_1;	// m=0
			// ... (apply the gamma rotation, around Z, while writing back:)
			for (int m=1; m<=mlim; m++) {
				// ...
				Rlm[lm] = rl[m] * eimg;
			}
			// ... (or, without gamma rotation:)
			for (int m=1; m<=mlim; m++) {
				// ...
				((v2d*)Rlm)[lm] = ((v2d*)rl)[m];
			}
		}
	}
}
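
/* Example (not part of sht_func.c): rotating the expansion of a real scalar field with
   the on-the-fly rotation API; Qlm and Rlm are standard real-field coefficient arrays
   of size nlm. */
shtns_rot r = shtns_rotation_create(lmax, lmax, sht_orthonormal);
shtns_rotation_set_angle_axis(r, M_PI/2, 0., 1., 0.);	// 90 degrees around the Y axis
shtns_rotation_apply_real(r, Qlm, Rlm);
shtns_rotation_destroy(r);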
/// rotate Zlm, store the result in Rlm, without ever storing the Wigner-d matrices (on-the-fly operation).
void shtns_rotation_apply_cplx(shtns_rot r, cplx* Zlm, cplx* Rlm)
{
	const int lmax = r->lmax + 1;
	const int mmax = r->mmax;
	// ...
	if (r->beta == 0.0) {
		// pure rotation around Z: multiply each mode by e^(-i.m.alpha)
		const cplx eia = r->eia;
		for (int l=0; l<lmax; l++) {
			long ll = (l<=mmax) ? l*(l+1) : mmax*(2*l-mmax) + l;
			cplx eim_alpha = eia;
			Rlm[ll + 0] = Zlm[ll + 0];
			for (int m=1; m<=l; m++) {
				Rlm[ll - m] = Zlm[ll - m] * conj(eim_alpha);
				Rlm[ll + m] = Zlm[ll + m] * eim_alpha;
				// ...
			}
		}
		// ...
	}
	#pragma omp parallel firstprivate(lmax,mmax)
	{
		const double cos_beta = r->cos_beta;
		const double sin_beta = r->sin_beta;
		const double* const plm_beta = r->plm_beta;
		// ...
		const double cb_p1 = (1. + cos_beta)*0.5;
		const double cb_m1 = (1. - cos_beta)*0.5;
		// ...
		const int flag_ag = r->flag_alpha_gamma;
		const cplx eia = r->eia;
		const cplx eig = r->eig;
		const int ntmp = 1 + (flag_ag & 1);
		// ...
		v2d* const buf = (v2d*) malloc( sizeof(double) * (2*ntmp*(2*lmax-1) + 5*lmax + 2) );
		// ...
		#pragma omp for schedule(dynamic,1)
		for (int l=lmax-1; l>0; l--) {
			// ...
			const int mlim = (l <= mmax) ? l : mmax;
			const long ll = (l <= mmax) ? l*(l+1) : mmax*(2*l-mmax) + l;
			// ...
			double* const m0 = (double*) (buf + ntmp*(2*l+1));
			memset(rl, 0, sizeof(cplx)*(2*l+1));
			// ...
			v2d* zl = (v2d*)Zlm + l*(l+1);
			// ... (apply the alpha rotation, around Z, while copying into a temporary:)
			((cplx*)zl)[0] = Zlm[l*(l+1) + 0];
			cplx eim_alpha = eia;
			for (int m=1; m<=mlim; m++) {
				((cplx*)zl)[-m] = Zlm[ll - m] * conj(eim_alpha);
				((cplx*)zl)[m]  = Zlm[ll + m] * eim_alpha;
				// ...
			}
			// ...
			} else if (mmax < l) {
				// ...
				for (int m=-mmax; m<=mmax; m++)  ((cplx*)zl)[m] = Zlm[ll+m];
			}
			for (int m=mlim+1; m<=l; m++) {
				((cplx*)zl)[-m] = 0;
				// ...
			}
			// ...
			m0[0] = plm_beta[l];	// d(l; 0,0) = P_l(cos(beta))
			// ...
			v2d r0 = zl[0]*vdup(m0[0]);
			// ...
			const double d_1 = 1.0/((l)*(l+1.));
			// ...
			v2d rm1 = vdup(0.0);
			// ...
			long ofs_m = (lmax+2) - 1;
			// ... (lines m'=0 and m'=1 of the matrix, from the Legendre functions:)
			for (int m=1; m<=l; m++) {
				double H = plm_beta[ofs_m + (l-m)];	// m'=0
				// ...
				r0 += zl[m] * vdup(H);
				r0 += zl[-m] * vdup(H*parity);
				const double a = sqrt( ((l+1+m)*(l+1-m)) * d_1 ) * sin_beta;
				rl[-m] += zl[0] * vdup(H);
				rl[m]  += zl[0] * vdup(H*parity);
				const double b1 = sqrt( ((l-m+1)*(l-m+2)) * d_1 ) * cb_p1;
				long ofs_mp1 = (m+1)*(lmax+2) - ((m+1)*(m+2))/2;
				const double b2 = sqrt( ((l+m+1)*(l+m+2)) * d_1 ) * cb_m1;
				// ...
				H = b2*plm_beta[ofs_mp1 + (l+1)-(m+1)] + b1*plm_beta[ofs_mm1 + (l+1)-(m-1)] + a*plm_beta[ofs_m + (l+1)-m];	// m'=1
				// ...
				r1 += zl[m] * vdup(H);
				// ...
				rm1 += zl[-m] * vdup(H*parity);
				// ...
				rl[-m] += zl[-1] * vdup(H);
				rl[m]  += zl[1] * vdup(H*parity);
				// ...
			}
			// ...
			double* clm = m0 + 4*lw + 1;
			// ...
			for (int m=0; m<l; m++)  clm[m] = sqrt( (l-m)*(l+m+1) );	// recurrence coefficients
			// ...
			double* mx1_ = m0 + 2*lw;
			double* mx0_ = m0 + 3*lw;
			memcpy(mx1_, m0, sizeof(double)*2*lw);
			{	const int mp = -1;	// first step of the recurrence on m'
				double clm_1 = clm[-mp-1];
				const double c_1 = 1.0/clm_1;
				double mx1_1 = mx1_[-mp-1];
				double mx1_0 = mx1_[-mp];
				v2d rmp = vdup(0.0);
				v2d rmmp = vdup(0.0);
				v2d zlmmp = zl[-mp];
				// ... (loop over m, two points at a time:)
				double clm0 = clm[m];
				double clm1 = clm[m+1];
				double mx11 = mx1_[m+1];
				double mx12 = mx1_[m+2];
				double H0 = mx0_[m]   + c_1*( -clm_1 * mx1_1 + clm0 * mx11 );
				double H1 = mx0_[m+1] + c_1*( -clm0  * mx1_0 + clm1 * mx12 );
				clm_1 = clm1;	mx1_1 = mx11;	mx1_0 = mx12;
				// ...
				rmp  += zl[m] * vdup(H0) + zl[m+1] * vdup(H1);
				rmmp += zl[-m] * vdup(H0) - zl[-m-1] * vdup(H1);
				rl[-m-1] += zlmmp * vdup(H1);
				// ...
				rl[-m] += zlmmp * vdup(H0);
				rl[m]  += zlmp * vdup(H0);
				// ...
				rl[m+1] -= zlmp * vdup(H1);
				// ... (last point:)
				double H0 = mx0_[m] - c_1 * clm_1 * mx1_1;
				// ...
				rmp  += zl[m] * vdup(H0);
				rmmp += zl[-m] * vdup(H0);
				// ...
				rl[-m] += zlmmp * vdup(H0);
				rl[m]  += zlmp * vdup(H0);
				// ...
				double* t = mx0_;  mx0_ = mx1_;  mx1_ = t;	// swap row buffers
			}
			// ...
			double* mx1 = m0 + lw;
			for (int mp=2; mp<=l; mp++) {	// recurrence on m'
				const double cmp2 = clm[mp-2];
				double clm_1 = clm[mp-1];
				const double c_1 = 1.0/clm_1;
				// ...
				v2d rmp = vdup(0.0);
				v2d rmmp = vdup(0.0);
				v2d zlmmp = zl[-mp];
				// ... (vectorized recurrence for the two matrix lines +m' and -m':)
				for (; m<=l-(VSIZE2-1); m+=VSIZE2) {
					rnd clm_1 = *((rndu*)(clm+m-1));
					rnd clm0  = *((rndu*)(clm+m));
					// ...
					rnd mx1_1 = *((rndu*)(mx1+m-1));
					rnd mx11  = *((rndu*)(mx1+m+1));
					// ...
					rnd mx1__1 = *((rndu*)(mx1_+m-1));
					rnd mx11_  = *((rndu*)(mx1_+m+1));
					// ...
					rnd mx00  = *((rndu*)(mx0+m));
					rnd mx00_ = *((rndu*)(mx0_+m));
					// ...
					rnd H  = (vall(cmp2) * mx00  + clm_1 * mx1_1  - clm0 * mx11 ) * vall(c_1);
					rnd H_ = (vall(cmp2) * mx00_ - clm_1 * mx1__1 + clm0 * mx11_) * vall(c_1);
					// ...
					*((rndu*)(mx0+m))  = H;
					*((rndu*)(mx0_+m)) = H_;
					// ...
				}
				// ... (scalar tail:)
				double clm_1 = clm[m-1];
				double clm0 = clm[m];
				double mx1_1 = mx1[m-1];
				double mx1__1 = mx1_[m-1];
				double mx11 = mx1[m+1];
				double mx11_ = mx1_[m+1];
				double H0  = (cmp2 * mx0[m]  + clm_1 * mx1_1  - clm0 * mx11 ) * c_1;
				double H0_ = (cmp2 * mx0_[m] - clm_1 * mx1__1 + clm0 * mx11_) * c_1;
				// ... (accumulation, two points at a time:)
				double H0 = mx0[m];	double H1 = mx0[m+1];
				double H0_ = mx0_[m];	double H1_ = mx0_[m+1];
				rmp  += zl[m] * vdup(H0) + zl[m+1] * vdup(H1);
				rmmp += zl[m] * vdup(H0_) + zl[m+1] * vdup(H1_);
				rmmp += zl[-m] * vdup(H0) - zl[-m-1] * vdup(H1);
				rmp  += zl[-m] * vdup(H0_) - zl[-m-1] * vdup(H1_);
				rl[-m-1] += zlmmp * vdup(H1) + zlmp * vdup(H1_);
				// ...
				rl[-m] += zlmmp * vdup(H0) + zlmp * vdup(H0_);
				rl[m]  += zlmp * vdup(H0) + zlmmp * vdup(H0_);
				// ...
				rl[m+1] -= zlmp * vdup(H1) + zlmmp * vdup(H1_);
				// ... (last point:)
				double H0_ = mx0_[m];
				rmp  += zl[m] * vdup(H0);
				rmmp += zl[m] * vdup(H0_);
				rmmp += zl[-m] * vdup(H0);
				rmp  += zl[-m] * vdup(H0_);
				// ...
				rl[-m] += zlmmp * vdup(H0) + zlmp * vdup(H0_);
				rl[m]  += zlmp * vdup(H0) + zlmmp * vdup(H0_);
				// ...
				double* t = mx0;  mx0 = mx1;  mx1 = t;		// swap row buffers
				double* t_ = mx0_;  mx0_ = mx1_;  mx1_ = t_;
			}
			// ... (apply the gamma rotation, around Z, while writing back:)
			Rlm[ll + 0] = ((cplx*)rl)[0];
			cplx eim_gamma = eig;
			for (int m=1; m<=mlim; m++) {
				Rlm[ll - m] = ((cplx*)rl)[-m] * conj(eim_gamma);
				Rlm[ll + m] = ((cplx*)rl)[m] * eim_gamma;
				// ...
			}
			// ... (or, without gamma rotation:)
			memcpy(Rlm + ll-mlim, rl-mlim, sizeof(cplx)*(2*mlim+1));
		}
	}
}