NFFT  3.3.1
fastsum.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00025 #include "config.h"
00026 
00027 #include <stdlib.h>
00028 #include <math.h>
00029 #ifdef HAVE_COMPLEX_H
00030 #include <complex.h>
00031 #endif
00032 
00033 #include "nfft3.h"
00034 #include "fastsum.h"
00035 #include "infft.h"
00036 
00037 // Required for test if (ths->k == one_over_x)
00038 #include "kernels.h"
00039 
00046 static int max_i(int a, int b)
00047 {
00048   return a >= b ? a : b;
00049 }
00050 
00052 static R fak(int n)
00053 {
00054   if (n <= 1)
00055     return K(1.0);
00056   else
00057     return (R)(n) * fak(n - 1);
00058 }
00059 
00061 static R binom(int n, int m)
00062 {
00063   return fak(n) / fak(m) / fak(n - m);
00064 }
00065 
00067 static R BasisPoly(int m, int r, R xx)
00068 {
00069   int k;
00070   R sum = K(0.0);
00071 
00072   for (k = 0; k <= m - r; k++)
00073   {
00074     sum += binom(m + k, k) * POW((xx + K(1.0)) / K(2.0), (R) k);
00075   }
00076   return sum * POW((xx + K(1.0)), (R) r) * POW(K(1.0) - xx, (R) (m + 1))
00077       / (R)(1 << (m + 1)) / fak(r); /* 1<<(m+1) = 2^(m+1) */
00078 }
00079 
00081 C regkern(kernel k, R xx, int p, const R *param, R a, R b)
00082 {
00083   int r;
00084   C sum = K(0.0);
00085 
00086   if (xx < -K(0.5))
00087     xx = -K(0.5);
00088   if (xx > K(0.5))
00089     xx = K(0.5);
00090   if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
00091   {
00092     return k(xx, 0, param);
00093   }
00094   else if (xx < -K(0.5) + b)
00095   {
00096     sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
00097         * BasisPoly(p - 1, 0, K(2.0) * xx / b + (K(1.0) - b) / b);
00098     for (r = 0; r < p; r++)
00099     {
00100       sum += POW(-b / K(2.0), (R) r) * k(-K(0.5) + b, r, param)
00101           * BasisPoly(p - 1, r, -K(2.0) * xx / b + (b - K(1.0)) / b);
00102     }
00103     return sum;
00104   }
00105   else if ((xx > -a) && (xx < a))
00106   {
00107     for (r = 0; r < p; r++)
00108     {
00109       sum +=
00110           POW(a, (R) r)
00111               * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a)
00112                   + k(a, r, param) * BasisPoly(p - 1, r, -xx / a)
00113                       * (r & 1 ? -1 : 1));
00114     }
00115     return sum;
00116   }
00117   else if (xx > K(0.5) - b)
00118   {
00119     sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
00120         * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
00121     for (r = 0; r < p; r++)
00122     {
00123       sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
00124           * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
00125     }
00126     return sum;
00127   }
00128   return k(xx, 0, param);
00129 }
00130 
00134 static C regkern1(kernel k, R xx, int p, const R *param, R a, R b)
00135 {
00136   int r;
00137   C sum = K(0.0);
00138 
00139   if (xx < -K(0.5))
00140     xx = -K(0.5);
00141   if (xx > K(0.5))
00142     xx = K(0.5);
00143   if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
00144   {
00145     return k(xx, 0, param);
00146   }
00147   else if ((xx > -a) && (xx < a))
00148   {
00149     for (r = 0; r < p; r++)
00150     {
00151       sum +=
00152           POW(a, (R) r)
00153               * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a)
00154                   + k(a, r, param) * BasisPoly(p - 1, r, -xx / a)
00155                       * (r & 1 ? -1 : 1));
00156     }
00157     return sum;
00158   }
00159   else if (xx < -K(0.5) + b)
00160   {
00161     for (r = 0; r < p; r++)
00162     {
00163       sum += POW(b, (R) r)
00164           * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx + K(0.5)) / b)
00165               + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx + K(0.5)) / b)
00166                   * (r & 1 ? -1 : 1));
00167     }
00168     return sum;
00169   }
00170   else if (xx > K(0.5) - b)
00171   {
00172     for (r = 0; r < p; r++)
00173     {
00174       sum += POW(b, (R) r)
00175           * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx - K(0.5)) / b)
00176               + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx - K(0.5)) / b)
00177                   * (r & 1 ? -1 : 1));
00178     }
00179     return sum;
00180   }
00181   return k(xx, 0, param);
00182 }
00183 
00185 //static C regkern2(kernel k, R xx, int p, const R *param, R a, R b)
00186 //{
00187 //  int r;
00188 //  C sum = K(0.0);
00189 //
00190 //  xx = FABS(xx);
00191 //
00192 //  if (xx > K(0.5))
00193 //  {
00194 //    for (r = 0; r < p; r++)
00195 //    {
00196 //      sum += POW(b, (R) r) * k(K(0.5) - b, r, param)
00197 //          * (BasisPoly(p - 1, r, 0) + BasisPoly(p - 1, r, 0));
00198 //    }
00199 //    return sum;
00200 //  }
00201 //  else if ((a <= xx) && (xx <= K(0.5) - b))
00202 //  {
00203 //    return k(xx, 0, param);
00204 //  }
00205 //  else if (xx < a)
00206 //  {
00207 //    for (r = 0; r < p; r++)
00208 //    {
00209 //      sum += POW(-a, (R) r) * k(a, r, param)
00210 //          * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a));
00211 //    }
00212 //    return sum;
00213 //  }
00214 //  else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
00215 //  {
00216 //    for (r = 0; r < p; r++)
00217 //    {
00218 //      sum += POW(b, (R) r) * k(K(0.5) - b, r, param)
00219 //          * (BasisPoly(p - 1, r, (xx - K(0.5)) / b)
00220 //              + BasisPoly(p - 1, r, -(xx - K(0.5)) / b));
00221 //    }
00222 //    return sum;
00223 //  }
00224 //  return K(0.0);
00225 //}
00226 
00230 static C regkern3(kernel k, R xx, int p, const R *param, R a, R b)
00231 {
00232   int r;
00233   C sum = K(0.0);
00234 
00235   xx = FABS(xx);
00236 
00237   if (xx >= K(0.5))
00238   {
00239     /*return kern(typ,c,0,K(0.5));*/
00240     xx = K(0.5);
00241   }
00242   /* else */
00243   if ((a <= xx) && (xx <= K(0.5) - b))
00244   {
00245     return k(xx, 0, param);
00246   }
00247   else if (xx < a)
00248   {
00249     for (r = 0; r < p; r++)
00250     {
00251       sum += POW(-a, (R) r) * k(a, r, param)
00252           * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a));
00253     }
00254     /*sum=kern(typ,c,0,xx); */
00255     return sum;
00256   }
00257   else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
00258   {
00259     sum = k(K(0.5), 0, param) * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
00260     /* sum=regkern2(typ,c,p,a,b, K(0.5))*BasisPoly(p-1,0,-K(2.0)*xx/b+(K(1.0)-b)/b); */
00261     for (r = 0; r < p; r++)
00262     {
00263       sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
00264           * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
00265     }
00266     return sum;
00267   }
00268   return K(0.0);
00269 }
00270 
00272 //static C linintkern(const R x, const C *Add, const int Ad, const R a)
00273 //{
00274 //  R c, c1, c3;
00275 //  int r;
00276 //  C f1, f2;
00277 //
00278 //  c = x * Ad / a;
00279 //  r = (int)(LRINT(c));
00280 //  r = abs(r);
00281 //  f1 = Add[r];
00282 //  f2 = Add[r + 1];
00283 //  c = FABS(c);
00284 //  c1 = c - r;
00285 //  c3 = c1 - K(1.0);
00286 //  return (-f1 * c3 + f2 * c1);
00287 //}
00288 //
00289 //static C quadrintkern(const R x, const C *Add, const int Ad, const R a)
00290 //{
00291 //  R c, c1, c2, c3;
00292 //  int r;
00293 //  C f0, f1, f2;
00294 //
00295 //  c = x * Ad / a;
00296 //  r = (int)(LRINT(c));
00297 //  r = abs(r);
00298 //  if (r == 0)
00299 //  {
00300 //    f0 = Add[r + 1];
00301 //    f1 = Add[r];
00302 //    f2 = Add[r + 1];
00303 //  }
00304 //  else
00305 //  {
00306 //    f0 = Add[r - 1];
00307 //    f1 = Add[r];
00308 //    f2 = Add[r + 1];
00309 //  }
00310 //  c = FABS(c);
00311 //  c1 = c - r;
00312 //  c2 = c1 + K(1.0);
00313 //  c3 = c1 - K(1.0);
00314 //  return (f0 * c1 * c3 / K(2.0) - f1 * c2 * c3 + f2 * c2 * c1 / K(2.0));
00315 //}
00316 
00318 C kubintkern(const R x, const C *Add, const int Ad, const R a)
00319 {
00320   R c, c1, c2, c3, c4;
00321   int r;
00322   C f0, f1, f2, f3;
00323   c = x * (R)(Ad) / a;
00324   r = (int)(LRINT(c));
00325   r = abs(r);
00326   if (r == 0)
00327   {
00328     f0 = Add[r + 1];
00329     f1 = Add[r];
00330     f2 = Add[r + 1];
00331     f3 = Add[r + 2];
00332   }
00333   else
00334   {
00335     f0 = Add[r - 1];
00336     f1 = Add[r];
00337     f2 = Add[r + 1];
00338     f3 = Add[r + 2];
00339   }
00340   c = FABS(c);
00341   c1 = c - (R)(r);
00342   c2 = c1 + K(1.0);
00343   c3 = c1 - K(1.0);
00344   c4 = c1 - K(2.0);
00345   /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2-
00346    f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */
00347   return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
00348       - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
00349 }
00350 
00352 static C kubintkern1(const R x, const C *Add, const int Ad, const R a)
00353 {
00354   R c, c1, c2, c3, c4;
00355   int r;
00356   C f0, f1, f2, f3;
00357   Add += 2;
00358   c = (x + a) * (R)(Ad) / K(2.0) / a;
00359   r = (int)(LRINT(c));
00360   r = abs(r);
00361   /*if (r==0) {f0=Add[r];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00362    else */
00363   {
00364     f0 = Add[r - 1];
00365     f1 = Add[r];
00366     f2 = Add[r + 1];
00367     f3 = Add[r + 2];
00368   }
00369   c = FABS(c);
00370   c1 = c - (R)(r);
00371   c2 = c1 + K(1.0);
00372   c3 = c1 - K(1.0);
00373   c4 = c1 - K(2.0);
00374   /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2-
00375    f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */
00376   return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
00377       - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
00378 }
00379 
00381 static void quicksort(int d, int t, R *x, C *alpha, int N)
00382 {
00383   int lpos = 0;
00384   int rpos = N - 1;
00385   /*R pivot=x[((N-1)/2)*d+t];*/
00386   R pivot = x[(N / 2) * d + t];
00387 
00388   int k;
00389   R temp1;
00390   C temp2;
00391 
00392   while (lpos <= rpos)
00393   {
00394     while (x[lpos * d + t] < pivot)
00395       lpos++;
00396     while (x[rpos * d + t] > pivot)
00397       rpos--;
00398     if (lpos <= rpos)
00399     {
00400       for (k = 0; k < d; k++)
00401       {
00402         temp1 = x[lpos * d + k];
00403         x[lpos * d + k] = x[rpos * d + k];
00404         x[rpos * d + k] = temp1;
00405       }
00406       temp2 = alpha[lpos];
00407       alpha[lpos] = alpha[rpos];
00408       alpha[rpos] = temp2;
00409 
00410       lpos++;
00411       rpos--;
00412     }
00413   }
00414   if (0 < rpos)
00415     quicksort(d, t, x, alpha, rpos + 1);
00416   if (lpos < N - 1)
00417     quicksort(d, t, x + lpos * d, alpha + lpos, N - lpos);
00418 }
00419 
00421 static void BuildBox(fastsum_plan *ths)
00422 {
00423   int t, l;
00424   int *box_index;
00425   R val[ths->d];
00426 
00427   box_index = (int *) NFFT(malloc)((size_t)(ths->box_count) * sizeof(int));
00428   for (t = 0; t < ths->box_count; t++)
00429     box_index[t] = 0;
00430 
00431   for (l = 0; l < ths->N_total; l++)
00432   {
00433     int ind = 0;
00434     for (t = 0; t < ths->d; t++)
00435     {
00436       val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0);
00437       ind *= ths->box_count_per_dim;
00438       ind += (int) (val[t] / ths->eps_I);
00439     }
00440     box_index[ind]++;
00441   }
00442 
00443   ths->box_offset[0] = 0;
00444   for (t = 1; t <= ths->box_count; t++)
00445   {
00446     ths->box_offset[t] = ths->box_offset[t - 1] + box_index[t - 1];
00447     box_index[t - 1] = ths->box_offset[t - 1];
00448   }
00449 
00450   for (l = 0; l < ths->N_total; l++)
00451   {
00452     int ind = 0;
00453     for (t = 0; t < ths->d; t++)
00454     {
00455       val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0);
00456       ind *= ths->box_count_per_dim;
00457       ind += (int) (val[t] / ths->eps_I);
00458     }
00459 
00460     ths->box_alpha[box_index[ind]] = ths->alpha[l];
00461 
00462     for (t = 0; t < ths->d; t++)
00463     {
00464       ths->box_x[ths->d * box_index[ind] + t] = ths->x[ths->d * l + t];
00465     }
00466     box_index[ind]++;
00467   }
00468   NFFT(free)(box_index);
00469 }
00470 
00472 static inline C calc_SearchBox(int d, R *y, R *x, C *alpha, int start,
00473     int end_lt, const C *Add, const int Ad, int p, R a, const kernel k,
00474     const R *param, const unsigned flags)
00475 {
00476   C result = K(0.0);
00477 
00478   int m, l;
00479   R r;
00480 
00481   for (m = start; m < end_lt; m++)
00482   {
00483     if (d == 1)
00484     {
00485       r = y[0] - x[m];
00486     }
00487     else
00488     {
00489       r = K(0.0);
00490       for (l = 0; l < d; l++)
00491         r += (y[l] - x[m * d + l]) * (y[l] - x[m * d + l]);
00492       r = SQRT(r);
00493     }
00494     if (FABS(r) < a)
00495     {
00496       result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */
00497       if (d == 1)
00498       {
00499         if (flags & EXACT_NEARFIELD)
00500           result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D)  */
00501         else
00502           result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */
00503       }
00504       else
00505       {
00506         if (flags & EXACT_NEARFIELD)
00507           result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD)  */
00508         else
00509 #if defined(NF_KUB)
00510           result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */
00511 #elif defined(NF_QUADR)
00512         result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
00513 #elif defined(NF_LIN)
00514         result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
00515 #else
00516 #error define interpolation method
00517 #endif
00518       }
00519     }
00520   }
00521   return result;
00522 }
00523 
00525 static C SearchBox(R *y, fastsum_plan *ths)
00526 {
00527   C val = K(0.0);
00528   int t;
00529   int y_multiind[ths->d];
00530   int multiindex[ths->d];
00531   int y_ind;
00532 
00533   for (t = 0; t < ths->d; t++)
00534   {
00535     y_multiind[t] = (int)(LRINT((y[t] + K(0.25) - ths->eps_B / K(2.0)) / ths->eps_I));
00536   }
00537 
00538   if (ths->d == 1)
00539   {
00540     for (y_ind = max_i(0, y_multiind[0] - 1);
00541         y_ind < ths->box_count_per_dim && y_ind <= y_multiind[0] + 1; y_ind++)
00542     {
00543       val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
00544           ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add, ths->Ad,
00545           ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
00546     }
00547   }
00548   else if (ths->d == 2)
00549   {
00550     for (multiindex[0] = max_i(0, y_multiind[0] - 1);
00551         multiindex[0] < ths->box_count_per_dim
00552             && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
00553       for (multiindex[1] = max_i(0, y_multiind[1] - 1);
00554           multiindex[1] < ths->box_count_per_dim
00555               && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
00556       {
00557         y_ind = (ths->box_count_per_dim * multiindex[0]) + multiindex[1];
00558         val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
00559             ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add,
00560             ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
00561       }
00562   }
00563   else if (ths->d == 3)
00564   {
00565     for (multiindex[0] = max_i(0, y_multiind[0] - 1);
00566         multiindex[0] < ths->box_count_per_dim
00567             && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
00568       for (multiindex[1] = max_i(0, y_multiind[1] - 1);
00569           multiindex[1] < ths->box_count_per_dim
00570               && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
00571         for (multiindex[2] = max_i(0, y_multiind[2] - 1);
00572             multiindex[2] < ths->box_count_per_dim
00573                 && multiindex[2] <= y_multiind[2] + 1; multiindex[2]++)
00574         {
00575           y_ind = ((ths->box_count_per_dim * multiindex[0]) + multiindex[1])
00576               * ths->box_count_per_dim + multiindex[2];
00577           val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
00578               ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add,
00579               ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param,
00580               ths->flags);
00581         }
00582   }
00583   else
00584   {
00585     exit(EXIT_FAILURE);
00586   }
00587   return val;
00588 }
00589 
00591 static void BuildTree(int d, int t, R *x, C *alpha, int N)
00592 {
00593   if (N > 1)
00594   {
00595     int m = N / 2;
00596 
00597     quicksort(d, t, x, alpha, N);
00598 
00599     BuildTree(d, (t + 1) % d, x, alpha, m);
00600     BuildTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), N - m - 1);
00601   }
00602 }
00603 
00605 static C SearchTree(const int d, const int t, const R *x, const C *alpha,
00606     const R *xmin, const R *xmax, const int N, const kernel k, const R *param,
00607     const int Ad, const C *Add, const int p, const unsigned flags)
00608 {
00609   if (N == 0)
00610   {
00611       return K(0.0);
00612   }
00613   else
00614   {
00615       int m = N / 2;
00616       R Min = xmin[t];
00617       R Max = xmax[t];
00618       R Median = x[m * d + t];
00619       R a = FABS(Max - Min) / 2;
00620       int l;
00621       int E = 0;
00622       R r;
00623 
00624       if (Min > Median)
00625           return SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
00626                   xmax, N - m - 1, k, param, Ad, Add, p, flags);
00627       else if (Max < Median)
00628           return SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad,
00629                   Add, p, flags);
00630       else
00631       {
00632           C result = K(0.0);
00633           E = 0;
00634 
00635           for (l = 0; l < d; l++)
00636           {
00637               if (x[m * d + l] > xmin[l] && x[m * d + l] < xmax[l])
00638                   E++;
00639           }
00640 
00641           if (E == d)
00642           {
00643               if (d == 1)
00644               {
00645                   r = xmin[0] + a - x[m]; /* remember: xmin+a = y */
00646               }
00647               else
00648               {
00649                   r = K(0.0);
00650                   for (l = 0; l < d; l++)
00651                       r += (xmin[l] + a - x[m * d + l]) * (xmin[l] + a - x[m * d + l]); /* remember: xmin+a = y */
00652                   r = SQRT(r);
00653               }
00654               if (FABS(r) < a)
00655               {
00656                   result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */
00657                   if (d == 1)
00658                   {
00659                       if (flags & EXACT_NEARFIELD)
00660                           result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D)  */
00661                       else
00662                           result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */
00663                   }
00664                   else
00665                   {
00666                       if (flags & EXACT_NEARFIELD)
00667                           result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD)  */
00668                       else
00669 #if defined(NF_KUB)
00670                           result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */
00671 #elif defined(NF_QUADR)
00672                       result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
00673 #elif defined(NF_LIN)
00674                       result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
00675 #else
00676 #error define interpolation method
00677 #endif
00678                   }
00679               }
00680           }
00681           result += SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
00682                   xmax, N - m - 1, k, param, Ad, Add, p, flags)
00683                 + SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad, Add,
00684                         p, flags);
00685           return result;
00686       }
00687   }
00688 }
00689 
00691 void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total,
00692     kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
00693 {
00694   int t;
00695   int N[d], n[d];
00696   int n_total;
00697   unsigned sort_flags_trafo = 0U;
00698   unsigned sort_flags_adjoint = 0U;
00699 #ifdef _OPENMP
00700   int nthreads = NFFT(get_num_threads)();
00701 #endif
00702 
00703   if (d > 1)
00704   {
00705     sort_flags_trafo = NFFT_SORT_NODES;
00706 #ifdef _OPENMP
00707     sort_flags_adjoint = NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
00708 #else
00709     sort_flags_adjoint = NFFT_SORT_NODES;
00710 #endif
00711   }
00712 
00713   ths->d = d;
00714 
00715   ths->N_total = N_total;
00716   ths->M_total = M_total;
00717 
00718   ths->x = (R *) NFFT(malloc)((size_t)(d * N_total) * (sizeof(R)));
00719   ths->alpha = (C *) NFFT(malloc)((size_t)(N_total) * (sizeof(C)));
00720 
00721   ths->y = (R *) NFFT(malloc)((size_t)(d * M_total) * (sizeof(R)));
00722   ths->f = (C *) NFFT(malloc)((size_t)(M_total) * (sizeof(C)));
00723 
00724   ths->k = k;
00725   ths->kernel_param = param;
00726 
00727   ths->flags = flags;
00728 
00729   ths->p = p;
00730   ths->eps_I = eps_I; /* =(R)ths->p/(R)nn; */
00731   ths->eps_B = eps_B; /* =K(1.0)/K(16.0); */
00734   if (!(ths->flags & EXACT_NEARFIELD))
00735   {
00736     if (ths->d == 1)
00737     {
00738       ths->Ad = 4 * (ths->p) * (ths->p);
00739       ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 5) * (sizeof(C)));
00740     }
00741     else
00742     {
00743       if (ths->k == one_over_x)
00744       {
00745         R delta = K(1e-8);
00746         switch (p)
00747         {
00748           case 2:
00749             delta = K(1e-3);
00750             break;
00751           case 3:
00752             delta = K(1e-4);
00753             break;
00754           case 4:
00755             delta = K(1e-5);
00756             break;
00757           case 5:
00758             delta = K(1e-6);
00759             break;
00760           case 6:
00761             delta = K(1e-6);
00762             break;
00763           case 7:
00764             delta = K(1e-7);
00765             break;
00766           default:
00767             delta = K(1e-8);
00768         }
00769 
00770 #if defined(NF_KUB)
00771         ths->Ad = max_i(10, (int)(LRINT(CEIL(K(1.4) / POW(delta, K(1.0) / K(4.0))))));
00772         ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C)));
00773 #elif defined(NF_QUADR)
00774         ths->Ad = (int)(LRINT(CEIL(K(2.2)/POW(delta,K(1.0)/K(3.0)))));
00775         ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C)));
00776 #elif defined(NF_LIN)
00777         ths->Ad = (int)(LRINT(CEIL(K(1.7)/pow(delta,K(1.0)/K(2.0)))));
00778         ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C)));
00779 #else
00780 #error define NF_LIN or NF_QUADR or NF_KUB
00781 #endif
00782       }
00783       else
00784       {
00785         ths->Ad = 2 * (ths->p) * (ths->p);
00786         ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C)));
00787       }
00788     }
00789   }
00790 
00792   ths->n = nn;
00793   for (t = 0; t < d; t++)
00794   {
00795     N[t] = nn;
00796     n[t] = 2 * nn;
00797   }
00798   NFFT(init_guru)(&(ths->mv1), d, N, N_total, n, m,
00799       sort_flags_adjoint |
00800       PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | MALLOC_F | FFTW_INIT
00801           | FFT_OUT_OF_PLACE,
00802       FFTW_MEASURE | FFTW_DESTROY_INPUT);
00803   NFFT(init_guru)(&(ths->mv2), d, N, M_total, n, m,
00804       sort_flags_trafo |
00805       PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | MALLOC_F | FFTW_INIT
00806           | FFT_OUT_OF_PLACE,
00807       FFTW_MEASURE | FFTW_DESTROY_INPUT);
00808 
00810   n_total = 1;
00811   for (t = 0; t < d; t++)
00812     n_total *= nn;
00813 
00814   ths->b = (C*) NFFT(malloc)((size_t)(n_total) * sizeof(C));
00815 #ifdef _OPENMP
00816   #pragma omp critical (nfft_omp_critical_fftw_plan)
00817   {
00818     FFTW(plan_with_nthreads)(nthreads);
00819 #endif
00820 
00821   ths->fft_plan = FFTW(plan_dft)(d, N, ths->b, ths->b, FFTW_FORWARD,
00822       FFTW_ESTIMATE);
00823 
00824 #ifdef _OPENMP
00825 }
00826 #endif
00827 
00828   if (ths->flags & NEARFIELD_BOXES)
00829   {
00830     ths->box_count_per_dim = (int)(LRINT(FLOOR((K(0.5) - ths->eps_B) / ths->eps_I))) + 1;
00831     ths->box_count = 1;
00832     for (t = 0; t < ths->d; t++)
00833       ths->box_count *= ths->box_count_per_dim;
00834 
00835     ths->box_offset = (int *) NFFT(malloc)((size_t)(ths->box_count + 1) * sizeof(int));
00836 
00837     ths->box_alpha = (C *) NFFT(malloc)((size_t)(ths->N_total) * (sizeof(C)));
00838 
00839     ths->box_x = (R *) NFFT(malloc)((size_t)(ths->d * ths->N_total) * sizeof(R));
00840   }
00841 }
00842 
00844 void fastsum_finalize(fastsum_plan *ths)
00845 {
00846   NFFT(free)(ths->x);
00847   NFFT(free)(ths->alpha);
00848   NFFT(free)(ths->y);
00849   NFFT(free)(ths->f);
00850 
00851   if (!(ths->flags & EXACT_NEARFIELD))
00852     NFFT(free)(ths->Add);
00853 
00854   NFFT(finalize)(&(ths->mv1));
00855   NFFT(finalize)(&(ths->mv2));
00856 
00857 #ifdef _OPENMP
00858   #pragma omp critical (nfft_omp_critical_fftw_plan)
00859   {
00860 #endif
00861   FFTW(destroy_plan)(ths->fft_plan);
00862 #ifdef _OPENMP
00863 }
00864 #endif
00865 
00866   NFFT(free)(ths->b);
00867 
00868   if (ths->flags & NEARFIELD_BOXES)
00869   {
00870     NFFT(free)(ths->box_offset);
00871     NFFT(free)(ths->box_alpha);
00872     NFFT(free)(ths->box_x);
00873   }
00874 }
00875 
00877 void fastsum_exact(fastsum_plan *ths)
00878 {
00879   int j, k;
00880   int t;
00881   R r;
00882 
00883 #ifdef _OPENMP
00884   #pragma omp parallel for default(shared) private(j,k,t,r)
00885 #endif
00886   for (j = 0; j < ths->M_total; j++)
00887   {
00888     ths->f[j] = K(0.0);
00889     for (k = 0; k < ths->N_total; k++)
00890     {
00891       if (ths->d == 1)
00892         r = ths->y[j] - ths->x[k];
00893       else
00894       {
00895         r = K(0.0);
00896         for (t = 0; t < ths->d; t++)
00897           r += (ths->y[j * ths->d + t] - ths->x[k * ths->d + t])
00898               * (ths->y[j * ths->d + t] - ths->x[k * ths->d + t]);
00899         r = SQRT(r);
00900       }
00901       ths->f[j] += ths->alpha[k] * ths->k(r, 0, ths->kernel_param);
00902     }
00903   }
00904 }
00905 
00907 void fastsum_precompute(fastsum_plan *ths)
00908 {
00909   int j, k, t;
00910   int n_total;
00911 #ifdef MEASURE_TIME
00912   ticks t0, t1;
00913 #endif
00914 
00915   ths->MEASURE_TIME_t[0] = K(0.0);
00916   ths->MEASURE_TIME_t[1] = K(0.0);
00917   ths->MEASURE_TIME_t[2] = K(0.0);
00918   ths->MEASURE_TIME_t[3] = K(0.0);
00919 
00920 #ifdef MEASURE_TIME
00921   t0 = getticks();
00922 #endif
00923 
00924   if (ths->flags & NEARFIELD_BOXES)
00925   {
00926     BuildBox(ths);
00927   }
00928   else
00929   {
00931     BuildTree(ths->d, 0, ths->x, ths->alpha, ths->N_total);
00932   }
00933 
00934 #ifdef MEASURE_TIME
00935   t1 = getticks();
00936   ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0);
00937 #endif
00938 
00939 #ifdef MEASURE_TIME
00940   t0 = getticks();
00941 #endif
00942 
00943   if (!(ths->flags & EXACT_NEARFIELD))
00944   {
00945     if (ths->d == 1)
00946 #ifdef _OPENMP
00947       #pragma omp parallel for default(shared) private(k)
00948 #endif
00949       for (k = -ths->Ad / 2 - 2; k <= ths->Ad / 2 + 2; k++)
00950         ths->Add[k + ths->Ad / 2 + 2] = regkern1(ths->k,
00951             ths->eps_I * (R) k / (R)(ths->Ad) * K(2.0), ths->p, ths->kernel_param,
00952             ths->eps_I, ths->eps_B);
00953     else
00954 #ifdef _OPENMP
00955       #pragma omp parallel for default(shared) private(k)
00956 #endif
00957       for (k = 0; k <= ths->Ad + 2; k++)
00958         ths->Add[k] = regkern3(ths->k, ths->eps_I * (R) k / (R)(ths->Ad), ths->p,
00959             ths->kernel_param, ths->eps_I, ths->eps_B);
00960   }
00961 #ifdef MEASURE_TIME
00962   t1 = getticks();
00963   ths->MEASURE_TIME_t[0] += NFFT(elapsed_seconds)(t1,t0);
00964 #endif
00965 
00966 #ifdef MEASURE_TIME
00967   t0 = getticks();
00968 #endif
00969 
00970   for (k = 0; k < ths->mv1.M_total; k++)
00971     for (t = 0; t < ths->mv1.d; t++)
00972       ths->mv1.x[ths->mv1.d * k + t] = -ths->x[ths->mv1.d * k + t]; /* note the factor -1 for transposed transform instead of adjoint*/
00973 
00975   if (ths->mv1.flags & PRE_LIN_PSI)
00976     NFFT(precompute_lin_psi)(&(ths->mv1));
00977 
00978   if (ths->mv1.flags & PRE_PSI)
00979     NFFT(precompute_psi)(&(ths->mv1));
00980 
00981   if (ths->mv1.flags & PRE_FULL_PSI)
00982     NFFT(precompute_full_psi)(&(ths->mv1));
00983 #ifdef MEASURE_TIME
00984   t1 = getticks();
00985   ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0);
00986 #endif
00987 
00989   for (k = 0; k < ths->mv1.M_total; k++)
00990     ths->mv1.f[k] = ths->alpha[k];
00991 
00992 #ifdef MEASURE_TIME
00993   t0 = getticks();
00994 #endif
00995 
00996   for (j = 0; j < ths->mv2.M_total; j++)
00997     for (t = 0; t < ths->mv2.d; t++)
00998       ths->mv2.x[ths->mv2.d * j + t] = -ths->y[ths->mv2.d * j + t]; /* note the factor -1 for conjugated transform instead of standard*/
00999 
01001   if (ths->mv2.flags & PRE_LIN_PSI)
01002     NFFT(precompute_lin_psi)(&(ths->mv2));
01003 
01004   if (ths->mv2.flags & PRE_PSI)
01005     NFFT(precompute_psi)(&(ths->mv2));
01006 
01007   if (ths->mv2.flags & PRE_FULL_PSI)
01008     NFFT(precompute_full_psi)(&(ths->mv2));
01009 #ifdef MEASURE_TIME
01010   t1 = getticks();
01011   ths->MEASURE_TIME_t[2] += NFFT(elapsed_seconds)(t1,t0);
01012 #endif
01013 
01014 #ifdef MEASURE_TIME
01015   t0 = getticks();
01016 #endif
01017 
01018   n_total = 1;
01019   for (t = 0; t < ths->d; t++)
01020     n_total *= ths->n;
01021 
01022 #ifdef _OPENMP
01023   #pragma omp parallel for default(shared) private(j,k,t)
01024 #endif
01025   for (j = 0; j < n_total; j++)
01026   {
01027     if (ths->d == 1)
01028       ths->b[j] = regkern1(ths->k, (R) j / (R)(ths->n) - K(0.5), ths->p,
01029           ths->kernel_param, ths->eps_I, ths->eps_B) / (R)(n_total);
01030     else
01031     {
01032       k = j;
01033       ths->b[j] = K(0.0);
01034       for (t = 0; t < ths->d; t++)
01035       {
01036         ths->b[j] += ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5))
01037             * ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5));
01038         k = k / (ths->n);
01039       }
01040       ths->b[j] = regkern3(ths->k, SQRT(CREAL(ths->b[j])), ths->p, ths->kernel_param,
01041           ths->eps_I, ths->eps_B) / (R)(n_total);
01042     }
01043   }
01044 
01045   NFFT(fftshift_complex)(ths->b, (int)(ths->mv1.d), ths->mv1.N);
01046   FFTW(execute)(ths->fft_plan);
01047   NFFT(fftshift_complex)(ths->b, (int)(ths->mv1.d), ths->mv1.N);
01048 #ifdef MEASURE_TIME
01049   t1 = getticks();
01050   ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
01051 #endif
01052 }
01053 
01055 void fastsum_trafo(fastsum_plan *ths)
01056 {
01057   int j, k, t;
01058 #ifdef MEASURE_TIME
01059   ticks t0, t1;
01060 #endif
01061 
01062   ths->MEASURE_TIME_t[4] = K(0.0);
01063   ths->MEASURE_TIME_t[5] = K(0.0);
01064   ths->MEASURE_TIME_t[6] = K(0.0);
01065   ths->MEASURE_TIME_t[7] = K(0.0);
01066 
01067 #ifdef MEASURE_TIME
01068   t0 = getticks();
01069 #endif
01070 
01071   NFFT(adjoint)(&(ths->mv1));
01072 #ifdef MEASURE_TIME
01073   t1 = getticks();
01074   ths->MEASURE_TIME_t[4] += NFFT(elapsed_seconds)(t1,t0);
01075 #endif
01076 
01077 #ifdef MEASURE_TIME
01078   t0 = getticks();
01079 #endif
01080 
01081 #ifdef _OPENMP
01082   #pragma omp parallel for default(shared) private(k)
01083 #endif
01084   for (k = 0; k < ths->mv2.N_total; k++)
01085     ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
01086 #ifdef MEASURE_TIME
01087   t1 = getticks();
01088   ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
01089 #endif
01090 
01091 #ifdef MEASURE_TIME
01092   t0 = getticks();
01093 #endif
01094 
01095   NFFT(trafo)(&(ths->mv2));
01096 #ifdef MEASURE_TIME
01097   t1 = getticks();
01098   ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
01099 #endif
01100 
01101 #ifdef MEASURE_TIME
01102   t0 = getticks();
01103 #endif
01104 
01105 #ifdef _OPENMP
01106   #pragma omp parallel for default(shared) private(j,k,t)
01107 #endif
01108   for (j = 0; j < ths->M_total; j++)
01109   {
01110     R ymin[ths->d], ymax[ths->d]; 
01112     if (ths->flags & NEARFIELD_BOXES)
01113     {
01114       ths->f[j] = ths->mv2.f[j] + SearchBox(ths->y + ths->d * j, ths);
01115     }
01116     else
01117     {
01118       for (t = 0; t < ths->d; t++)
01119       {
01120         ymin[t] = ths->y[ths->d * j + t] - ths->eps_I;
01121         ymax[t] = ths->y[ths->d * j + t] + ths->eps_I;
01122       }
01123       ths->f[j] = ths->mv2.f[j]
01124           + SearchTree(ths->d, 0, ths->x, ths->alpha, ymin, ymax, ths->N_total,
01125               ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
01126     }
01127     /* ths->f[j] = ths->mv2.f[j]; */
01128     /* ths->f[j] = SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags); */
01129   }
01130 
01131 #ifdef MEASURE_TIME
01132   t1 = getticks();
01133   ths->MEASURE_TIME_t[7] += NFFT(elapsed_seconds)(t1,t0);
01134 #endif
01135 }
01136 /* \} */
01137 
01138 /* fastsum.c */