3 static void UpdSnode(
int m,
14 double rtemp1, rtemp2, rtemp3, rtemp4,
15 rtemp5, rtemp6, rtemp7, rtemp8,
16 rtemp9, rtemp10,rtemp11,rtemp12,
17 rtemp13,rtemp14,rtemp15,rtemp16,
18 *a1,*a2, *a3, *a4, *a5, *a6, *a7, *a8,
19 *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16,
22 for(i=0; i+1<s; i+=2) {
30 a1 = a+(fira[k+0 ]+i);
31 a2 = a+(fira[k+1 ]+i);
32 a3 = a+(fira[k+2 ]+i);
33 a4 = a+(fira[k+3 ]+i);
35 rtemp1 = a1[0]/diaga[k+0];
36 rtemp2 = a2[0]/diaga[k+1];
37 rtemp3 = a3[0]/diaga[k+2];
38 rtemp4 = a4[0]/diaga[k+3];
40 diagb[i] -= rtemp1 * a1[0]
50 rtemp5 = a1[0]/diaga[k+0];
51 rtemp6 = a2[0]/diaga[k+1];
52 rtemp7 = a3[0]/diaga[k+2];
53 rtemp8 = a4[0]/diaga[k+3];
55 b00[0] -= rtemp1 * a1[0]
60 diagb[i+1] -= rtemp5 * a1[0]
70 for(t=0; t<sze; ++t) {
71 b0[t] -= rtemp1 * a1[t]
76 b1[t] -= rtemp5 * a1[t]
84 a1 = a+(fira[k+0 ]+i);
85 a2 = a+(fira[k+1 ]+i);
87 rtemp1 = a1[0] / diaga[k+0];
88 rtemp2 = a2[0] / diaga[k+1];
90 diagb[i] -= a1[0] * rtemp1
96 rtemp5 = a1[0] / diaga[k+0];
97 rtemp6 = a2[0] / diaga[k+1];
99 b00[0] -= a1[0] * rtemp1
102 diagb[i+1] -= a1[0] * rtemp5
109 for(t=0; t<sze; ++t) {
110 b0[t] -= a1[t] * rtemp1
113 b1[t] -= a1[t] * rtemp5
119 a1 = a+(fira[k+0 ]+i);
121 rtemp1 = a1[0] / diaga[k+0];
123 diagb[i] -= a1[0] * rtemp1;
127 rtemp5 = a1[0] / diaga[k+0];
129 diagb[i+1] -= a1[0] * rtemp5;
131 b00[0] -= a1[0] * rtemp1;
137 b0[t] -= rtemp1 * a1[t];
138 b1[t] -= rtemp5 * a1[t];
149 for(; k+15<n; k+=16) {
150 a1 = a+(fira[k+0 ]+i);
151 a2 = a+(fira[k+1 ]+i);
152 a3 = a+(fira[k+2 ]+i);
153 a4 = a+(fira[k+3 ]+i);
154 a5 = a+(fira[k+4 ]+i);
155 a6 = a+(fira[k+5 ]+i);
156 a7 = a+(fira[k+6 ]+i);
157 a8 = a+(fira[k+7 ]+i);
158 a9 = a+(fira[k+8 ]+i);
159 a10 = a+(fira[k+9 ]+i);
160 a11 = a+(fira[k+10]+i);
161 a12 = a+(fira[k+11]+i);
162 a13 = a+(fira[k+12]+i);
163 a14 = a+(fira[k+13]+i);
164 a15 = a+(fira[k+14]+i);
165 a16 = a+(fira[k+15]+i);
167 rtemp1 = *a1/diaga[k+0];
168 rtemp2 = *a2/diaga[k+1];
169 rtemp3 = *a3/diaga[k+2];
170 rtemp4 = *a4/diaga[k+3];
171 rtemp5 = *a5/diaga[k+4];
172 rtemp6 = *a6/diaga[k+5];
173 rtemp7 = *a7/diaga[k+6];
174 rtemp8 = *a8/diaga[k+7];
175 rtemp9 = *a9/diaga[k+8];
176 rtemp10 = *a10/diaga[k+9];
177 rtemp11 = *a11/diaga[k+10];
178 rtemp12 = *a12/diaga[k+11];
179 rtemp13 = *a13/diaga[k+12];
180 rtemp14 = *a14/diaga[k+13];
181 rtemp15 = *a15/diaga[k+14];
182 rtemp16 = *a16/diaga[k+15];
184 diagb[i] -= rtemp1 * (*a1)
220 b0[t] -= rtemp1 * a1[t]
239 for(; k+11<n; k+=12) {
240 a1 = a+(fira[k+0 ]+i);
241 a2 = a+(fira[k+1 ]+i);
242 a3 = a+(fira[k+2 ]+i);
243 a4 = a+(fira[k+3 ]+i);
244 a5 = a+(fira[k+4 ]+i);
245 a6 = a+(fira[k+5 ]+i);
246 a7 = a+(fira[k+6 ]+i);
247 a8 = a+(fira[k+7 ]+i);
248 a9 = a+(fira[k+8 ]+i);
249 a10 = a+(fira[k+9 ]+i);
250 a11 = a+(fira[k+10]+i);
251 a12 = a+(fira[k+11]+i);
253 rtemp1 = *a1/diaga[k+0];
254 rtemp2 = *a2/diaga[k+1];
255 rtemp3 = *a3/diaga[k+2];
256 rtemp4 = *a4/diaga[k+3];
257 rtemp5 = *a5/diaga[k+4];
258 rtemp6 = *a6/diaga[k+5];
259 rtemp7 = *a7/diaga[k+6];
260 rtemp8 = *a8/diaga[k+7];
261 rtemp9 = *a9/diaga[k+8];
262 rtemp10 = *a10/diaga[k+9];
263 rtemp11 = *a11/diaga[k+10];
264 rtemp12 = *a12/diaga[k+11];
266 diagb[i] -= rtemp1 * (*a1)
293 b0[t] -= rtemp1 * a1[t]
309 a1 = a+(fira[k+0 ]+i);
310 a2 = a+(fira[k+1 ]+i);
311 a3 = a+(fira[k+2 ]+i);
312 a4 = a+(fira[k+3 ]+i);
313 a5 = a+(fira[k+4 ]+i);
314 a6 = a+(fira[k+5 ]+i);
315 a7 = a+(fira[k+6 ]+i);
316 a8 = a+(fira[k+7 ]+i);
318 rtemp1 = *a1/diaga[k+0];
319 rtemp2 = *a2/diaga[k+1];
320 rtemp3 = *a3/diaga[k+2];
321 rtemp4 = *a4/diaga[k+3];
322 rtemp5 = *a5/diaga[k+4];
323 rtemp6 = *a6/diaga[k+5];
324 rtemp7 = *a7/diaga[k+6];
325 rtemp8 = *a8/diaga[k+7];
327 diagb[i] -= rtemp1 * (*a1)
346 b0[t] -= rtemp1 * a1[t]
358 a1 = a+(fira[k+0 ]+i);
359 a2 = a+(fira[k+1 ]+i);
360 a3 = a+(fira[k+2 ]+i);
361 a4 = a+(fira[k+3 ]+i);
363 rtemp1 = *a1/diaga[k+0];
364 rtemp2 = *a2/diaga[k+1];
365 rtemp3 = *a3/diaga[k+2];
366 rtemp4 = *a4/diaga[k+3];
368 diagb[i] -= rtemp1 * (*a1)
379 b0[t] -= rtemp1 * a1[t]
387 a1 = a+(fira[k+0 ]+i);
388 a2 = a+(fira[k+1 ]+i);
390 rtemp1 = *a1/diaga[k+0];
391 rtemp2 = *a2/diaga[k+1];
393 diagb[i] -= rtemp1 * (*a1)
400 b0[t] -= rtemp1 * a1[t]
405 a1 = a+(fira[k+0 ]+i);
407 rtemp1 = *a1/diaga[k+0];
409 diagb[i] -= rtemp1 * (*a1);
414 b0[t] -= rtemp1 * a1[t];
419 static void iUpdSnode(chfac *cf,
428 *ujsze=cf->ujsze,*uhead=cf->uhead,*subg=cf->subg;
429 double *diag=cf->diag,*uval=cf->uval;
440 iw[k-f] = uhead[k]+uf-k-1;
442 UpdSnode(1+ujsze[uf],l-f,ul-uf,diag+f,uval,iw,diag+uf,uval,uhead+uf);
445 static int DiagUpdate(
double *dii,
455 if (fabs(*dii)<1.0e-35) {
456 printf(
" diagonal nearly zero: %5.1e.\n",(*dii));
463 static int FacSnode(chfac *cf,
475 itemp = cf->subg[snde]+f;
477 if (!DiagUpdate(cf->diag+itemp,
481 if (fabs(cf->diag[itemp])<=cf->tolpiv) {
482 printf(
"Singular d[%d]=%e\n",
483 cf->subg[snde]+f,cf->diag[cf->subg[snde]+f]);
487 for(k=f+1; k<l; ++k) {
488 iUpdSnode(cf,snde,f,k,k,k+1,iw);
490 itemp = cf->subg[snde]+k;
492 if (!DiagUpdate(&cf->diag[itemp],
496 if (fabs(cf->diag[itemp])<=cf->tolpiv) {
497 printf(
" singular d[%d]=%e\n",
498 cf->subg[snde]+k,cf->diag[cf->subg[snde]+k]);
507 static void UpdSnodes(
int m,
518 int i,j,k,t,u,sze,delay,
520 double rtemp1,rtemp2,rtemp3,rtemp4,
521 rtemp5,rtemp6,rtemp7,rtemp8,
522 rtemp9,rtemp10,rtemp11,rtemp12,
523 rtemp13,rtemp14,rtemp15,rtemp16,
524 *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8,
525 *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16,
546 for(; k+15<n; k+=16) {
547 a1 = a+(fira[k+0 ]+i);
548 a2 = a+(fira[k+1 ]+i);
549 a3 = a+(fira[k+2 ]+i);
550 a4 = a+(fira[k+3 ]+i);
551 a5 = a+(fira[k+4 ]+i);
552 a6 = a+(fira[k+5 ]+i);
553 a7 = a+(fira[k+6 ]+i);
554 a8 = a+(fira[k+7 ]+i);
555 a9 = a+(fira[k+8 ]+i);
556 a10 = a+(fira[k+9 ]+i);
557 a11 = a+(fira[k+10]+i);
558 a12 = a+(fira[k+11]+i);
559 a13 = a+(fira[k+12]+i);
560 a14 = a+(fira[k+13]+i);
561 a15 = a+(fira[k+14]+i);
562 a16 = a+(fira[k+15]+i);
564 rtemp1 = *a1/diaga[k+0];
565 rtemp2 = *a2/diaga[k+1];
566 rtemp3 = *a3/diaga[k+2];
567 rtemp4 = *a4/diaga[k+3];
568 rtemp5 = *a5/diaga[k+4];
569 rtemp6 = *a6/diaga[k+5];
570 rtemp7 = *a7/diaga[k+6];
571 rtemp8 = *a8/diaga[k+7];
572 rtemp9 = *a9/diaga[k+8];
573 rtemp10 = *a10/diaga[k+9];
574 rtemp11 = *a11/diaga[k+10];
575 rtemp12 = *a12/diaga[k+11];
576 rtemp13 = *a13/diaga[k+12];
577 rtemp14 = *a14/diaga[k+13];
578 rtemp15 = *a15/diaga[k+14];
579 rtemp16 = *a16/diaga[k+15];
581 diagb[u] -= rtemp1 * (*a1)
617 b0[ls[t]-delay] -= rtemp1 * a1[t]
635 for(; k+11<n; k+=12) {
636 a1 = a+(fira[k+0 ]+i);
637 a2 = a+(fira[k+1 ]+i);
638 a3 = a+(fira[k+2 ]+i);
639 a4 = a+(fira[k+3 ]+i);
640 a5 = a+(fira[k+4 ]+i);
641 a6 = a+(fira[k+5 ]+i);
642 a7 = a+(fira[k+6 ]+i);
643 a8 = a+(fira[k+7 ]+i);
644 a9 = a+(fira[k+8 ]+i);
645 a10 = a+(fira[k+9 ]+i);
646 a11 = a+(fira[k+10]+i);
647 a12 = a+(fira[k+11]+i);
649 rtemp1 = *a1/diaga[k+0];
650 rtemp2 = *a2/diaga[k+1];
651 rtemp3 = *a3/diaga[k+2];
652 rtemp4 = *a4/diaga[k+3];
653 rtemp5 = *a5/diaga[k+4];
654 rtemp6 = *a6/diaga[k+5];
655 rtemp7 = *a7/diaga[k+6];
656 rtemp8 = *a8/diaga[k+7];
657 rtemp9 = *a9/diaga[k+8];
658 rtemp10 = *a10/diaga[k+9];
659 rtemp11 = *a11/diaga[k+10];
660 rtemp12 = *a12/diaga[k+11];
662 diagb[u] -= rtemp1 * (*a1)
689 b0[ls[t]-delay] -= rtemp1 * a1[t]
704 a1 = a+(fira[k+0 ]+i);
705 a2 = a+(fira[k+1 ]+i);
706 a3 = a+(fira[k+2 ]+i);
707 a4 = a+(fira[k+3 ]+i);
708 a5 = a+(fira[k+4 ]+i);
709 a6 = a+(fira[k+5 ]+i);
710 a7 = a+(fira[k+6 ]+i);
711 a8 = a+(fira[k+7 ]+i);
713 rtemp1 = *a1/diaga[k+0];
714 rtemp2 = *a2/diaga[k+1];
715 rtemp3 = *a3/diaga[k+2];
716 rtemp4 = *a4/diaga[k+3];
717 rtemp5 = *a5/diaga[k+4];
718 rtemp6 = *a6/diaga[k+5];
719 rtemp7 = *a7/diaga[k+6];
720 rtemp8 = *a8/diaga[k+7];
722 diagb[u] -= rtemp1 * (*a1)
741 b0[ls[t]-delay] -= rtemp1 * a1[t]
753 a1 = a+(fira[k+0 ]+i);
754 a2 = a+(fira[k+1 ]+i);
755 a3 = a+(fira[k+2 ]+i);
756 a4 = a+(fira[k+3 ]+i);
758 rtemp1 = *a1/diaga[k+0];
759 rtemp2 = *a2/diaga[k+1];
760 rtemp3 = *a3/diaga[k+2];
761 rtemp4 = *a4/diaga[k+3];
763 diagb[u]-= rtemp1 * (*a1)
774 b0[ls[t]-delay] -= rtemp1 * a1[t]
782 a1 = a+(fira[k+0 ]+i);
783 a2 = a+(fira[k+1 ]+i);
785 rtemp1 = *a1/diaga[k+0];
786 rtemp2 = *a2/diaga[k+1];
788 diagb[u] -= rtemp1 * (*a1)
795 b0[ls[t]-delay] -= rtemp1 * a1[t]
800 a1 = a+(fira[k+0 ]+i);
802 rtemp1 = *a1/diaga[k+0];
804 diagb[u] -= rtemp1 * (*a1);
809 b0[ls[t]-delay] -= rtemp1 * a1[t];
814 static void ExtUpdSnode(chfac *cf,
825 *ujsze=cf->ujsze,*usub=cf->usub,*ujbeg=cf->ujbeg,*uhead=cf->uhead;
826 double *diag=cf->diag,*uval=cf->uval;
831 if (usnde==cf->nsnds-1) {
832 if (usub[ujbeg[f]+start]<subg[usnde]) {
833 printf(
"\n Index error");
840 ls = usub+ujbeg[f]+start;
841 sze = ujsze[f]-start;
844 iw[k-f] =uhead[k]+start-(k-f);
846 UpdSnodes(sze,l-f,sze,diag+f,uval,iw,diag+ls[0],uval,uhead+ls[0],ls);
852 static void PushFward(chfac *cf,
858 int j,s,t,u,k,stops,offset,sze,itemp,
860 *map=iw,*subg=cf->subg,
861 *ujsze=cf->ujsze,*uhead=cf->uhead,*ujbeg=cf->ujbeg,*usub=cf->usub;
862 double rtemp1,*l0,*l1,
863 *diag=cf->diag,*uval=cf->uval;
865 if (f>subg[snde+1]-subg[snde]) {
866 printf(
"\n PushFward");
876 offset = subg[snde+1]-f-1;
877 sze = ujsze[f] - offset;
878 ls1 = usub+ujbeg[f]+offset;
881 l1 = uval+uhead[f]+offset;
884 for(t=0; t<sze; ++t) {
887 if (j>=subg[cf->nsnds-1])
890 rtemp1 = l1[0]/diag[f];
891 diag[j] -= rtemp1*l1[0];
900 if (stops && ls1[stops-1]==ls0[stops-1]) {
901 for(s=0; s<stops; ++s)
902 l0[s] -= rtemp1 * l1[s];
906 for(s=0, u=0; s<stops; ++u) {
907 if (ls0[u]==ls1[s]) {
908 l0[u] -= rtemp1 * l1[s];
915 if (t<sze && !cf->sdens)
916 ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],l-subg[snde],t,iw);
921 for(t=0; t<sze; ++t, ++offset) {
924 if (j>=subg[cf->nsnds-1]) {
926 ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],
927 l-subg[snde],offset,iw);
940 if (stops && ls1[stops-1]==ls0[stops-1]) {
942 map[k-f] = uhead[k]+itemp-k;
944 UpdSnode(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j);
949 for(s=0, u=0; s<stops; ++u) {
950 if (ls1[s]==ls0[u]) {
957 map[k-f] = uhead[k]+itemp-k;
959 UpdSnodes(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j,map+l);
965 static int FacDenNode(chfac *cf,
970 int c,d,j,s,t,sncl,k,k0,m,cacsze,sze,offset,
971 *subg=cf->subg,*ujsze=cf->ujsze,
972 *ujbeg=cf->ujbeg,*uhead=cf->uhead,
973 *usub=cf->usub,*dhead=cf->dhead,
974 *dsub=cf->dsub,*dbeg=cf->dbeg,*ls;
975 double *diag=cf->diag,*uval=cf->uval;
978 cacsze = cf->cachesize*cf->cacheunit;
981 for(d=0; d<cf->ndens; ++d) {
983 for(k=dhead[d]; k<dhead[d+1]; ++k) {
986 if (usub[ujbeg[subg[s]]+offset]<subg[cf->nsnds-1]) {
987 printf(
"\nindex error1");
991 for(j=subg[s]; j<subg[s+1]; ++c, ++j) {
993 iw[c] = uhead[j]+offset-(j-subg[s]);
995 if (usub[ujbeg[j]+offset-(j-subg[s])]<subg[cf->nsnds-1]) {
996 printf(
"\nindex error");
1005 m = ujsze[subg[s]]-dbeg[k];
1006 ls = usub+ujbeg[subg[s]]+dbeg[k];
1009 t = cacsze/(m*
sizeof(double));
1013 UpdSnodes(m,t,m,rw+k,uval,iw+k,
1014 diag+ls[0],uval,uhead+ls[0],ls);
1024 sncl = cf->subg[s+1]-cf->subg[s];
1027 for(sze=0; sze<cacsze && k<sncl; ++k)
1028 sze += ujsze[subg[s]+k] *
sizeof(
double);
1032 else if (k>=k0+2 && sze>cacsze)
1038 sresp = FacSnode(cf,s,k0,k,iw,mode);
1042 iUpdSnode(cf,s,k0,k,k,sncl,iw);
1048 int ChlFact(chfac *cf,
1053 int s,sncl,k,k0,cacsze,sze,
1054 *subg=cf->subg,*ujsze=cf->ujsze;
1057 cacsze=cf->cachesize*cf->cacheunit;
1059 for(s=0; s+1<cf->nsnds; ++s) {
1060 sncl = cf->subg[s+1]-cf->subg[s];
1064 for(sze=0; sze<=cacsze && k<sncl; ++k)
1065 sze += ujsze[subg[s]+k]*
sizeof(
double);
1069 else if (k>=k0+2 && sze>cacsze)
1075 cid=FacSnode(cf,s,k0,k,iw,mode);
1079 iUpdSnode(cf,s,k0,k,k,sncl,iw);
1081 PushFward(cf,s,k0,k,iw);
1085 cid=FacDenNode(cf,iw,rw,mode);
1087 for (k=0;k<cf->nrow;k++){
1088 cf->sqrtdiag[k]=sqrt(fabs(cf->diag[k]));