ergo
template_lapack_laneg.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LANEG_HEADER
36 #define TEMPLATE_LAPACK_LANEG_HEADER
37 
38 
39 #include "template_lapack_isnan.h"
40 
41 
42 template<class Treal>
43 int template_lapack_laneg(integer *n, Treal *d__, Treal *lld, Treal *
44  sigma, Treal *pivmin, integer *r__)
45 {
46  /* System generated locals */
47  integer ret_val, i__1, i__2, i__3, i__4;
48 
49  /* Local variables */
50  integer j;
51  Treal p, t;
52  integer bj;
53  Treal tmp;
54  integer neg1, neg2;
55  Treal bsav, gamma, dplus;
56  integer negcnt;
57  logical sawnan;
58  Treal dminus;
59 
60 
61 /* -- LAPACK auxiliary routine (version 3.2) -- */
62 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
63 /* November 2006 */
64 
65 /* .. Scalar Arguments .. */
66 /* .. */
67 /* .. Array Arguments .. */
68 /* .. */
69 
70 /* Purpose */
71 /* ======= */
72 
73 /* DLANEG computes the Sturm count, the number of negative pivots */
74 /* encountered while factoring tridiagonal T - sigma I = L D L^T. */
75 /* This implementation works directly on the factors without forming */
76 /* the tridiagonal matrix T. The Sturm count is also the number of */
77 /* eigenvalues of T less than sigma. */
78 
79 /* This routine is called from DLARRB. */
80 
81 /* The current routine does not use the PIVMIN parameter but rather */
82 /* requires IEEE-754 propagation of Infinities and NaNs. This */
83 /* routine also has no input range restrictions but does require */
84 /* default exception handling such that x/0 produces Inf when x is */
85 /* non-zero, and Inf/Inf produces NaN. For more information, see: */
86 
87 /* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */
88 /* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */
89 /* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */
90 /* (Tech report version in LAWN 172 with the same title.) */
91 
92 /* Arguments */
93 /* ========= */
94 
95 /* N (input) INTEGER */
96 /* The order of the matrix. */
97 
98 /* D (input) DOUBLE PRECISION array, dimension (N) */
99 /* The N diagonal elements of the diagonal matrix D. */
100 
101 /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
102 /* The (N-1) elements L(i)*L(i)*D(i). */
103 
104 /* SIGMA (input) DOUBLE PRECISION */
105 /* Shift amount in T - sigma I = L D L^T. */
106 
107 /* PIVMIN (input) DOUBLE PRECISION */
108 /* The minimum pivot in the Sturm sequence. May be used */
109 /* when zero pivots are encountered on non-IEEE-754 */
110 /* architectures. */
111 
112 /* R (input) INTEGER */
113 /* The twist index for the twisted factorization that is used */
114 /* for the negcount. */
115 
116 /* Further Details */
117 /* =============== */
118 
119 /* Based on contributions by */
120 /* Osni Marques, LBNL/NERSC, USA */
121 /* Christof Voemel, University of California, Berkeley, USA */
122 /* Jason Riedy, University of California, Berkeley, USA */
123 
124 /* ===================================================================== */
125 
126 /* .. Parameters .. */
127 /* Some architectures propagate Infinities and NaNs very slowly, so */
128 /* the code computes counts in BLKLEN chunks. Then a NaN can */
129 /* propagate at most BLKLEN columns before being detected. This is */
130 /* not a general tuning parameter; it needs only to be just large */
131 /* enough that the overhead is tiny in common cases. */
132 /* .. */
133 /* .. Local Scalars .. */
134 /* .. */
135 /* .. Intrinsic Functions .. */
136 /* .. */
137 /* .. External Functions .. */
138 /* .. */
139 /* .. Executable Statements .. */
140  /* Parameter adjustments */
141  --lld;
142  --d__;
143 
144  /* Function Body */
145  negcnt = 0;
146 /* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */
147  t = -(*sigma);
148  i__1 = *r__ - 1;
149  for (bj = 1; bj <= i__1; bj += 128) {
150  neg1 = 0;
151  bsav = t;
152 /* Computing MIN */
153  i__3 = bj + 127, i__4 = *r__ - 1;
154  i__2 = minMACRO(i__3,i__4);
155  for (j = bj; j <= i__2; ++j) {
156  dplus = d__[j] + t;
157  if (dplus < 0.) {
158  ++neg1;
159  }
160  tmp = t / dplus;
161  t = tmp * lld[j] - *sigma;
162 /* L21: */
163  }
164  sawnan = template_lapack_isnan(&t);
165 /* Run a slower version of the above loop if a NaN is detected. */
166 /* A NaN should occur only with a zero pivot after an infinite */
167 /* pivot. In that case, substituting 1 for T/DPLUS is the */
168 /* correct limit. */
169  if (sawnan) {
170  neg1 = 0;
171  t = bsav;
172 /* Computing MIN */
173  i__3 = bj + 127, i__4 = *r__ - 1;
174  i__2 = minMACRO(i__3,i__4);
175  for (j = bj; j <= i__2; ++j) {
176  dplus = d__[j] + t;
177  if (dplus < 0.) {
178  ++neg1;
179  }
180  tmp = t / dplus;
181  if (template_lapack_isnan(&tmp)) {
182  tmp = 1.;
183  }
184  t = tmp * lld[j] - *sigma;
185 /* L22: */
186  }
187  }
188  negcnt += neg1;
189 /* L210: */
190  }
191 
192 /* II) lower part: L D L^T - SIGMA I = U- D- U-^T */
193  p = d__[*n] - *sigma;
194  i__1 = *r__;
195  for (bj = *n - 1; bj >= i__1; bj += -128) {
196  neg2 = 0;
197  bsav = p;
198 /* Computing MAX */
199  i__3 = bj - 127;
200  i__2 = maxMACRO(i__3,*r__);
201  for (j = bj; j >= i__2; --j) {
202  dminus = lld[j] + p;
203  if (dminus < 0.) {
204  ++neg2;
205  }
206  tmp = p / dminus;
207  p = tmp * d__[j] - *sigma;
208 /* L23: */
209  }
210  sawnan = template_lapack_isnan(&p);
211 /* As above, run a slower version that substitutes 1 for Inf/Inf. */
212 
213  if (sawnan) {
214  neg2 = 0;
215  p = bsav;
216 /* Computing MAX */
217  i__3 = bj - 127;
218  i__2 = maxMACRO(i__3,*r__);
219  for (j = bj; j >= i__2; --j) {
220  dminus = lld[j] + p;
221  if (dminus < 0.) {
222  ++neg2;
223  }
224  tmp = p / dminus;
225  if (template_lapack_isnan(&tmp)) {
226  tmp = 1.;
227  }
228  p = tmp * d__[j] - *sigma;
229 /* L24: */
230  }
231  }
232  negcnt += neg2;
233 /* L230: */
234  }
235 
236 /* III) Twist index */
237 /* T was shifted by SIGMA initially. */
238  gamma = t + *sigma + p;
239  if (gamma < 0.) {
240  ++negcnt;
241  }
242  ret_val = negcnt;
243  return ret_val;
244 } /* dlaneg_ */
245 
246 #endif
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:43
int integer
Definition: template_blas_common.h:38
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
int template_lapack_laneg(integer *n, Treal *d__, Treal *lld, Treal *sigma, Treal *pivmin, integer *r__)
Definition: template_lapack_laneg.h:43
#define minMACRO(a, b)
Definition: template_blas_common.h:44
bool logical
Definition: template_blas_common.h:39