QR_MUMPS
qrm_apply_q.F90
Go to the documentation of this file.
1 !! ##############################################################################################
2 !!
3 !! Copyright 2012 CNRS, INPT
4 !!
5 !! This file is part of qr_mumps.
6 !!
7 !! qr_mumps is free software: you can redistribute it and/or modify
8 !! it under the terms of the GNU Lesser General Public License as
9 !! published by the Free Software Foundation, either version 3 of
10 !! the License, or (at your option) any later version.
11 !!
12 !! qr_mumps is distributed in the hope that it will be useful,
13 !! but WITHOUT ANY WARRANTY; without even the implied warranty of
14 !! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 !! GNU Lesser General Public License for more details.
16 !!
17 !! You can find a copy of the GNU Lesser General Public License
18 !! in the qr_mumps/doc directory.
19 !!
20 !! ##############################################################################################
21 
22 
23 !! ##############################################################################################
33 
34 
35 #include "qrm_common.h"
36 
44 subroutine _qrm_apply_q(qrm_mat, b)
45 
46  use _qrm_spmat_mod
47  use _qrm_rfpf_mod
48  use qrm_mem_mod
49  use qrm_common_mod
50  use qrm_task_mod
51  use qrm_queue_mod
52  implicit none
53 
54  type(_qrm_spmat_type), target :: qrm_mat
55  _qrm_data, intent(inout) :: b(:,:)
56 
57 
58  integer :: nth, thn, info, f, dones, qrm_nth
59  integer :: i, p
60  type(qrm_task_type) :: task
61  type(qrm_task_queue_handle) :: tq_h
62  type(qrm_queue) :: ready_q
63  type(_qrm_front_type), pointer :: front
64  integer, allocatable :: status(:)
65  type(qrm_adata_type), pointer :: adata
66  type(_qrm_fdata_type), pointer :: fdata
67  logical :: got_task
68 #if defined (_OPENMP)
69  integer(kind=omp_lock_kind), allocatable :: locks(:)
70  integer(kind=omp_lock_kind) :: dlock
71 #endif
72 
73  ! error management
74  integer :: err_act
75  character(len=*), parameter :: name='qrm_apply_q'
76 
77  call qrm_err_act_save(err_act)
78 
79  __qrm_prnt_dbg('("Applying Q")')
80 
81  ! simplify
82  adata => qrm_mat%adata
83  fdata => qrm_mat%fdata
84 
85  ! initialize queue
86  call qrm_queue_init(ready_q , adata%nnodes, qrm_lifo_)
87 
88  ! initialize to safe values for the case where openmp is not used
89  ! the status and locks arrays are needed because multiple solves may
90  ! be run in parallel on the same fdata
91  call qrm_aalloc(status, qrm_mat%adata%nnodes)
92  __qrm_check_ret(name,'qrm_aalloc',9999)
93  !$ allocate(locks(adata%nnodes))
94 
95  do f = 1, adata%nnodes
96  status(f) = qrm_ready_
97  !$ call omp_init_lock(locks(f))
98  p = adata%parent(f)
99  if(p .eq. 0 .and. (adata%rc(f).ge.0)) call qrm_queue_push(ready_q, f)
100  end do
101 
102  !$ call omp_init_lock(dlock)
103 
104  if(adata%ncsing .gt. 0) then
105  dones = 1
106  else
107  dones = 0
108  end if
109 
110 #if defined(_OPENMP)
111  call omp_set_num_threads(1)
112  qrm_nth = qrm_mat%icntl(qrm_nthreads_)
113 #endif
114 
115  !$omp parallel &
116  !$omp & num_threads(qrm_nth) &
117  !$omp & private(got_task, task, nth, thn, info) &
118  !$omp & shared(ready_q, status, locks, dlock, dones, tq_h)
119 
120 #if defined(_OPENMP)
121  nth = omp_get_num_threads()
122  thn = omp_get_thread_num()
123 #else
124  nth = 1
125  thn = 0
126 #endif
127 
128  info = 0
129 
130  call qrm_par_mem_init()
131  call qrm_init_task_queue(tq_h)
132 
133  taskloop: do
134  if(qrm_err_stack%nelem .gt. 0) goto 9998
135 
136  ! fill up the queue if there are too few tasks
137  if(qrm_task_queue_card(tq_h) .lt. nth) then
138  call fill_queue_q( )
139  end if
140 
141  got_task = qrm_get_task(tq_h, task)
142 
143  if(.not. got_task) cycle taskloop ! queue is empty
144 
145  select case(task%id)
146  case(qrm_task_exit_)
147  !$omp barrier
148  ! done, exit
149  exit
150  case(qrm_task_sol_)
151  ! apply a set of Householder vectors
152  call apply_q(task, thn)
153  end select
154  end do taskloop
155 
156 9998 continue
157  call qrm_clean_task_queue(tq_h)
158  call qrm_par_mem_finalize()
159  !$omp end parallel
160 
161  call qrm_adealloc(status)
162  call qrm_queue_free(ready_q)
163 
164 #if defined (_OPENMP)
165  deallocate(locks)
166 #endif
167 
168  if(qrm_err_stack%nelem .gt. 0) then
169  call qrm_err_push(22, name)
170  goto 9999
171  end if
172 
173  call qrm_err_act_restore(err_act)
174  return
175 
176 9999 continue ! error management
177  call qrm_err_act_restore(err_act)
178  if(err_act .eq. qrm_abort_) then
179  call qrm_err_check()
180  end if
181  return
182 
183 contains
184 
185 
186 
187 !==========================================================================================
188 !==========================================================================================
189  subroutine fill_queue_q( )
190  implicit none
191 
192  type(_qrm_front_type), pointer :: front
193  integer :: f
194  integer :: thn
195  logical :: found
196  type(qrm_task_type) :: tsk
197 
198 #if defined (_OPENMP)
199  thn = omp_get_thread_num()
200 #else
201  thn = 0
202 #endif
203 
204  found = .false.
205 
206  f = 0
207  do
208 
209  f = qrm_queue_next(ready_q, f)
210  if(f .eq. 0) exit
211 
212  front => fdata%front_list(f)
213 
214 #if defined (_OPENMP)
215  if(.not. omp_test_lock(locks(f))) cycle
216  ! call omp_set_lock(locks(f))
217 #endif
218  if(status(f) .eq. qrm_ready_) then
219  tsk = qrm_task_type(qrm_task_sol_, front%num, 0, 0, .false.)
220  if(qrm_sched_task(tq_h, tsk, 'h')) then
221  ! mark the column as assigned
222  found = .true.
223  status(f) = qrm_busy_
224  end if
225 
226  end if
227 #if defined (_OPENMP)
228  call omp_unset_lock(locks(f))
229 #endif
230  end do
231 
232  ! if nothing was found above, then check if the facto is over
233  ! otherwise return
234  if(found) return
235 
236  call check_applyq_over( )
237 
238  return
239  end subroutine fill_queue_q
240 
241 
242 
243 
244 !==========================================================================================
245 !==========================================================================================
246  subroutine check_applyq_over( )
247  ! checks whether the apply_qt is over and eventually schedules a
248  ! termination task
249  implicit none
250 
251  type(qrm_task_type) :: tsk
252  logical :: found
253 
254  ! all the fronts are done
255 #if defined (_OPENMP)
256  call omp_set_lock( dlock )
257 #endif
258  if(dones .eq. fdata%nfronts) then
259  tsk = qrm_task_type(qrm_task_exit_, 0, 0, 0, .false.)
260  found = qrm_sched_task(tq_h, tsk, 't')
261  end if
262 #if defined (_OPENMP)
263  call omp_unset_lock( dlock )
264 #endif
265 
266  return
267  end subroutine check_applyq_over
268 
269 
270 
271 
272 
273 
274 !==========================================================================================
275 !==========================================================================================
276  subroutine apply_q(task, thn)
277  implicit none
278  type(qrm_task_type) :: task
279  integer :: thn
280 
281  type(_qrm_front_type), pointer :: front
282  integer :: f, p, c, info
283 
284  front => null()
285 
286  ! to make things easier
287  front => qrm_mat%fdata%front_list(task%front)
288  info = 0
289 
290  ! write(*,'(i3," =--> Apply Q : ",i4)')thn, front%num
291 
292  call front_q(front, info)
293  status(task%front) = qrm_done_
294  call qrm_queue_rm(ready_q, front%num)
295 
296  !$ call omp_set_lock( dlock )
297  dones = dones+1
298  !$ call omp_unset_lock( dlock )
299 
300  ! sweep over the children. Small children are treated immediately,
301  ! the others are pushed on the ready_q
302  do p = adata%childptr(front%num), adata%childptr(front%num+1)-1
303  c = adata%child(p)
304  if(adata%small(c) .eq. 1) then
305  call do_subtree_q(c, info)
306  if(info .ne. 0) goto 9997
307  else
308  call qrm_queue_push(ready_q, c)
309  end if
310  end do
311 
312 
313 9997 continue
314  return
315  end subroutine apply_q
316 
317 
318 
319 
320 
321 
322 !==========================================================================================
323 !==========================================================================================
324  subroutine do_subtree_q(fnum, info)
325  implicit none
326 
327  integer :: fnum, info
328 
329  type(_qrm_front_type), pointer :: front
330  integer :: node, c, acc, thn, p
331  type(qrm_queue) :: sub_q
332 
333  info = 0
334  acc = 0
335 
336  call qrm_queue_init(sub_q, adata%nnodes, qrm_fifo_)
337  call qrm_queue_push(sub_q, fnum)
338 
339  do
340  node = qrm_queue_pop(sub_q)
341 
342  if(node .eq. 0) exit
343 
344  front => qrm_mat%fdata%front_list(node)
345 
346  call front_q(front, info)
347 
348  !$ call omp_set_lock( dlock )
349  dones = dones+1
350  !$ call omp_unset_lock( dlock )
351 
352  status(node) = qrm_done_
353 
354  ! sweep over the children. Small children are treated immediately,
355  ! the others are pushed on the ready_q
356  do p = adata%childptr(front%num), adata%childptr(front%num+1)-1
357  c = adata%child(p)
358  call qrm_queue_push(sub_q, c)
359  acc = acc+1
360  end do
361 
362  end do
363 
364  call qrm_queue_free(sub_q)
365 
366  return
367 
368  end subroutine do_subtree_q
369 
370 
371 
372 
373 !==========================================================================================
374 !==========================================================================================
375  subroutine front_q(front, info)
377  use _qrm_rfpf_mod
378  use _qrm_spmat_mod
379  use qrm_mem_mod
380  use qrm_common_mod
381  implicit none
382 
383  type(_qrm_front_type) :: front
384  integer :: info
385 
386  integer :: thn, i
387  integer :: pv1, c, k, m, pv2, n, cnt, j, jp, pk
388  _qrm_data, allocatable :: work(:,:), in_b(:,:), t(:,:)
389  ! error management
390  character(len=*), parameter :: name='front_q'
391 
392  ! shortcut
393  if (min(front%m, front%n) .le. 0) goto 9999
394 
395  !$ call omp_set_lock( locks(front%num) )
396 
397  thn = 0
398  !$ thn = omp_get_thread_num()
399 
400  ! allocate everything that's needed
401  n = size(b,2)
402  call qrm_aalloc(in_b, front%m, n)
403  call qrm_aalloc(t, front%nb, front%nb)
404  call qrm_aalloc(work, n, front%nb)
405  __qrm_check_ret(name,'qrm_aalloc',9999)
406 
407  ! gather b into front%b
408  in_b = b(front%rows,:)
409 
410 #if defined (RFPF)
411  ! ! do all the computations here
412  ! pv1 = 1
413  ! do c = 1, ((front%ne-1)/front%nb)*front%nb, front%nb
414  ! k = min(front%nb, front%ne-c+1)
415  ! pv1 = pv1 + k*(k+1)/2 + (max(front%stair(c+k-1),c+k-1) -k-c+1)*k
416  ! end do
417 
418  ! ! set r to be the starting col of the Q factor
419  ! c = ((front%ne-1)/front%nb)*front%nb+1
420  ! do
421 
422  ! k = min(front%nb, front%ne-c+1)
423  ! m = max(front%stair(c+k-1)-c-k+1,0)
424 
425  ! if(m .le. 0) then
426  ! pv2 = 1
427  ! else
428  ! pv2 = pv1+(k*(k+1))/2
429  ! end if
430 
431  ! call _xrprft('f', 'c', m, k, front%h(pv1), front%h(pv2), m, front%tau(c), t(1,1), front%nb)
432 
433  ! call _xrprfb('l', 'n', 'f', 'c', m+k, n, k, front%h(pv1), front%h(pv2), m, &
434  ! & t(1,1), front%nb, in_b(c,1), front%m, work(1,1), n)
435 
436  ! if(c .le. 1) exit
437  ! pv1 = pv1-(max(front%stair(c-1),c-1)-c+front%nb+1)*front%nb + front%nb*(front%nb-1)/2
438  ! c = c-front%nb
439 
440  ! end do
441 
442 #else
443 
444  cnt=size(front%h)+1
445  outer: do jp = front%ne - mod(front%ne, front%nb)+1, 1, -front%nb
446  pk = min(front%nb, front%ne-jp+1)
447  if(pk .le. 0) cycle
448  ! write(*,*)'solve',jp,pk
449 
450  inner: do j = jp+pk-mod(pk,front%ib), jp, -front%ib
451  k = min(front%ib, jp+pk - j)
452  if(k .le. 0) cycle
453  m = max(front%stair(j+k-1),j+k-1) - j+1
454  cnt = cnt-m*k
455 
456  call _xlarfb('l', 'n', 'f', 'c', m, n, k, front%h(cnt), m, &
457  & front%h(cnt), m, in_b(j,1), front%m, work(1,1), n)
458  end do inner
459  end do outer
460 #endif
461 
462  ! scatter front%b into b
463  b(front%rows,:) = in_b
464 
465  ! cleanup
466  call qrm_adealloc(in_b)
467  call qrm_adealloc(t)
468  call qrm_adealloc(work)
469  __qrm_check_ret(name,'qrm_adelloc',9999)
470  !$ call omp_unset_lock( locks(front%num) )
471 
472 9999 continue
473  return
474 
475  end subroutine front_q
476 
477 
478 
479 end subroutine _qrm_apply_q
subroutine qrm_clean_task_queue(h)
Destroyes a set of queues.
subroutine front_q(front, info)
This module contains generic interfaces for a number of auxiliary tools.
Generic interface for the qrm_adealloc_i, qrm_adealloc_2i, qrm_adealloc_s, qrm_adealloc_2s, qrm_adealloc_3s, qrm_adealloc_d, qrm_adealloc_2d, qrm_adealloc_3d, qrm_adealloc_c, qrm_adealloc_2c, qrm_adealloc_3c, qrm_adealloc_z, qrm_adealloc_2z, qrm_adealloc_3z, routines.
This module contains all the facilities for front queues.
subroutine _qrm_apply_q(qrm_mat, b)
This function applies Q to a vector/matrix.
Definition: qrm_apply_q.F90:45
This type defines the handle for the queues attached to a family of threads.
This module contains the interfaces of all non-typed routines.
A data type meant to to define a queue.
subroutine qrm_par_mem_finalize()
subroutine fill_queue_q()
subroutine qrm_par_mem_init()
This routine has to be called at the beginning of a parallel section. Afterwards, each thread will up...
This module contains the definition of a task type that is used for scheduling tasks during the facto...
logical function qrm_sched_task(h, tsk, pol, q)
Pushes a task on a queue.
integer, parameter qrm_task_exit_
subroutine qrm_queue_rm(q, n)
Removes (without returning it) an element from a queue.
subroutine do_subtree_q(fnum, info)
subroutine qrm_init_task_queue(h)
Inititalizes a set of queues attached to a family of threads referenced through the handle h...
integer function qrm_queue_pop(q)
Pops an element from a queue.
Generic interface for the qrm_aalloc_i, qrm_aalloc_2i, qrm_aalloc_s, qrm_aalloc_2s, qrm_aalloc_3s, qrm_aalloc_d, qrm_aalloc_2d, qrm_aalloc_3d, qrm_aalloc_c, qrm_aalloc_2c, qrm_aalloc_3c, qrm_aalloc_z, qrm_aalloc_2z, qrm_aalloc_3z, routines.
Definition: qrm_mem_mod.F90:78
logical function qrm_get_task(h, tsk)
Pops a task from a queue. Tasks are always popped from the head of the queue. The return value is ...
This type defines a computational task.
subroutine qrm_queue_push(q, elem)
Pushes an element on a queue.
This type defines the data structure used to store a matrix.
integer, parameter qrm_fifo_
parameter to define the policy of the queue: FIFO
integer function qrm_task_queue_card(h)
Returns the number of tasks present on a set of queues referenced by a handle.
subroutine apply_q(task, thn)
This module contains the definition of the basic sparse matrix type and of the associated methods...
subroutine qrm_queue_free(q)
Frees a queue.
integer, parameter qrm_lifo_
parameter to define the policy of the queue: LIFO
integer, parameter qrm_task_sol_
subroutine check_applyq_over()
This module implements the memory handling routines. Pretty mucch allocations and deallocations...
Definition: qrm_mem_mod.F90:38
This module contains an implementation of some operations on triangular/trapezoidal matrices stored i...
integer function qrm_queue_next(q, n)
Returns the element that follows n in the queue q. Very useful for sweeping through a queue...