KronLinInv  0.3
Kronecker-product-based linear inversion
ompi_kronlininv.f08
Go to the documentation of this file.
1 
2 !------------------------------------------------------------------------
3 !
4 ! Copyright 2018, Andrea Zunino
5 !
6 ! This file is part of KronLinInv.
7 !
8 ! KronLinInv is free software: you can redistribute it and/or modify
9 ! it under the terms of the GNU General Public License as published by
10 ! the Free Software Foundation, either version 3 of the License, or
11 ! (at your option) any later version.
12 !
13 ! KronLinInv is distributed in the hope that it will be useful,
14 ! but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ! GNU General Public License for more details.
17 !
18 ! You should have received a copy of the GNU General Public License
19 ! along with KronLinInv. If not, see <http://www.gnu.org/licenses/>.
20 !
21 !------------------------------------------------------------------------
22 
23 
28 
29 
30 !-------------------------------------------------------------
34 !
36 !
37 !-------------------------------------------------------------
38 module realprec
39 
40  ! http://fortranwiki.org/fortran/show/Real+precision
41  integer,parameter :: pdigits=15 !15 !! used also by MPI
42  integer,parameter :: rexprange=307 !307 !! used also by MPI
43 
44  ! RESULT = SELECTED_REAL_KIND([P, R, RADIX])
45  integer,parameter :: dp = selected_real_kind(pdigits, rexprange)
46 
47  ! integer, parameter :: sp = selected_real_kind(6, 37)
48  ! integer, parameter :: dp = selected_real_kind(15, 307)
49  ! integer, parameter :: qp = selected_real_kind(33, 4931)
50 
51  public :: pdigits,rexprange
52  public :: dp
53 
54 end module realprec
55 
56 
57 !################################################################
58 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
59 !################################################################
60 
61 !-------------------------------------------------------------------------
65 !
68 !
69 !-------------------------------------------------------------------------
70 module parautil
71 
72  use realprec
73  use mpi
74  !!use mpi_f08
75  implicit none
76  !!include 'mpif.h'
77 
78 
79  integer,parameter :: masterrank=0
80  integer,protected :: myrank,numcpus,dp_real_type
81 
82  public :: masterrank,myrank,numcpus
83 
84 contains
85 
86  !!====================================================
87 
88  subroutine para_init()
89 
90  use, intrinsic :: iso_c_binding
91 
92  integer :: mpierr
93 
94  !! start MPI section
95  call mpi_init(mpierr)
96  if (mpierr /= 0) then
97  write(*,*) "call MPI_INIT(mpierr): mpierr /= 0"
98  stop
99  end if
100 
101  call mpi_comm_rank(mpi_comm_world, myrank, mpierr)
102  call mpi_comm_size(mpi_comm_world, numcpus, mpierr)
103  !! create mpi real and complex types
104  call mpi_type_create_f90_real(pdigits,rexprange,dp_real_type,mpierr)
105  !!call MPI_Type_create_f90_complex(pdigits,rexprange,complextype,error )
106 
107  if (myrank==masterrank) print*,"###>>>> Number of tasks:",numcpus
108 
109 
110  end subroutine para_init
111 
112  !!====================================================
113 
114  subroutine para_finish()
115  ! finalize mpi
116  integer :: ierror
117  call mpi_finalize(ierror)
118  end subroutine para_finish
119 
120  !!=====================================================
121 
122  subroutine para_barrier()
123  integer :: mpierr
124  call mpi_barrier(mpi_comm_world,mpierr)
125  end subroutine para_barrier
126 
127  !!=====================================================
128 
129  ! subroutine spreadwork1(nit,nunits,scheduling,looping)
130  ! integer :: nit,nunits
131  ! integer,allocatable :: scheduling(:),looping(:,:)
132  ! integer :: i,nitcpu,resto
133  ! if (nit<=nunits) then
134  ! write(*,*) myrank,": spredwork(): nit<=nunits"
135  ! write(*,*) nit,nunits
136  ! stop
137  ! end if
138  ! if (allocated(scheduling) ) deallocate(scheduling)
139  ! if (allocated(looping)) deallocate(looping)
140  ! allocate(scheduling(nunits),looping(nunits,2))
141 
142  ! !! compute distribution of workload for Nb
143  ! nitcpu = nit/nunits
144  ! scheduling(:) = nitcpu
145  ! resto = mod(nit,nunits)
146 
147  ! !! spread the remaining workload
148  ! scheduling(1:resto) = scheduling(1:resto) + 1
149  ! looping(1,:) = [1,scheduling(1)]
150  ! do i=2,nunits
151  ! looping(i,1) = sum(scheduling(1:i-1)) + 1
152  ! looping(i,2) = sum(scheduling(1:i))
153  ! end do
154 
155  ! !!print*,myrank,": loop 1",looping(:,1),&
156  ! !! "| loop 2",looping(:,2)," | sched ",scheduling
157 
158  ! end subroutine spreadwork1
159 
160  !!=====================================================
161 
162  subroutine spreadwork(nit,nunits,scheduling,looping,startpoint)
163  integer,intent(in) :: nit,nunits,startpoint
164  integer,allocatable,intent(out) :: scheduling(:),looping(:,:)
165  integer :: i,nitcpu,resto
166  if (nit<=nunits) then
167  write(*,*) myrank,": spredwork(): nit<=nunits"
168  write(*,*) nit,nunits
169  stop
170  end if
171  if (allocated(scheduling) ) deallocate(scheduling)
172  if (allocated(looping)) deallocate(looping)
173  allocate(scheduling(nunits),looping(nunits,2))
174 
175  !! compute distribution of workload for Nb
176  nitcpu = nit/nunits
177  scheduling(:) = nitcpu
178  resto = mod(nit,nunits)
179 
180  !! spread the remaining workload
181  scheduling(1:resto) = scheduling(1:resto) + 1
182  looping(1,:) = [startpoint,startpoint+scheduling(1)-1]
183  do i=2,nunits
184  looping(i,1) = sum(scheduling(1:i-1)) + startpoint
185  looping(i,2) = sum(scheduling(1:i)) + startpoint - 1
186  end do
187 
188  !!print*,myrank,": loop 1",looping(:,1),&
189  !! "| loop 2",looping(:,2)," | sched ",scheduling
190 
191  end subroutine spreadwork
192 
193  !!=====================================================
194 
195  subroutine para_srint(source,dest,tag,sint,rint)
196  integer,intent(in) :: source,dest,tag
197  integer,intent(in) :: sint
198  integer,intent(inout) :: rint
199  integer :: nelem,mpierr
200  integer :: mpistat(mpi_status_size)
201 
202  if ((myrank==source) .and. (myrank==dest)) then
203  rint = sint
204  else
205  if (myrank==source) then
206  call mpi_send(sint, nelem, mpi_integer, dest, tag, &
207  mpi_comm_world, mpierr)
208  end if
209  if (myrank==dest) then
210  call mpi_recv(rint, nelem, mpi_integer, source, tag,&
211  mpi_comm_world, mpistat, mpierr)
212  end if
213  end if
214 
215  end subroutine para_srint
216 
217  !!=====================================================
218 
219  subroutine para_srarr1ddp(source,dest,tag,sendrow,recvrow)
220 
221  integer,intent(in) :: source,dest,tag
222  real(dp),intent(in) :: sendrow(:)
223  real(dp),intent(inout) :: recvrow(:)
224 
225  integer :: mpierr,nelem
226  integer :: mpistat(mpi_status_size)
227 
228  nelem=size(sendrow) ! with no dim gives the total array size
229 
230  ! to avoid deadlocks
231  if ((myrank==source) .and. (myrank==dest)) then
232  !print*,"same send recv in ",myrank
233  recvrow = sendrow
234  else
235  if (myrank==source) then
236  !print*,"sending from ",myrank," to ",dest," tag=",tag
237  call mpi_send(sendrow, nelem, dp_real_type, dest, tag, &
238  mpi_comm_world, mpierr)
239  end if
240  if (myrank==dest) then
241  !print*,"receiving in ",myrank," from ",source," tag=",tag
242  call mpi_recv(recvrow, nelem, dp_real_type, source, tag,&
243  mpi_comm_world, mpistat, mpierr)
244  end if
245  end if
246 
247  end subroutine para_srarr1ddp
248 
249  !!=====================================================
250 
251  subroutine para_bc2ddp(arr,source)
252  real(dp) :: arr(:,:)
253  integer,intent(in) :: source
254  integer :: mpierr
255  call mpi_bcast(arr,size(arr), dp_real_type, source, mpi_comm_world, mpierr)
256  end subroutine para_bc2ddp
257 
258  !!=====================================================
259 
260  subroutine para_bc1ddp(arr,source)
261  real(dp) :: arr(:)
262  integer,intent(in) :: source
263  integer :: mpierr
264  call mpi_bcast(arr,size(arr), dp_real_type, source, mpi_comm_world, mpierr)
265  end subroutine para_bc1ddp
266 
267  !!=====================================================
268 
269  subroutine para_allgathv1ddp( scheduling, displs, sendbuf, recvbuf )
270  real(dp),intent(out) :: recvbuf(:)
271  integer,intent(in) :: scheduling(:),displs(:)
272  real(dp),intent(in) :: sendbuf(:)
273  integer :: mpierr
274  !! SENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS,
275  !! DISPLS, RECVTYPE, COMM, IERROR
276  !print*,myrank," : size(sendbuf)",size(sendbuf),size(recvbuf)," loop ",displs," | sched ",scheduling
277  ! if (displs(1)/=0) then
278  ! write(*,*) myrank,": para_allgathv1ddp(): displs(1)/=0 potential problem (should be 0)"
279  ! stop
280  ! end if
281 
282  call mpi_allgatherv(sendbuf,size(sendbuf), dp_real_type, recvbuf, scheduling, &
283  displs, dp_real_type, mpi_comm_world, mpierr)
284  !print*,myrank," mpierr",mpierr
285  end subroutine para_allgathv1ddp
286 
287  !!=====================================================
288 
289  subroutine para_gathv1ddp( recvcounts, displs, receiver, sendbuf, recvbuf )
290  real(dp),intent(out) :: recvbuf(:)
291  integer,intent(in) :: recvcounts(:),receiver,displs(:)
292  real(dp),intent(in) :: sendbuf(:)
293  integer :: mpierr
294 
295  ! print*,myrank," : size(sendbuf)",size(sendbuf)," size(recvbuf)",size(recvbuf)," displs ",displs
296 
297  ! if (displs(1)/=0) then
298  ! write(*,*) myrank,": para_allgathv1ddp(): displs(1)/=0 potential problem (should be 0)"
299  ! stop
300  ! end if
301 
302  !! SENDBUF, SENDCOUNT, SENDTYPE, RECVBUF, RECVCOUNTS,
303  !! DISPLS, RECVTYPE, ROOT, COMM, IERROR
304 
305  call mpi_gatherv(sendbuf,size(sendbuf), dp_real_type, recvbuf, recvcounts, &
306  displs, dp_real_type, receiver, mpi_comm_world, mpierr)
307 
308  end subroutine para_gathv1ddp
309 
310  !!=====================================================
311 
312  subroutine timeinfo(totit,curit,startt,loopinfo)
313  use, intrinsic :: iso_fortran_env, only: output_unit
314  character(len=30) :: loopinfo
315  integer :: totit,curit
316  real(dp) :: frac,eta,startt
317 
318  frac = real(curit,dp)/(real(totit,dp))
319  eta = ( (mpi_wtime()-startt) / real(curit,dp) ) * &
320  (real(totit-curit,dp))
321  !! char(27) -> escape
322  !! ESC[#A -> moves cursor up # lines (ANSI)
323  !! achar(13) -> carriage return (ASCII)
324  write(output_unit,fmt='(a5,a34,f7.3,a7,f12.2,1x,a3)') &
325  char(27)//'[1A'//achar(13),&
326  adjustl(loopinfo)//': ',&
327  frac*100_dp,"%, ETA:",eta/60.0,"min"
328  flush(output_unit) !! to make sure it prints immediately
329  end subroutine timeinfo
330 
331  !!=====================================================
332 
333 end module parautil
334 
335 
336 !################################################################
337 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
338 !################################################################
339 
340 !-------------------------------------------------------------------------
344 !
348 !
349 !-------------------------------------------------------------------------
351 
352  use realprec
353  use parautil
354  use, intrinsic :: iso_fortran_env, only: output_unit
355 
356  implicit none
357 
358  private :: solvels,symgeneigvv,symsolvels
360 
361 contains
362 
363  !!!=====================================================
364  !--------------------------------------------------
365  ! DESCRIPTION:
454  ! REVISION HISTORY:
456  !
457  !--------------------------------------------------
458  subroutine calcfactors(G1,G2,G3,Cm1,Cm2,Cm3,Cd1,Cd2,Cd3,&
459  U1,U2,U3,diaginvlambda,iUCm1,iUCm2,iUCm3,&
460  iUCmGtiCd1,iUCmGtiCd2,iUCmGtiCd3 )
461 
462  real(dp),intent(in) :: G1(:,:),G2(:,:),G3(:,:),&
463  Cm1(:,:),Cm2(:,:),Cm3(:,:),&
464  Cd1(:,:),Cd2(:,:),Cd3(:,:)
465 
466  real(dp),intent(out) :: U1(:,:),U2(:,:),U3(:,:),&
467  iUCm1(:,:),iUCm2(:,:),iUCm3(:,:),&
468  iUCmGtiCd1(:,:),iUCmGtiCd2(:,:),iUCmGtiCd3(:,:),&
469  diaginvlambda(:)
470 
471  real(dp),allocatable :: lambda1(:),lambda2(:),lambda3(:)
472  real(dp),allocatable :: iCdG1(:,:),iCdG2(:,:),iCdG3(:,:),&
473  GtiCdG1(:,:),GtiCdG2(:,:),GtiCdG3(:,:),&
474  ide1(:,:),ide2(:,:),ide3(:,:),&
475  iCd1(:,:),iCd2(:,:),iCd3(:,:)
476  integer :: i,j,k,p,nm,nm1,nm2,nm3,nd1,nd2,nd3
477  character :: uplo
478 
479  !! Lapack routines
480  external dsygv
481 
482  nm1 = size(cm1,1)
483  nm2 = size(cm2,1)
484  nm3 = size(cm3,1)
485  nd1 = size(cd1,1)
486  nd2 = size(cd2,1)
487  nd3 = size(cd3,1)
488  nm = nm1*nm2*nm3
489 
490  !! compute preliminary stuff
491  print*,'calcfactors(): compute preliminary stuff'
492  allocate(icdg1(nd1,nm1),icdg2(nd2,nm2),icdg3(nd3,nm3))
493  call symsolvels( cd1, g1, icdg1 )
494  call symsolvels( cd2, g2, icdg2 )
495  call symsolvels( cd3, g3, icdg3 )
496 
497  allocate(gticdg1(nm1,nm1),gticdg2(nm2,nm2),gticdg3(nm3,nm3))
498  gticdg1 = matmul( transpose(g1), icdg1 )
499  gticdg2 = matmul( transpose(g2), icdg2 )
500  gticdg3 = matmul( transpose(g3), icdg3 )
501 
502  !!--------------------------
503  !! fa
504  !!--------------------------
505  !! compute eigendecomposition
506  print*,'calcfactors(): compute fa'
507  allocate(lambda1(nm1),lambda2(nm2),lambda3(nm3))
508  uplo = 'L'
509  call symgeneigvv(gticdg1,uplo,cm1,lambda1,u1)
510  call symgeneigvv(gticdg2,uplo,cm2,lambda2,u2)
511  call symgeneigvv(gticdg3,uplo,cm3,lambda3,u3)
512 
513  !!--------------------------
514  !! fb
515  !!--------------------------
516  !! calculates diagonal central factor
517  print*,'calcfactors(): compute fb'
518  p=1
519  do i=1,nm1
520  do j=1,nm2
521  do k=1,nm3
522  diaginvlambda(p) = 1.0_dp/(1.0_dp+lambda1(i)*lambda2(j)*lambda3(k))
523  p = p+1
524  end do
525  end do
526  end do
527 
528  !!--------------------------
529  !! fc
530  !!--------------------------
531  !! computes U1^-1 Cmx, U2^-1 Cmy, U3^-1 Cmz
532  print*,'calcfactors(): compute fc'
533  call solvels( u1, cm1, iucm1 )
534  call solvels( u2, cm2, iucm2 )
535  call solvels( u3, cm3, iucm3 )
536 
537 
538  !!--------------------------
539  !! fd
540  !!--------------------------
541  print*,'calcfactors(): compute fd'
542  allocate(ide1(nd1,nd1),ide2(nd2,nd2),ide3(nd3,nd3))
543  ide1 = 0.0_dp ! Initialize the array.
544  ide2 = 0.0_dp
545  ide3 = 0.0_dp
546  forall(i = 1:nd1) ide1(i,i) = 1.0_dp ! Set the diagonal.
547  forall(i = 1:nd2) ide2(i,i) = 1.0_dp
548  forall(i = 1:nd3) ide3(i,i) = 1.0_dp
549  allocate(icd1(nd1,nd1),icd2(nd2,nd2),icd3(nd3,nd3))
550  call symsolvels( cd1, ide1, icd1 )
551  call symsolvels( cd2, ide2, icd2 )
552  call symsolvels( cd3, ide3, icd3 )
553 
554  iucmgticd1 = matmul( iucm1, matmul( transpose(g1), icd1 ))
555  iucmgticd2 = matmul( iucm2, matmul( transpose(g2), icd2 ))
556  iucmgticd3 = matmul( iucm3, matmul( transpose(g3), icd3 ))
557 
558  print*,"calcfactors(): end "
559  end subroutine calcfactors
560 
561  !!!==================================================
562  !-------------------------------------------------
564  !
574  !
575  !
576  ! REVISION HISTORY:
581  !
582  !-------------------------------------------------
583  subroutine blockpostcov(U1,U2,U3, diaginvlambda, &
584  iUCm1,iUCm2,iUCm3, astart,aend,bstart,bend, postC)
585  !!
586  !! Calculates a block of the posterior covariance
587  !!
588  real(dp),intent(in) :: U1(:,:),U2(:,:),U3(:,:),iUCm1(:,:), &
589  iUCm2(:,:),iUCm3(:,:)
590  !! diaginvlambda = (I + lam1 x lam2 x lam3 )^-1
591  real(dp),intent(in) :: diaginvlambda(:) !! diagonal/vector central factor
592  real(dp),intent(out) :: postC(:,:)
593  integer,intent(in) :: astart,aend,bstart,bend
594 
595  integer :: a,b,irow,i,j,sender
596  real(dp),allocatable :: row2(:),col1(:)
597  integer :: Nr12,Nc1
598  integer :: Ni,Nj,Nk,Nl,Nm,Nn,Na,Nb,ntota,ntotb
599 
600  integer,allocatable :: av(:)!,bv(:)
601  integer,allocatable :: iv(:),jv(:),kv(:) !,lv(:),mv(:),nv(:)
602  integer :: p,everynit,impicount,mytotit,rirow
603  real(dp) :: startt,firststartt,endt
604  integer,allocatable :: scheduling(:),looping(:,:)
605  real(dp),allocatable :: postcmpi(:,:),recvrow(:),sendrow(:)
606  character(len=30) :: loopinfo
607 
608  if (myrank==masterrank) then
609  write(output_unit,*) "posteriormean(): calculating posterior mean... [using OpenMPI]"
610  end if
611 
612  ni = size(u1,1)
613  nl = size(u1,2)
614  nj = size(u2,1)
615  nm = size(u2,2)
616  nk = size(u3,1)
617  nn = size(u3,2)
618  na = ni*nj*nk
619  nb = nl*nm*nn
620 
621  !! check shape of output array
622  if (na /= nb) then
623  write(*,*) '(Na /= Nb)', na,nb
624  stop
625  end if
626  ntota = aend-astart+1
627  ntotb = bend-bstart+1
628  if ( (size(postc,1)/=ntota) .or. (size(postc,2)/=ntotb)) then
629  write(*,*) "Wrong size of the posterior covariance block array."
630  stop
631  end if
632  !! check limits of requested block
633  if ( (astart<1) .or. (aend>na) .or. (astart>aend) .or. &
634  (bstart<1) .or. (bend>na) .or. (bstart>bend) ) then
635  write(*,*) "Wrong size of the requested block array."
636  stop
637  end if
638 
639  !! vectorize row and col calculations for Kron prod AxBxC
640  allocate(av(na),iv(na),jv(na),kv(na))
641  !allocate(bv(Nb),lv(Nb),mv(Nb),nv(Nb))
642  forall(p = 1:na) av(p) = p
643  !forall(p = 1:Nb) bv(p) = p
644 
645  !! vectors containing all possible indices for
646  !! row calculations of Kron prod AxBxC
647  iv = (av-1)/(nk*nj)+1
648  jv = (av-1-(iv-1)*nk*nj)/nk + 1
649  kv = av-(jv-1)*nk-(iv-1)*nk*nj
650 
651  !! allocate stuff
652  nr12 = size(u1,2)* size(u2,2)* size(u3,2)
653  nc1 = size(iucm1,1)* size(iucm2,1)* size(iucm3,1)
654  allocate(row2(nr12),col1(nc1))
655 
656  !!------------------------
657  if (ntota<1000) then
658  everynit = 20
659  else
660  everynit = ntota/1000
661  end if
662 
663  !!---------------------------------
664  firststartt = mpi_wtime()
665  loopinfo = "blockpostcov() "
666  if (myrank==masterrank) write(output_unit,*)
667  !! calculate scheduling for a
668  call spreadwork(ntota,numcpus,scheduling,looping,1)
669  allocate(postcmpi(scheduling(myrank+1),ntotb),recvrow(ntotb),sendrow(ntotb))
670  mytotit = scheduling(myrank+1)
671 
672  !!print*,myrank,":",looping(myrank+1,1),"|",looping(myrank+1,2),"|",scheduling
673 
674  !!-----------------------
675  startt = mpi_wtime()
676  impicount = 0
677  do irow=looping(myrank+1,1),looping(myrank+1,2)
678  !! set a
679  a=irow+astart-1
680  !! info
681  if ( (myrank==masterrank) .and. (mod(irow,everynit)==0) ) then
682  call timeinfo(mytotit,irow,startt,loopinfo)
683  end if
684 
685  !! row first two factors
686  !! a row x diag matrix
687  !!row2 = U1(iv(a),lv) * U2(jv(a),mv) * U3(kv(a),nv) * diaginvlambda
688  row2 = u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv) * diaginvlambda
689 
690  j=0
691  do b=bstart,bend
692  j=j+1
693  !! calculate one row of first TWO factors
694  !!call columnAxBxC(Ni,Nj,Nk,Nl,Nm,Nn, iUCm1,iUCm2,iUCm3,b,col1)
695  col1 = iucm1(iv,iv(b)) * iucm2(jv,jv(b)) * iucm3(kv,kv(b))
696 
697  !! calculate one element of the posterior covariance
698  i = irow-looping(myrank+1,1)+1
699  postcmpi(i,j) = sum(row2*col1)
700 
701  end do
702 
703  end do
704 
705  !! collect all results in masterrank
706  call para_barrier()
707  do sender=0,numcpus-1
708  if ( (myrank==sender) .or. (myrank==masterrank) ) then
709  do i=looping(sender+1,1),looping(sender+1,2)
710  !!----------------------
711  !! send/rec each row
712  !print*,"==>",myrank,": sender=",sender,"i=",i
713  if (myrank==sender) sendrow = postcmpi(i,:)
714  call para_srarr1ddp(sender,masterrank,i,sendrow,recvrow)
715  !call para_srint(myrank,masterrank,impicount,irow,rirow)
716  if (myrank==masterrank) postc(i,:) = recvrow
717  end do
718  end if
719  end do
720 
721  call para_barrier()
722  if (myrank==masterrank) then
723  write(output_unit,*)
724  endt = MPI_Wtime()
725  print*,"blockpostcov():",endt-firststartt," MPI wall clock time"
726  end if
727 
728  end subroutine blockpostcov
729 
730  !!!==================================================
731  !-------------------------------------------------
742  !
743  !
744  ! REVISION HISTORY:
748  !
749  !
750  !-------------------------------------------------
751  subroutine bandpostcov(U1,U2,U3, diaginvlambda, &
752  iUCm1,iUCm2,iUCm3, lowdiag, updiag, bandpostC)
753  !!
754  !! Calculate a band of the posterior covariance
755  !!
756  real(dp),intent(in) :: U1(:,:),U2(:,:),U3(:,:),iUCm1(:,:), &
757  iUCm2(:,:),iUCm3(:,:)
758  !! diaginvlambda = (I + lam1 x lam2 x lam3 )^-1
759  real(dp),intent(in) :: diaginvlambda(:) !! diagonal/vector central factor
760  real(dp),intent(inout) :: bandpostC(:,:)
761  integer,intent(in) :: lowdiag, updiag
762 
763  integer :: a,b
764  real(dp),allocatable :: row2(:),col1(:),recvrow(:),bandpostCmpi(:)
765  integer :: Nr12,Nc1
766  integer :: Ni,Nj,Nk,Nl,Nm,Nn,Na,Nb
767 
768  real(dp) :: firststartt,startt
769  integer,allocatable :: av(:)!,bv(:)
770  integer,allocatable :: iv(:),jv(:),kv(:) !,lv(:),mv(:),nv(:)
771  integer :: p,aband,aend,bband,astart,d,bband1,bband2,ntota,everynit,mytotit
772  integer,allocatable :: scheduling(:),looping(:,:)
773  character(len=30) :: loopinfo
774  character(len=12) :: dnum
775 
776  ni = size(u1,1)
777  nl = size(u1,2)
778  nj = size(u2,1)
779  nm = size(u2,2)
780  nk = size(u3,1)
781  nn = size(u3,2)
782  na = ni*nj*nk
783  nb = nl*nm*nn
784 
785  if (na /= nb) then
786  write(*,*) '(Na /= Nb)', na,nb
787  stop
788  end if
789  if ( (updiag>=na) .or. (lowdiag>=na) .or. (lowdiag<0) .or. (updiag<0) ) then
790  write(*,*) .or."(updiag<Na) (lowdiag<Na)"
791  write(*,*) "updiag",updiag,"Na",na,"lowdiag",lowdiag,"Na",na
792  stop
793  end if
794 
795  !! vectorize row and col calculations for Kron prod AxBxC
796  allocate(av(na),iv(na),jv(na),kv(na))
797  !allocate(bv(Nb),lv(Nb),mv(Nb),nv(Nb))
798  forall(p = 1:na) av(p) = p
799  !forall(p = 1:Nb) bv(p) = p
800 
801  !! vectors containing all possible indices for
802  !! row calculations of Kron prod AxBxC
803  iv = (av-1)/(nk*nj)+1
804  jv = (av-1-(iv-1)*nk*nj)/nk+1
805  kv = av-(jv-1)*nk-(iv-1)*nk*nj
806  !! vectors containing all possible indices for
807  !! column calculations of Kron prod AxBxC
808  ! lv = (bv-1)/(Nn*Nm) + 1
809  ! mv = (bv-1-(lv-1)*Nn*Nm)/Nn + 1
810  ! nv = bv-(mv-1)*Nn-(lv-1)*Nn*Nm
811 
812  !! allocate stuff
813  nr12 = size(u1,2)* size(u2,2)* size(u3,2)
814  nc1 = size(iucm1,1)* size(iucm2,1)* size(iucm3,1)
815  allocate(row2(nr12),col1(nc1))
816 
817  ! Lapack: http://www.netlib.org/lapack/lug/node124.html
818  ! aij is stored in AB(ku+1+i-j,j) for max(1,j-ku) <= i <= \min(m,j+kl).
819  !------------
820  ! Diagonals of a matrix
821  ! i + d = j
822  ! main diag d = 0
823  ! upper d > 0
824  ! lower d < 0
825  ! diagonals of a matrix and indices of related band matrix
826  ! ONLY for square matrix
827 
828  !!---------------------------------
829  firststartt = mpi_wtime()
830  ! !! calculate scheduling for a
831  ! call spreadwork(ntota,numcpus,scheduling,looping,1)
832  ! allocate(postcmpi(scheduling(myrank+1),ntotb),recvrow(ntotb),sendrow(ntotb))
833  ! mytotit = looping(myrank+1,2)-looping(myrank+1,1)+1
834 
835  allocate(bandpostcmpi(size(bandpostc,2)), recvrow(size(bandpostc,2)))
836  ! initialize postC
837 
838  bandpostc = 0.0_dp
839 
840  do d=-lowdiag,updiag
841  call para_barrier()
842 
843  !!--------------------------
844  if (d<0) then
845  astart = abs(d)+1
846  else
847  astart = 1
848  end if
849  if (d>0) then
850  aend = na-abs(d)
851  else
852  aend = na
853  end if
854  !print*,'diagonal',d
855 
856  !! calculate scheduling for a
857  ntota = aend-astart+1
858  call spreadwork(ntota,numcpus,scheduling,looping,astart)
859  if (ntota<1000) then
860  everynit = ntota/100
861  else
862  everynit = ntota/1000
863  end if
864 
865  write(dnum,"(i9)") d
866  loopinfo = "bandpostcov() diag "//trim(dnum)//" "
867  startt = mpi_wtime()
868  if (myrank==masterrank) write(output_unit,*)
869  !! indices of normal matrix
870  bandpostcmpi(:) = 0.0_dp
871  aband = updiag+1-d !! aband is constant within this loop
872  do a=looping(myrank+1,1),looping(myrank+1,2) !!astart,aend
873  !! info
874  if ( (myrank==masterrank) .and. (mod(a,everynit)==0) ) then
875  call timeinfo(scheduling(myrank+1),a-astart+1,startt,loopinfo)
876  end if
877 
878  !if ( (myrank==masterrank) .and. (mod(a,250)==0) ) print*,"d",d,"a",a
879  !!--------------------------------------------------------
880  b = a+d
881  !! indices of the band matrix
882  !aband = updiag+1+a-b
883  !bband = b
884  !!--------------------------------------------------------
885 
886  !!----------------------------------------------------------------
887  ! row first two factors
888  row2 = diaginvlambda * u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv)
889 
890  !! calculate one row of first TWO factors
891  !!call columnAxBxC(Ni,Nj,Nk,Nl,Nm,Nn, iUCm1,iUCm2,iUCm3,b,col1)
892  col1 = iucm1(iv,iv(b)) * iucm2(jv,jv(b)) * iucm3(kv,kv(b))
893 
894  !! calculate one element of the posterior covariance
895  !! store it in the band storage format
896  !!bandpostC(aband,bband) = sum(row2*col1)
897  bandpostcmpi(b) = sum(row2*col1)
898  !print*, a,b,aband,bband,bandpostC(aband,bband)
899  !!----------------------------------------------------------------
900 
901  end do ! a=astart,aend
902 
903  !! send/recv rows
904  !! collect all results in masterrank
905 
906 
907  bband1 = looping(myrank+1,1)+d !!b = a+d
908  bband2 = looping(myrank+1,2)+d
909 
910  call para_gathv1ddp(scheduling, looping(:,1)-1, masterrank, &
911  bandpostcmpi(bband1:bband2), recvrow )
912  if (myrank==masterrank) bandpostc(aband,:) = recvrow
913 
914 
915  end do
916 
917  end subroutine bandpostcov
918 
919  !!!===============================================================
920  !-------------------------------------------------
922  !
939  !
940  !
941  ! REVISION HISTORY:
945  !
946  !-------------------------------------------------
947  subroutine posteriormean(U1,U2,U3, diaginvlambda, Z1,Z2,Z3,&
948  G1,G2,G3, mprior, dobs, postm)
950  !!
951  use parautil
952 
953  !!
954  !! Calculate the posterior mean model
955  !!
956  real(dp),intent(in) :: U1(:,:),U2(:,:),U3(:,:), G1(:,:),G2(:,:),G3(:,:)
957  real(dp),intent(in) :: Z1(:,:),Z2(:,:),Z3(:,:)
958  real(dp),intent(in) :: mprior(:),dobs(:),diaginvlambda(:)
959  real(dp),intent(out) :: postm(:)
960 
961  real(dp),allocatable :: ddiff(:),ddiffmpi(:),postmmpi(:)
962  integer :: a,b,Na,Nb
963  integer :: Ni,Nj,Nk,Nl,Nm,Nn
964 
965  integer,allocatable :: av(:),bv(:)
966  integer,allocatable :: iv(:),jv(:),kv(:),lv(:),mv(:),nv(:)
967  !! ivo(:),jvo(:),kvo(:)
968  integer :: p,everynit
969 
970  real(dp),allocatable :: Zh(:),elUDZh(:),Zhmpi(:),elUDZhmpi(:)
971 
972  integer :: ia,ib
973  real(dp) :: startt,firststartt,endt
974  character(len=30) :: loopinfo
975 
976  integer ::curit,totit
977  integer,allocatable :: scheduling(:),looping(:,:)
978 
979  !! sizes
980  ni = size(z1,1)
981  nl = size(z1,2)
982  nj = size(z2,1)
983  nm = size(z2,2)
984  nk = size(z3,1)
985  nn = size(z3,2)
986 
987  !! sizes
988  ! Nr12 = size(U1,2)*size(U2,2)*size(U3,2)
989  ! Nc1 = size(Z1,1)*size(Z2,1)*size(Z3,1)
990  na = size(mprior,1)
991  nb = size(dobs,1)
992 
993  !! allocate stuff
994  allocate(ddiff(nb))
995  allocate(zh(na),eludzh(na))
996 
997  !! vectorize row and col calculations for Kron prod AxBxC
998  !!allocate(ivo(Nb),jvo(Nb),kvo(Nb))
999  allocate(av(na),iv(na),jv(na),kv(na))
1000  allocate(bv(nb),lv(nb),mv(nb),nv(nb))
1001  forall(p = 1:na) av(p) = p
1002  forall(p = 1:nb) bv(p) = p
1003 
1004  !! vectors containing all possible indices for
1005  !! row calculations of Kron prod AxBxC
1006  iv = (av-1)/(nk*nj)+1
1007  jv = (av-1-(iv-1)*nk*nj)/nk+1
1008  kv = av-(jv-1)*nk-(iv-1)*nk*nj
1009  !! vectors containing all possible indices for
1010  !! column calculations of Kron prod AxBxC
1011  lv = (bv-1)/(nn*nm) + 1
1012  mv = (bv-1-(lv-1)*nn*nm)/nn + 1
1013  nv = bv-(mv-1)*nn-(lv-1)*nn*nm
1014  !! Gs have different shape than Us !!
1015 
1016  !!#######################
1017  if (na<1000)then
1018  everynit = na/50
1019  else if (na<100000) then
1020  everynit = na/100
1021  else
1022  everynit = na/1000
1023  end if
1024 
1025  !!---------------------------------
1026  firststartt = mpi_wtime()
1027 
1028  !!#######################
1029  !!# dobs - dcalc #
1030  !!#######################
1031  startt = mpi_wtime()
1032  loopinfo = "posteriormean(): loop 1/3"
1033  if (myrank==masterrank) write(output_unit,*)
1034  !! calculate scheduling for Nb
1035  call spreadwork(nb,numcpus,scheduling,looping,1)
1036  allocate(ddiffmpi(scheduling(myrank+1)))
1037  !! difference obs-calc data
1038  totit = looping(myrank+1,2)-looping(myrank+1,1)
1039  do b=looping(myrank+1,1),looping(myrank+1,2) !! rank starts from 0
1040  curit = (b-looping(myrank+1,1)+1)
1041  if ( (myrank==masterrank) .and. (mod( curit,everynit)==0) ) then
1042  call timeinfo(totit,curit,startt,loopinfo)
1043  end if
1044 
1045  !!--------------------------------------------------------------------------------
1046  ib = b - looping(myrank+1,1) + 1
1047  ddiffmpi(ib) = dobs(b) - sum(mprior * g1(lv(b),iv) * g2(mv(b),jv) * g3(nv(b),kv))
1048  !!--------------------------------------------------------------------------------
1049 
1050  end do
1051 
1052  startt = mpi_wtime()
1053  !! assemble full ddfiff and broadcast it
1054  !! displacements must start from 0, not 1, so looping-1
1055  call para_allgathv1ddp( scheduling,looping(:,1)-1, ddiffmpi, ddiff )
1056  if (myrank==masterrank) then
1057  print*,"time for first allgatherv:",mpi_wtime()-startt,"s"
1058  end if
1059 
1060 
1061  !!#######################
1062  !!# === U d Z h === #
1063  !!#######################
1064  startt = mpi_wtime()
1065  !! calculate scheduling for Na
1066  loopinfo = "posteriormean(): loop 2/3"
1067  if (myrank==masterrank) write(output_unit,*)
1068  call spreadwork(na,numcpus,scheduling,looping,1)
1069  allocate(zhmpi(scheduling(myrank+1)))
1070  totit = looping(myrank+1,2)-looping(myrank+1,1)
1071  do a=looping(myrank+1,1),looping(myrank+1,2) !! rank starts from 0
1072  curit =(a-looping(myrank+1,1)+1)
1073  if ( ( myrank==masterrank ) .and. (mod( curit,everynit)==0) ) then
1074  call timeinfo(totit,curit,startt,loopinfo)
1075  end if
1076 
1077  !!---------------------------------------------------------------------------
1078  ia = a - looping(myrank+1,1) + 1
1079  zhmpi(ia) = sum( z1(iv(a),lv) * z2(jv(a),mv) * z3(kv(a),nv) * ddiff )
1080  !!---------------------------------------------------------------------------
1081 
1082  end do
1083 
1084  startt = mpi_wtime()
1085  !! assemble full Zh and broadcast it
1086  call para_allgathv1ddp( scheduling,looping(:,1)-1, zhmpi, zh )
1087  if (myrank==masterrank) then
1088  print*,"time for second allgatherv:",mpi_wtime()-startt,"s"
1089  end if
1090 
1091  !!-----------------------------------------------
1092  !! Second loop
1093  !!### need to re-loop because full Zh is needed
1094  !!#######################
1095  !!# post(a) #
1096  !!#######################
1097  startt = mpi_wtime()
1098  !! calculate scheduling for Na
1099  loopinfo = "posteriormean(): loop 3/3"
1100  if (myrank==masterrank) write(output_unit,*)
1101  call spreadwork(na,numcpus,scheduling,looping,1)
1102  allocate(eludzhmpi(scheduling(myrank+1)),postmmpi(scheduling(myrank+1)))
1103  totit = looping(myrank+1,2)-looping(myrank+1,1)
1104  do a=looping(myrank+1,1),looping(myrank+1,2) !! rank starts from 0
1105  curit = (a-looping(myrank+1,1)+1)
1106  if ( ( myrank==masterrank ) .and. (mod(curit,everynit)==0) ) then
1107  call timeinfo(totit,curit,startt,loopinfo)
1108  end if
1109 
1110  !!--------------------------------------------------------------------------------------
1111  ia = a - looping(myrank+1,1) + 1
1112  !!
1113  eludzhmpi(ia) = sum( u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv) * diaginvlambda * zh )
1114  !! element of the posterior mean
1115  postmmpi(ia) = mprior(a) + eludzhmpi(ia)
1116  !!--------------------------------------------------------------------------------------
1117 
1118  end do
1119 
1120  startt = mpi_wtime()
1121  !! assemble full postm only for masterrank
1122  call para_gathv1ddp( scheduling,looping(:,1)-1, masterrank, postmmpi, postm )
1123  if (myrank==masterrank) then
1124  print*,"time for third allgatherv:",mpi_wtime()-startt,"s"
1125  end if
1126 
1127 
1128  if (myrank==masterrank) then
1129  write(output_unit,*)
1130  write(output_unit,*)
1131  endt = MPI_Wtime()
1132  print*,"posteriormean():",endt-firststartt," MPI wall clock time"
1133  end if
1134 
1135  !print*,myrank,":",shape(ddiffmpi),shape(Zhmpi),shape(elUDZhmpi),shape(postmmpi),&
1136  ! shape(ddiff),shape(Zh),shape(elUDZh),shape(postm)
1137 
1138  return
1139  end subroutine posteriormean
1140 
1141  !!!==================================================
1142  !-------------------------------------------------
1154  !
1155  !
1156  ! REVISION HISTORY:
1158  !
1159  !-------------------------------------------------
1160  subroutine symgeneigvv(A,uplo,Bpd,lambda,U)
1161  real(dp),intent(in) :: A(:,:),Bpd(:,:)
1162  character,intent(in) :: uplo
1163  real(dp),intent(out) :: lambda(:),U(:,:)
1164  integer :: n,lwork,itype,info
1165  character :: jobz
1166  real(dp),allocatable :: work(:),tmpB(:,:)
1167  ! DSYGV computes all the eigenvalues, and optionally, the eigenvectors
1168  ! of a real generalized symmetric-definite eigenproblem, of the form
1169  ! A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x.
1170  ! Here A and B are assumed to be symmetric and B is also
1171  ! positive definite.
1172  itype = 3
1173  jobz = 'V'
1174  n=size(a,1)
1175  if ( (size(a,2)/=n) .or. (size(bpd,1)/=n) .or. (size(bpd,2)/=n) ) then
1176  write(*,*) "symgeneigvv(): Matrices have wrong sizes"
1177  stop
1178  end if
1179  allocate(tmpb(n,n))
1180  u = a
1181  tmpb = bpd
1182  lwork=3*n-1
1183  ! lwork=-1
1184  ! allocate(work(1))
1185  ! call DSYGV(itype,jobz,uplo,nm1,U1,nm1,choCm1,nm1,lambda1,work,lwork,info)
1186  ! deallocate(work)
1187  allocate(work(lwork))
1188  call dsygv(itype,jobz,uplo,n,u,n,tmpb,n,lambda,work,lwork,info)
1189  if (info /= 0) stop 'symgeneigvv(): Matrix eigendecomposition failed!'
1190  end subroutine symgeneigvv
1191 
1192  !!!==================================================
1193  !-------------------------------------------------
1196  !
1200  !
1201  !
1202  ! REVISION HISTORY:
1204  !
1205  !-------------------------------------------------
1206  subroutine solvels( A, B, sol )
1207  real(dp),intent(in) :: A(:,:),B(:,:)
1208  real(dp),intent(out) :: sol(:,:)
1209  integer,allocatable :: ipiv(:)
1210  integer :: n,nrhs,lda,ldb
1211  integer :: info
1212  real(dp),allocatable :: A2(:,:)
1213  ! External procedures defined in LAPACK
1214  external dgesv
1215  n = size(a,1)
1216  nrhs = size(b,2)
1217  allocate(a2(n,n))
1218  a2 = a
1219  lda = size(a,1)
1220  allocate(ipiv(n))
1221  ldb = size(b,1)
1222  sol = b
1223  call dgesv(n,nrhs,a2,lda,ipiv,sol,ldb,info)
1224  if (info/=0) then
1225  write(*,*) "linear system solver failed...'"
1226  print*,'info: ',info
1227  stop
1228  end if
1229  end subroutine solvels
1230 
1231  !!!============================================================
1232  !-------------------------------------------------
1235  !
1239  !
1240  ! REVISION HISTORY:
1242  !
1243  !-------------------------------------------------
1244  subroutine symsolvels( A, B, sol )
1245  real(dp),intent(in) :: A(:,:),B(:,:)
1246  real(dp),intent(out) :: sol(:,:)
1247  integer,parameter :: wmax=3000 ! max size allowed for work
1248  integer,allocatable :: ipiv(:)
1249  integer :: n,nrhs,lda,ldb,lwork
1250  character :: uplo
1251  real(dp),allocatable :: work(:),A2(:,:)
1252  integer :: info
1253  ! External procedures defined in LAPACK
1254  external dsysv
1255  uplo = 'L'
1256  n = size(a,1)
1257  nrhs = size(b,2)
1258  allocate(a2(n,n))
1259  a2 = a
1260  lda = size(a,1)
1261  allocate(ipiv(n))
1262  ldb = size(b,1)
1263  ! copy B to sol to avoid it to be overwritten
1264  sol = b
1265  ! apparently work must be allocated for the query to dsysv to work...
1266  allocate(work(1))
1267  lwork = -1
1268  call dsysv(uplo,n,nrhs,a2,lda,ipiv,sol,ldb,work,lwork,info)
1269  lwork = min(wmax,int(work(1)))
1270  !! now lwork is assigned, so actually calculate eigvec/val
1271  deallocate(work)
1272  allocate(work(lwork))
1273  call dsysv(uplo,n,nrhs,a2,lda,ipiv,sol,ldb,work,lwork,info)
1274  if (info/=0) then
1275  write(*,*) "linear system solver failed...'"
1276  print*,'info: ',info
1277  stop
1278  end if
1279  end subroutine symsolvels
1280 
1281  !!====================================================
1282 
1283 end module ompi_kronlininv
1284 
1285 !!====================================================
1286 !!====================================================
subroutine para_bc1ddp(arr, source)
integer, parameter, public dp
integer, parameter, public masterrank
subroutine, private symsolvels(A, B, sol)
Solves a linear system AX = B for symmetric A, real numbers
subroutine para_bc2ddp(arr, source)
subroutine para_srarr1ddp(source, dest, tag, sendrow, recvrow)
subroutine, private solvels(A, B, sol)
Solves a linear system AX = B, real numbers
subroutine para_barrier()
subroutine para_srint(source, dest, tag, sint, rint)
subroutine para_init()
subroutine para_gathv1ddp(recvcounts, displs, receiver, sendbuf, recvbuf)
integer, public, protected myrank
integer, protected dp_real_type
subroutine spreadwork(nit, nunits, scheduling, looping, startpoint)
integer, parameter, public rexprange
integer, parameter, public pdigits
subroutine, public blockpostcov(U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, astart, aend, bstart, bend, postC)
Computes a block of the posterior covariance.
subroutine, public calcfactors(G1, G2, G3, Cm1, Cm2, Cm3, Cd1, Cd2, Cd3, U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, iUCmGtiCd1, iUCmGtiCd2, iUCmGtiCd3)
Computes the factors necessary to solve the inverse problem.
This file contains the parallel OpenMPI (distributed memory) version of KronLinInv. OpenMPI and LAPACK libraries are required to be installed in the system.
integer, public, protected numcpus
subroutine, public bandpostcov(U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, lowdiag, updiag, bandpostC)
Computes a band of the posterior covariance. See http://www.netlib.org/lapack/lug/node124.html
subroutine para_finish()
Procedures to perform linear inversion under gaussian assumptions using the Kronecker-product approac...
subroutine, private symgeneigvv(A, uplo, Bpd, lambda, U)
Computes eigenvalues and eigenvectors of the generalized symmetric definite eigenproblem. See http://www.netlib.org/lapack/lug/node54.html
subroutine para_allgathv1ddp(scheduling, displs, sendbuf, recvbuf)
subroutine timeinfo(totit, curit, startt, loopinfo)
Subroutines/utilities for the parallel version of KronLinInv using OpenMPI.
subroutine, public posteriormean(U1, U2, U3, diaginvlambda, Z1, Z2, Z3, G1, G2, G3, mprior, dobs, postm)
Computes the posterior mean