90 use,
intrinsic :: iso_c_binding
97 write(*,*)
"call MPI_INIT(mpierr): mpierr /= 0" 101 call mpi_comm_rank(mpi_comm_world,
myrank, mpierr)
102 call mpi_comm_size(mpi_comm_world,
numcpus, mpierr)
117 call mpi_finalize(ierror)
124 call mpi_barrier(mpi_comm_world,mpierr)
162 subroutine spreadwork(nit,nunits,scheduling,looping,startpoint)
163 integer,
intent(in) :: nit,nunits,startpoint
164 integer,
allocatable,
intent(out) :: scheduling(:),looping(:,:)
165 integer :: i,nitcpu,resto
166 if (nit<=nunits)
then 167 write(*,*)
myrank,
": spredwork(): nit<=nunits" 168 write(*,*) nit,nunits
171 if (
allocated(scheduling) )
deallocate(scheduling)
172 if (
allocated(looping))
deallocate(looping)
173 allocate(scheduling(nunits),looping(nunits,2))
177 scheduling(:) = nitcpu
178 resto = mod(nit,nunits)
181 scheduling(1:resto) = scheduling(1:resto) + 1
182 looping(1,:) = [startpoint,startpoint+scheduling(1)-1]
184 looping(i,1) = sum(scheduling(1:i-1)) + startpoint
185 looping(i,2) = sum(scheduling(1:i)) + startpoint - 1
195 subroutine para_srint(source,dest,tag,sint,rint)
196 integer,
intent(in) :: source,dest,tag
197 integer,
intent(in) :: sint
198 integer,
intent(inout) :: rint
199 integer :: nelem,mpierr
200 integer :: mpistat(mpi_status_size)
206 call mpi_send(sint, nelem, mpi_integer, dest, tag, &
207 mpi_comm_world, mpierr)
210 call mpi_recv(rint, nelem, mpi_integer, source, tag,&
211 mpi_comm_world, mpistat, mpierr)
221 integer,
intent(in) :: source,dest,tag
222 real(dp),
intent(in) :: sendrow(:)
223 real(dp),
intent(inout) :: recvrow(:)
225 integer :: mpierr,nelem
226 integer :: mpistat(mpi_status_size)
237 call mpi_send(sendrow, nelem,
dp_real_type, dest, tag, &
238 mpi_comm_world, mpierr)
242 call mpi_recv(recvrow, nelem,
dp_real_type, source, tag,&
243 mpi_comm_world, mpistat, mpierr)
253 integer,
intent(in) :: source
255 call mpi_bcast(arr,
size(arr),
dp_real_type, source, mpi_comm_world, mpierr)
262 integer,
intent(in) :: source
264 call mpi_bcast(arr,
size(arr),
dp_real_type, source, mpi_comm_world, mpierr)
270 real(dp),
intent(out) :: recvbuf(:)
271 integer,
intent(in) :: scheduling(:),displs(:)
272 real(dp),
intent(in) :: sendbuf(:)
282 call mpi_allgatherv(sendbuf,
size(sendbuf),
dp_real_type, recvbuf, scheduling, &
289 subroutine para_gathv1ddp( recvcounts, displs, receiver, sendbuf, recvbuf )
290 real(dp),
intent(out) :: recvbuf(:)
291 integer,
intent(in) :: recvcounts(:),receiver,displs(:)
292 real(dp),
intent(in) :: sendbuf(:)
305 call mpi_gatherv(sendbuf,
size(sendbuf),
dp_real_type, recvbuf, recvcounts, &
312 subroutine timeinfo(totit,curit,startt,loopinfo)
313 use,
intrinsic :: iso_fortran_env, only: output_unit
314 character(len=30) :: loopinfo
315 integer :: totit,curit
316 real(dp) :: frac,eta,startt
318 frac =
real(curit,
dp)/(
real(totit,
dp))
319 eta = ( (mpi_wtime()-startt) /
real(curit,dp) ) * &
320 (
real(totit-curit,
dp))
324 write(output_unit,fmt=
'(a5,a34,f7.3,a7,f12.2,1x,a3)') &
325 char(27)//
'[1A'//achar(13),&
326 adjustl(loopinfo)//
': ',&
327 frac*100_dp,
"%, ETA:",eta/60.0,
"min" 354 use,
intrinsic :: iso_fortran_env, only: output_unit
458 subroutine calcfactors(G1,G2,G3,Cm1,Cm2,Cm3,Cd1,Cd2,Cd3,&
459 U1,U2,U3,diaginvlambda,iUCm1,iUCm2,iUCm3,&
460 iUCmGtiCd1,iUCmGtiCd2,iUCmGtiCd3 )
462 real(dp),
intent(in) :: G1(:,:),G2(:,:),G3(:,:),&
463 Cm1(:,:),Cm2(:,:),Cm3(:,:),&
464 Cd1(:,:),Cd2(:,:),Cd3(:,:)
466 real(dp),
intent(out) :: U1(:,:),U2(:,:),U3(:,:),&
467 iUCm1(:,:),iUCm2(:,:),iUCm3(:,:),&
468 iUCmGtiCd1(:,:),iUCmGtiCd2(:,:),iUCmGtiCd3(:,:),&
471 real(dp),
allocatable :: lambda1(:),lambda2(:),lambda3(:)
472 real(dp),
allocatable :: iCdG1(:,:),iCdG2(:,:),iCdG3(:,:),&
473 GtiCdG1(:,:),GtiCdG2(:,:),GtiCdG3(:,:),&
474 ide1(:,:),ide2(:,:),ide3(:,:),&
475 iCd1(:,:),iCd2(:,:),iCd3(:,:)
476 integer :: i,j,k,p,nm,nm1,nm2,nm3,nd1,nd2,nd3
491 print*,
'calcfactors(): compute preliminary stuff' 492 allocate(icdg1(nd1,nm1),icdg2(nd2,nm2),icdg3(nd3,nm3))
497 allocate(gticdg1(nm1,nm1),gticdg2(nm2,nm2),gticdg3(nm3,nm3))
498 gticdg1 = matmul( transpose(g1), icdg1 )
499 gticdg2 = matmul( transpose(g2), icdg2 )
500 gticdg3 = matmul( transpose(g3), icdg3 )
506 print*,
'calcfactors(): compute fa' 507 allocate(lambda1(nm1),lambda2(nm2),lambda3(nm3))
517 print*,
'calcfactors(): compute fb' 522 diaginvlambda(p) = 1.0_dp/(1.0_dp+lambda1(i)*lambda2(j)*lambda3(k))
532 print*,
'calcfactors(): compute fc' 541 print*,
'calcfactors(): compute fd' 542 allocate(ide1(nd1,nd1),ide2(nd2,nd2),ide3(nd3,nd3))
546 forall(i = 1:nd1) ide1(i,i) = 1.0_dp
547 forall(i = 1:nd2) ide2(i,i) = 1.0_dp
548 forall(i = 1:nd3) ide3(i,i) = 1.0_dp
549 allocate(icd1(nd1,nd1),icd2(nd2,nd2),icd3(nd3,nd3))
554 iucmgticd1 = matmul( iucm1, matmul( transpose(g1), icd1 ))
555 iucmgticd2 = matmul( iucm2, matmul( transpose(g2), icd2 ))
556 iucmgticd3 = matmul( iucm3, matmul( transpose(g3), icd3 ))
558 print*,
"calcfactors(): end " 584 iUCm1,iUCm2,iUCm3, astart,aend,bstart,bend, postC)
588 real(dp),
intent(in) :: U1(:,:),U2(:,:),U3(:,:),iUCm1(:,:), &
589 iUCm2(:,:),iUCm3(:,:)
591 real(dp),
intent(in) :: diaginvlambda(:)
592 real(dp),
intent(out) :: postC(:,:)
593 integer,
intent(in) :: astart,aend,bstart,bend
595 integer :: a,b,irow,i,j,sender
596 real(dp),
allocatable :: row2(:),col1(:)
598 integer :: Ni,Nj,Nk,Nl,Nm,Nn,Na,Nb,ntota,ntotb
600 integer,
allocatable :: av(:)
601 integer,
allocatable :: iv(:),jv(:),kv(:)
602 integer :: p,everynit,impicount,mytotit,rirow
603 real(dp) :: startt,firststartt,endt
604 integer,
allocatable :: scheduling(:),looping(:,:)
605 real(dp),
allocatable :: postcmpi(:,:),recvrow(:),sendrow(:)
606 character(len=30) :: loopinfo
609 write(output_unit,*)
"posteriormean(): calculating posterior mean... [using OpenMPI]" 623 write(*,*)
'(Na /= Nb)', na,nb
626 ntota = aend-astart+1
627 ntotb = bend-bstart+1
628 if ( (
size(postc,1)/=ntota) .or. (
size(postc,2)/=ntotb))
then 629 write(*,*)
"Wrong size of the posterior covariance block array." 633 if ( (astart<1) .or. (aend>na) .or. (astart>aend) .or. &
634 (bstart<1) .or. (bend>na) .or. (bstart>bend) )
then 635 write(*,*)
"Wrong size of the requested block array." 640 allocate(av(na),iv(na),jv(na),kv(na))
642 forall(p = 1:na) av(p) = p
647 iv = (av-1)/(nk*nj)+1
648 jv = (av-1-(iv-1)*nk*nj)/nk + 1
649 kv = av-(jv-1)*nk-(iv-1)*nk*nj
652 nr12 =
size(u1,2)*
size(u2,2)*
size(u3,2)
653 nc1 =
size(iucm1,1)*
size(iucm2,1)*
size(iucm3,1)
654 allocate(row2(nr12),col1(nc1))
660 everynit = ntota/1000
664 firststartt = mpi_wtime()
665 loopinfo =
"blockpostcov() " 669 allocate(postcmpi(scheduling(
myrank+1),ntotb),recvrow(ntotb),sendrow(ntotb))
670 mytotit = scheduling(
myrank+1)
682 call timeinfo(mytotit,irow,startt,loopinfo)
688 row2 = u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv) * diaginvlambda
695 col1 = iucm1(iv,iv(b)) * iucm2(jv,jv(b)) * iucm3(kv,kv(b))
698 i = irow-looping(
myrank+1,1)+1
699 postcmpi(i,j) = sum(row2*col1)
709 do i=looping(sender+1,1),looping(sender+1,2)
713 if (
myrank==sender) sendrow = postcmpi(i,:)
725 print*,
"blockpostcov():",endt-firststartt,
" MPI wall clock time" 752 iUCm1,iUCm2,iUCm3, lowdiag, updiag, bandpostC)
756 real(dp),
intent(in) :: U1(:,:),U2(:,:),U3(:,:),iUCm1(:,:), &
757 iUCm2(:,:),iUCm3(:,:)
759 real(dp),
intent(in) :: diaginvlambda(:)
760 real(dp),
intent(inout) :: bandpostC(:,:)
761 integer,
intent(in) :: lowdiag, updiag
764 real(dp),
allocatable :: row2(:),col1(:),recvrow(:),bandpostCmpi(:)
766 integer :: Ni,Nj,Nk,Nl,Nm,Nn,Na,Nb
768 real(dp) :: firststartt,startt
769 integer,
allocatable :: av(:)
770 integer,
allocatable :: iv(:),jv(:),kv(:)
771 integer :: p,aband,aend,bband,astart,d,bband1,bband2,ntota,everynit,mytotit
772 integer,
allocatable :: scheduling(:),looping(:,:)
773 character(len=30) :: loopinfo
774 character(len=12) :: dnum
786 write(*,*)
'(Na /= Nb)', na,nb
789 if ( (updiag>=na) .or. (lowdiag>=na) .or. (lowdiag<0) .or. (updiag<0) )
then 790 write(*,*) .or.
"(updiag<Na) (lowdiag<Na)" 791 write(*,*)
"updiag",updiag,
"Na",na,
"lowdiag",lowdiag,
"Na",na
796 allocate(av(na),iv(na),jv(na),kv(na))
798 forall(p = 1:na) av(p) = p
803 iv = (av-1)/(nk*nj)+1
804 jv = (av-1-(iv-1)*nk*nj)/nk+1
805 kv = av-(jv-1)*nk-(iv-1)*nk*nj
813 nr12 =
size(u1,2)*
size(u2,2)*
size(u3,2)
814 nc1 =
size(iucm1,1)*
size(iucm2,1)*
size(iucm3,1)
815 allocate(row2(nr12),col1(nc1))
829 firststartt = mpi_wtime()
835 allocate(bandpostcmpi(
size(bandpostc,2)), recvrow(
size(bandpostc,2)))
857 ntota = aend-astart+1
858 call spreadwork(ntota,numcpus,scheduling,looping,astart)
862 everynit = ntota/1000
866 loopinfo =
"bandpostcov() diag "//trim(dnum)//
" " 868 if (myrank==masterrank)
write(output_unit,*)
870 bandpostcmpi(:) = 0.0_dp
872 do a=looping(myrank+1,1),looping(myrank+1,2)
874 if ( (myrank==masterrank) .and. (mod(a,everynit)==0) )
then 875 call timeinfo(scheduling(myrank+1),a-astart+1,startt,loopinfo)
888 row2 = diaginvlambda * u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv)
892 col1 = iucm1(iv,iv(b)) * iucm2(jv,jv(b)) * iucm3(kv,kv(b))
897 bandpostcmpi(b) = sum(row2*col1)
907 bband1 = looping(myrank+1,1)+d
908 bband2 = looping(myrank+1,2)+d
910 call para_gathv1ddp(scheduling, looping(:,1)-1, masterrank, &
911 bandpostcmpi(bband1:bband2), recvrow )
912 if (myrank==masterrank) bandpostc(aband,:) = recvrow
948 G1,G2,G3, mprior, dobs, postm)
956 real(dp),
intent(in) :: U1(:,:),U2(:,:),U3(:,:), G1(:,:),G2(:,:),G3(:,:)
957 real(dp),
intent(in) :: Z1(:,:),Z2(:,:),Z3(:,:)
958 real(dp),
intent(in) :: mprior(:),dobs(:),diaginvlambda(:)
959 real(dp),
intent(out) :: postm(:)
961 real(dp),
allocatable :: ddiff(:),ddiffmpi(:),postmmpi(:)
963 integer :: Ni,Nj,Nk,Nl,Nm,Nn
965 integer,
allocatable :: av(:),bv(:)
966 integer,
allocatable :: iv(:),jv(:),kv(:),lv(:),mv(:),nv(:)
968 integer :: p,everynit
970 real(dp),
allocatable :: Zh(:),elUDZh(:),Zhmpi(:),elUDZhmpi(:)
973 real(dp) :: startt,firststartt,endt
974 character(len=30) :: loopinfo
976 integer ::curit,totit
977 integer,
allocatable :: scheduling(:),looping(:,:)
995 allocate(zh(na),eludzh(na))
999 allocate(av(na),iv(na),jv(na),kv(na))
1000 allocate(bv(nb),lv(nb),mv(nb),nv(nb))
1001 forall(p = 1:na) av(p) = p
1002 forall(p = 1:nb) bv(p) = p
1006 iv = (av-1)/(nk*nj)+1
1007 jv = (av-1-(iv-1)*nk*nj)/nk+1
1008 kv = av-(jv-1)*nk-(iv-1)*nk*nj
1011 lv = (bv-1)/(nn*nm) + 1
1012 mv = (bv-1-(lv-1)*nn*nm)/nn + 1
1013 nv = bv-(mv-1)*nn-(lv-1)*nn*nm
1019 else if (na<100000)
then 1026 firststartt = mpi_wtime()
1031 startt = mpi_wtime()
1032 loopinfo =
"posteriormean(): loop 1/3" 1036 allocate(ddiffmpi(scheduling(
myrank+1)))
1040 curit = (b-looping(
myrank+1,1)+1)
1042 call timeinfo(totit,curit,startt,loopinfo)
1046 ib = b - looping(
myrank+1,1) + 1
1047 ddiffmpi(ib) = dobs(b) - sum(mprior * g1(lv(b),iv) * g2(mv(b),jv) * g3(nv(b),kv))
1052 startt = mpi_wtime()
1057 print*,
"time for first allgatherv:",mpi_wtime()-startt,
"s" 1064 startt = mpi_wtime()
1066 loopinfo =
"posteriormean(): loop 2/3" 1069 allocate(zhmpi(scheduling(
myrank+1)))
1072 curit =(a-looping(
myrank+1,1)+1)
1074 call timeinfo(totit,curit,startt,loopinfo)
1078 ia = a - looping(
myrank+1,1) + 1
1079 zhmpi(ia) = sum( z1(iv(a),lv) * z2(jv(a),mv) * z3(kv(a),nv) * ddiff )
1084 startt = mpi_wtime()
1088 print*,
"time for second allgatherv:",mpi_wtime()-startt,
"s" 1097 startt = mpi_wtime()
1099 loopinfo =
"posteriormean(): loop 3/3" 1102 allocate(eludzhmpi(scheduling(
myrank+1)),postmmpi(scheduling(
myrank+1)))
1105 curit = (a-looping(
myrank+1,1)+1)
1107 call timeinfo(totit,curit,startt,loopinfo)
1111 ia = a - looping(
myrank+1,1) + 1
1113 eludzhmpi(ia) = sum( u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv) * diaginvlambda * zh )
1115 postmmpi(ia) = mprior(a) + eludzhmpi(ia)
1120 startt = mpi_wtime()
1124 print*,
"time for third allgatherv:",mpi_wtime()-startt,
"s" 1129 write(output_unit,*)
1130 write(output_unit,*)
1132 print*,
"posteriormean():",endt-firststartt,
" MPI wall clock time" 1161 real(dp),
intent(in) :: A(:,:),Bpd(:,:)
1162 character,
intent(in) :: uplo
1163 real(dp),
intent(out) :: lambda(:),U(:,:)
1164 integer :: n,lwork,itype,info
1166 real(dp),
allocatable :: work(:),tmpB(:,:)
1175 if ( (
size(a,2)/=n) .or. (
size(bpd,1)/=n) .or. (
size(bpd,2)/=n) )
then 1176 write(*,*)
"symgeneigvv(): Matrices have wrong sizes" 1187 allocate(work(lwork))
1188 call dsygv(itype,jobz,uplo,n,u,n,tmpb,n,lambda,work,lwork,info)
1189 if (info /= 0) stop
'symgeneigvv(): Matrix eigendecomposition failed!' 1206 subroutine solvels( A, B, sol )
1207 real(dp),
intent(in) :: A(:,:),B(:,:)
1208 real(dp),
intent(out) :: sol(:,:)
1209 integer,
allocatable :: ipiv(:)
1210 integer :: n,nrhs,lda,ldb
1212 real(dp),
allocatable :: A2(:,:)
1223 call dgesv(n,nrhs,a2,lda,ipiv,sol,ldb,info)
1225 write(*,*)
"linear system solver failed...'" 1226 print*,
'info: ',info
1245 real(dp),
intent(in) :: A(:,:),B(:,:)
1246 real(dp),
intent(out) :: sol(:,:)
1247 integer,
parameter :: wmax=3000
1248 integer,
allocatable :: ipiv(:)
1249 integer :: n,nrhs,lda,ldb,lwork
1251 real(dp),
allocatable :: work(:),A2(:,:)
1268 call dsysv(uplo,n,nrhs,a2,lda,ipiv,sol,ldb,work,lwork,info)
1269 lwork = min(wmax,int(work(1)))
1272 allocate(work(lwork))
1273 call dsysv(uplo,n,nrhs,a2,lda,ipiv,sol,ldb,work,lwork,info)
1275 write(*,*)
"linear system solver failed...'" 1276 print*,
'info: ',info
subroutine para_bc1ddp(arr, source)
integer, parameter, public dp
integer, parameter, public masterrank
subroutine, private symsolvels(A, B, sol)
Solves a linear system AX = B for symmetric A, real numbers
subroutine para_bc2ddp(arr, source)
subroutine para_srarr1ddp(source, dest, tag, sendrow, recvrow)
subroutine, private solvels(A, B, sol)
Solves a linear system AX = B, real numbers
subroutine para_barrier()
subroutine para_srint(source, dest, tag, sint, rint)
subroutine para_gathv1ddp(recvcounts, displs, receiver, sendbuf, recvbuf)
integer, public, protected myrank
integer, protected dp_real_type
subroutine spreadwork(nit, nunits, scheduling, looping, startpoint)
integer, parameter, public rexprange
integer, parameter, public pdigits
subroutine, public blockpostcov(U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, astart, aend, bstart, bend, postC)
Computes a block of the posterior covariance.
subroutine, public calcfactors(G1, G2, G3, Cm1, Cm2, Cm3, Cd1, Cd2, Cd3, U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, iUCmGtiCd1, iUCmGtiCd2, iUCmGtiCd3)
Computes the factors necessary to solve the inverse problem.
This file contains the parallel OpenMPI (distributed memory) version of KronLinInv. OpenMPI and LAPACK libraries are required to be installed in the system.
integer, public, protected numcpus
subroutine, public bandpostcov(U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, lowdiag, updiag, bandpostC)
Computes a band of the posterior covariance. See http://www.netlib.org/lapack/lug/node124.html
Procedures to perform linear inversion under gaussian assumptions using the Kronecker-product approac...
subroutine, private symgeneigvv(A, uplo, Bpd, lambda, U)
Computes eigenvalues and eigenvectors of the generalized symmetric definite eigenproblem. See http://www.netlib.org/lapack/lug/node54.html
subroutine para_allgathv1ddp(scheduling, displs, sendbuf, recvbuf)
subroutine timeinfo(totit, curit, startt, loopinfo)
Subroutines/utilities for the parallel version of KronLinInv using OpenMPI.
subroutine, public posteriormean(U1, U2, U3, diaginvlambda, Z1, Z2, Z3, G1, G2, G3, mprior, dobs, postm)
Computes the posterior mean