76 use,
intrinsic :: iso_fortran_env, only: output_unit
184 subroutine calcfactors(G1,G2,G3,Cm1,Cm2,Cm3,Cd1,Cd2,Cd3,&
185 U1,U2,U3,diaginvlambda,iUCm1,iUCm2,iUCm3,&
186 iUCmGtiCd1,iUCmGtiCd2,iUCmGtiCd3 )
188 real(dp),
intent(in) :: G1(:,:),G2(:,:),G3(:,:),&
189 Cm1(:,:),Cm2(:,:),Cm3(:,:),&
190 Cd1(:,:),Cd2(:,:),Cd3(:,:)
192 real(dp),
intent(out) :: U1(:,:),U2(:,:),U3(:,:),&
193 iUCm1(:,:),iUCm2(:,:),iUCm3(:,:),&
194 iUCmGtiCd1(:,:),iUCmGtiCd2(:,:),iUCmGtiCd3(:,:),&
197 real(dp),
allocatable :: lambda1(:),lambda2(:),lambda3(:)
198 real(dp),
allocatable :: iCdG1(:,:),iCdG2(:,:),iCdG3(:,:),&
199 GtiCdG1(:,:),GtiCdG2(:,:),GtiCdG3(:,:),&
200 ide1(:,:),ide2(:,:),ide3(:,:),&
201 iCd1(:,:),iCd2(:,:),iCd3(:,:)
202 integer :: i,j,k,p,nm,nm1,nm2,nm3,nd1,nd2,nd3
217 print*,
'calcfactors(): compute preliminary stuff' 218 allocate(icdg1(nd1,nm1),icdg2(nd2,nm2),icdg3(nd3,nm3))
224 gticdg1 = matmul( transpose(g1), icdg1 )
225 gticdg2 = matmul( transpose(g2), icdg2 )
226 gticdg3 = matmul( transpose(g3), icdg3 )
232 print*,
'calcfactors(): compute fa' 233 allocate(lambda1(nm1),lambda2(nm2),lambda3(nm3))
243 print*,
'calcfactors():compute fb' 248 diaginvlambda(p) = 1.0_dp/(1.0_dp+lambda1(i)*lambda2(j)*lambda3(k))
258 print*,
'calcfactors(): compute fc' 266 print*,
'calcfactors(): compute fd' 267 allocate(ide1(nd1,nd1),ide2(nd2,nd2),ide3(nd3,nd3))
271 forall(i = 1:nd1) ide1(i,i) = 1.0_dp
272 forall(i = 1:nd2) ide2(i,i) = 1.0_dp
273 forall(i = 1:nd3) ide3(i,i) = 1.0_dp
274 allocate(icd1(nd1,nd1),icd2(nd2,nd2),icd3(nd3,nd3))
279 iucmgticd1 = matmul( iucm1, matmul( transpose(g1), icd1 ))
280 iucmgticd2 = matmul( iucm2, matmul( transpose(g2), icd2 ))
281 iucmgticd3 = matmul( iucm3, matmul( transpose(g3), icd3 ))
288 print*,
'calcfactors(): end ' 313 iUCm1,iUCm2,iUCm3, astart,aend,bstart,bend, postC)
317 real(dp),
intent(in) :: U1(:,:),U2(:,:),U3(:,:),iUCm1(:,:), &
318 iUCm2(:,:),iUCm3(:,:)
320 real(dp),
intent(in) :: diaginvlambda(:)
321 real(dp),
intent(out) :: postC(:,:)
322 integer,
intent(in) :: astart,aend,bstart,bend
325 real(dp),
allocatable :: row1(:),row2(:),col1(:)
327 integer :: Ni,Nj,Nk,Nl,Nm,Nn,Na,Nb
329 integer,
allocatable :: av(:)
330 integer,
allocatable :: iv(:),jv(:),kv(:)
331 integer :: p,csha1,csha2,everynit,nthr,privcount,totcount
332 real(dp) :: eta,frac,startt,endt
345 write(*,*)
'(Na /= Nb)', na,nb
348 csha1 = aend-astart+1
349 csha2 = bend-bstart+1
350 if ( (
size(postc,1)/=csha1) .or. (
size(postc,2)/=csha2))
then 351 write(*,*)
"Wrong size of the posterior covariance block array." 355 if ( (astart<1) .or. (aend>na) .or. (astart>aend) .or. &
356 (bstart<1) .or. (bend>na) .or. (bstart>bend) )
then 357 write(*,*)
"Wrong size of the requested block array." 362 allocate(av(na),iv(na),jv(na),kv(na))
364 forall(p = 1:na) av(p) = p
369 iv = (av-1)/(nk*nj)+1
370 jv = (av-1-(iv-1)*nk*nj)/nk + 1
371 kv = av-(jv-1)*nk-(iv-1)*nk*nj
374 nr12 =
size(u1,2)*
size(u2,2)*
size(u3,2)
375 nc1 =
size(iucm1,1)*
size(iucm2,1)*
size(iucm3,1)
376 allocate(row1(nr12),row2(nr12),col1(nc1))
379 if (aend-astart+1<20)
then 381 else if (aend-astart+1<100000)
then 382 everynit = (aend-astart+1)/100
384 everynit = (aend-astart+1)/1000
390 startt = omp_get_wtime()
391 nthr = omp_get_num_threads()
393 totcount = aend-astart+1
395 if ( omp_get_thread_num()==0 )
write(output_unit,*)
398 privcount = privcount + 1
399 if ( (omp_get_thread_num()==0 ) .and. (mod(privcount,everynit/nthr)==0) )
then 400 frac =
real(privcount,
dp)/(
real(totcount,
dp)/
real(nthr,
dp))
401 eta = ( (omp_get_wtime()-startt) /
real(privcount,dp) ) * &
402 (
real(totcount-nthr*privcount,
dp)/
real(nthr,
dp))
403 write(output_unit,fmt=
'(a19,f7.3,6x,a4,f12.2,1x,a3,a5)')
'blockpostcov(): %',&
404 frac*100_dp,
"ETA:",eta/60.0,
"min",char(27)//
'[1A'//achar(13)
411 row2 = u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv) * diaginvlambda
417 col1 = iucm1(iv,iv(b)) * iucm2(jv,jv(b)) * iucm3(kv,kv(b))
420 postc(a,b) = sum(row2*col1)
425 endt = omp_get_wtime()
428 print*,
"blockpostcov():",endt-startt,
" OMP wall clock time" 452 iUCm1,iUCm2,iUCm3, lowdiag, updiag, bandpostC)
456 real(dp),
intent(in) :: U1(:,:),U2(:,:),U3(:,:),iUCm1(:,:), &
457 iUCm2(:,:),iUCm3(:,:)
459 real(dp),
intent(in) :: diaginvlambda(:)
460 real(dp),
intent(inout) :: bandpostC(:,:)
461 integer,
intent(in) :: lowdiag, updiag
464 real(dp),
allocatable :: row1(:),row2(:),col1(:)
466 integer :: Ni,Nj,Nk,Nl,Nm,Nn,Na,Nb
468 integer,
allocatable :: av(:)
469 integer,
allocatable :: iv(:),jv(:),kv(:)
470 integer :: p,aband,aend,bband,astart,d
471 integer :: nthr,privcount,everynit
472 real(dp) :: eta,frac,startt,firststartt,endt
484 write(*,*)
'(Na /= Nb)', na,nb
487 if ( (updiag>=na) .or. (lowdiag>=na) .or. (lowdiag<0) .or. (updiag<0) )
then 488 write(*,*) .or.
"(updiag<Na) (lowdiag<Na)" 489 write(*,*)
"updiag",updiag,
"Na",na,
"lowdiag",lowdiag,
"Na",na
494 allocate(av(na),iv(na),jv(na),kv(na))
496 forall(p = 1:na) av(p) = p
501 iv = (av-1)/(nk*nj)+1
502 jv = (av-1-(iv-1)*nk*nj)/nk+1
503 kv = av-(jv-1)*nk-(iv-1)*nk*nj
511 nr12 =
size(u1,2)*
size(u2,2)*
size(u3,2)
512 nc1 =
size(iucm1,1)*
size(iucm2,1)*
size(iucm3,1)
513 allocate(row1(nr12),row2(nr12),col1(nc1))
534 firststartt = omp_get_wtime()
550 print*,
"Diagonal ",d,
" from range [",-lowdiag,
",",updiag,
"]" 554 startt = omp_get_wtime()
555 nthr = omp_get_num_threads()
559 privcount = privcount + 1
560 if ( (omp_get_thread_num()==0 ) .and. (mod(privcount,everynit/nthr)==0) )
then 561 frac =
real(privcount,dp)/(
real(nb,dp)/
real(nthr,dp))
562 eta = ( (omp_get_wtime()-startt) /
real(privcount,dp) ) * &
563 (
real(aend-astart+1-nthr*privcount,dp)/
real(nthr,dp))
564 write(output_unit,fmt=
'(a27,f7.3,6x,a4,f12.2,1x,a3,a5)')
'bandpostcov(): %',&
565 frac*100_dp,
"ETA:",eta/60.0,
"min",char(27)//
'[1A'//achar(13)
575 row2 = diaginvlambda * u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv)
579 col1 = iucm1(iv,iv(b)) * iucm2(jv,jv(b)) * iucm3(kv,kv(b))
583 bandpostc(aband,bband) = sum(row2*col1)
588 endt = omp_get_wtime()
591 print*,
"bandpostcov():",endt-firststartt,
" OMP wall clock time" 626 G1,G2,G3, mprior, dobs, postm)
630 real(dp),
intent(in) :: U1(:,:),U2(:,:),U3(:,:), G1(:,:),G2(:,:),G3(:,:)
631 real(dp),
intent(in) :: Z1(:,:),Z2(:,:),Z3(:,:)
632 real(dp),
intent(in) :: mprior(:),dobs(:),diaginvlambda(:)
633 real(dp),
intent(out) :: postm(:)
635 real(dp),
allocatable :: row2(:),col1(:),bigmatrow(:),datrow(:)
636 real(dp),
allocatable :: ddiff(:)
637 integer :: Nr12,Nc1,a,b,Na,Nb
638 integer :: Ni,Nj,Nk,Nl,Nm,Nn
640 integer,
allocatable :: av(:),bv(:)
641 integer,
allocatable :: iv(:),jv(:),kv(:),lv(:),mv(:),nv(:)
643 integer :: p,j,i,everynit
645 real(dp),
allocatable :: Zh(:),elUDZh(:)
646 real(dp) :: datp,elg,elrowud,tZZ
648 integer :: nsteps,hr,min,nthread,privcount,nthr
649 real(dp) :: eta,et1,et2,sec,startt,firststartt,endt,finish,frac
650 character(8) :: curdate
651 character(10) :: curtime
653 write(output_unit,*)
"posteriormean(): calculating posterior mean... [using OpenMP]" 671 allocate(zh(na),eludzh(na))
675 allocate(av(na),iv(na),jv(na),kv(na))
676 allocate(bv(nb),lv(nb),mv(nb),nv(nb))
677 forall(p = 1:na) av(p) = p
678 forall(p = 1:nb) bv(p) = p
682 iv = (av-1)/(nk*nj)+1
683 jv = (av-1-(iv-1)*nk*nj)/nk+1
684 kv = av-(jv-1)*nk-(iv-1)*nk*nj
687 lv = (bv-1)/(nn*nm) + 1
688 mv = (bv-1-(lv-1)*nn*nm)/nn + 1
689 nv = bv-(mv-1)*nn-(lv-1)*nn*nm
695 else if (na<100000)
then 704 firststartt = omp_get_wtime()
705 nthr = omp_get_num_threads()
710 startt= omp_get_wtime()
715 privcount = privcount + 1
716 if ( (omp_get_thread_num()==0 ) .and. (mod(privcount,everynit/nthr)==0) )
then 717 frac =
real(privcount,dp)/(
real(nb,dp)/
real(nthr,dp))
718 eta = ( (omp_get_wtime()-startt) /
real(privcount,dp) ) * &
719 (
real(nb-nthr*privcount,dp)/
real(nthr,dp))
720 write(output_unit,fmt=
'(a27,f7.3,6x,a4,f12.2,1x,a3,a5)')
'posteriormean() loop 1/3: %',&
721 frac*100_dp,
"ETA:",eta/60.0,
"min",char(27)//
'[1A'//achar(13)
726 ddiff(b) = dobs(b) - sum(mprior * g1(lv(b),iv) * g2(mv(b),jv) * g3(nv(b),kv))
736 startt= omp_get_wtime()
739 privcount = privcount + 1
740 if ( (omp_get_thread_num()==0 ) .and. (mod(privcount,everynit/nthr)==0) )
then 741 frac =
real(privcount,dp)/(
real(na,dp)/
real(nthr,dp))
742 eta = ( (omp_get_wtime()-startt) /
real(privcount,dp) ) * &
743 (
real(na-nthr*privcount,dp)/
real(nthr,dp))
744 write(output_unit,fmt=
'(a27,f7.3,6x,a4,f12.2,1x,a3,a5)')
'posteriormean() loop 2/3: %',&
745 frac*100_dp,
"ETA:",eta/60.0,
"min",char(27)//
'[1A'//achar(13)
750 zh(a) = sum( z1(iv(a),lv) * z2(jv(a),mv) * z3(kv(a),nv) * ddiff )
763 startt= omp_get_wtime()
766 privcount = privcount + 1
767 if ( (omp_get_thread_num()==0 ) .and. (mod(privcount,everynit/nthr)==0) )
then 768 frac =
real(privcount,dp)/(
real(na,dp)/
real(nthr,dp))
769 eta = ( (omp_get_wtime()-startt) /
real(privcount,dp) ) * &
770 (
real(na-nthr*privcount,dp)/
real(nthr,dp))
771 write(output_unit,fmt=
'(a27,f7.3,6x,a4,f12.2,1x,a3,a5)')
'posteriormean() loop 3/3: %',&
772 frac*100_dp,
"ETA:",eta/60.0,
"min",char(27)//
'[1A'//achar(13)
777 eludzh(a) = sum( u1(iv(a),iv) * u2(jv(a),jv) * u3(kv(a),kv) * diaginvlambda * zh )
779 postm(a) = mprior(a) + eludzh(a)
784 endt = omp_get_wtime()
787 print*,
"posteriormean():",endt-firststartt,
" OMP wall clock time" 811 real(dp),
intent(in) :: A(:,:),Bpd(:,:)
812 character,
intent(in) :: uplo
813 real(dp),
intent(out) :: lambda(:),U(:,:)
814 integer :: n,lwork,itype,info
816 real(dp),
allocatable :: work(:),tmpB(:,:)
825 if ( (
size(a,2)/=n) .or. (
size(bpd,1)/=n) .or. (
size(bpd,2)/=n) )
then 826 write(*,*)
"symgeneigvv(): Matrices have wrong sizes" 837 allocate(work(lwork))
838 call dsygv(itype,jobz,uplo,n,u,n,tmpb,n,lambda,work,lwork,info)
839 if (info /= 0) stop
'symgeneigvv(): Matrix eigendecomposition failed!' 856 subroutine solvels( A, B, sol )
857 real(dp),
intent(in) :: A(:,:),B(:,:)
858 real(dp),
intent(out) :: sol(:,:)
859 integer,
allocatable :: ipiv(:)
860 integer :: n,nrhs,lda,ldb
862 real(dp),
allocatable :: A2(:,:)
873 call dgesv(n,nrhs,a2,lda,ipiv,sol,ldb,info)
875 write(*,*)
"linear system solver failed...'" 895 real(dp),
intent(in) :: A(:,:),B(:,:)
896 real(dp),
intent(out) :: sol(:,:)
897 integer,
parameter :: wmax=3000
898 integer,
allocatable :: ipiv(:)
899 integer :: n,nrhs,lda,ldb,lwork
901 real(dp),
allocatable :: work(:),A2(:,:)
918 call dsysv(uplo,n,nrhs,a2,lda,ipiv,sol,ldb,work,lwork,info)
919 lwork = min(wmax,int(work(1)))
922 allocate(work(lwork))
923 call dsysv(uplo,n,nrhs,a2,lda,ipiv,sol,ldb,work,lwork,info)
925 write(*,*)
"linear system solver failed...'" subroutine, public bandpostcov(U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, lowdiag, updiag, bandpostC)
Computes a band of the posterior covariance. See http://www.netlib.org/lapack/lug/node124.html
integer, parameter, public dp
subroutine, public calcfactors(G1, G2, G3, Cm1, Cm2, Cm3, Cd1, Cd2, Cd3, U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, iUCmGtiCd1, iUCmGtiCd2, iUCmGtiCd3)
Computes the factors necessary to solve the inverse problem.
subroutine, private symgeneigvv(A, uplo, Bpd, lambda, U)
Computes eigenvalues and eigenvectors of the generalized symmetric definite eigenproblem. See http://www.netlib.org/lapack/lug/node54.html
subroutine, public blockpostcov(U1, U2, U3, diaginvlambda, iUCm1, iUCm2, iUCm3, astart, aend, bstart, bend, postC)
Computes a block of the posterior covariance.
integer, parameter, public rexprange
subroutine, private symsolvels(A, B, sol)
Solves a linear system AX = B for symmetric A, real numbers
integer, parameter, public pdigits
subroutine, public posteriormean(U1, U2, U3, diaginvlambda, Z1, Z2, Z3, G1, G2, G3, mprior, dobs, postm)
Computes the posterior mean
This file contains the parallel OpenMPI (distributed memory) version of KronLinInv. OpenMPI and LAPACK libraries are required to be installed in the system.
Procedures to perform linear inversion under gaussian assumptions using the Kronecker-product approac...
subroutine, private solvels(A, B, sol)
Solves a linear system AX = B, real numbers