c ************************************************************************************
c *This is a parallel bem program to solve 3D elastic problem using 4-8 nodes element*
c *                  based on Gauss-Jordan elimination (in-core)                     *
c ************************************************************************************
      program parbe3de
implicit real*8 (a-h,o-z)
include 'mpif.h'
parameter(mdim=2500)
character*10 filein,fileout,filetst,filetmp
integer ierr,status(mpi_status_size)
dimension nprd(mdim),a(8000000),ia(40000)
c common a(2000000)
c common /comg/ ia(30000)
common /matc/ cc,cc1,cc2,ge,e,pmu,re
common /num/ np,ne
      common/mpi/ myid,nproc
common /qjq/mp 
common /tdh/inp,ipr,itmp,its

call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world,myid,ierr)
call mpi_comm_size(mpi_comm_world,nproc,ierr)
write(*,*) '进程号=',myid,'  of 总进程数=',nproc

tstart=mpi_wtime()
      mx=mdim

inp=3
ipr=5
itmp=11
its=7
filein='data.txt'
fileout='result.txt'
filetmp='dmp01'
filetst='test001'

      open(inp,file=filein)
open(ipr,file=fileout)
open(its,file=filetst)

      read(inp,*) icheck,e,pmu,re
      write(ipr,20) icheck,e,pmu,re
c if(e.eq.0) goto 300
20    format(/x,7hicheck=,i3,5x,2he=,e12.4,5x,4hpmu=,f5.2,5x,3hre=,
     #       f10.5)
      pa=3.14159265
ge=e/2./(1+pmu)
cc=1./50.26548/(1.-pmu)
cc1=3.-4.*pmu
cc2=1.-2.*pmu
read(inp,*) np,ne,npd
write(ipr,50) np,ne,npd
50    format(/1x,3hnp=,i5,5x,3hne=,i5,5x,4hnpd=,i5)
      n=3*np
m1=24*ne
n1=n+1
n2=n1+n
n3=n2+m1
m2=13*ne
l1=m2+1
l2=l1+npd
60    call input(np,ne,npd,a(1),a(n1),a(n2),a(n3),ia(1),ia(l1),ia(l2)
     #           ,idr)
c      if(icheck.ne.0) goto 300
n72=n3+1
      n8=n72+n
len=n*16
open(itmp,file=filetmp,form='unformatted',access='direct'
     # ,recl=len)
c form distributedly A and F
      tform1=mpi_wtime()

call fbie(n,a(n72),a(1),ia(1),a(n1),a(n2),ia(l2),idr)
      write(its,*) myid,'mp=',mp
tform2=mpi_wtime()
c write(*,*) '进程号=',myid,'  方程组形成时间=',tform2-tform1

c solve parallelly
      tsolv1=mpi_wtime()

call pargj(mx,a(n72),n)

tsolv2=mpi_wtime()
c write(*,*) '进程号=',myid,'  方程组求解时间=',tsolv2-tsolv1

c collect results
      if(nproc.gt.1) then
 if(myid.eq.0) then
   do 11 i=1,mp
     ig=(i-1)*nproc+1
            a(n8-1+3*ig-2)=a(n72-1+3*i-2)
     a(n8-1+3*ig-1)=a(n72-1+3*i-1)
     a(n8-1+3*ig)=a(n72-1+3*i)
11        continue
          do 21 i=1,nproc-1
            call mpi_recv(a(n72),(mp+1)*3,mpi_double_precision,i,i,
     #                    mpi_comm_world,status,ierr)
     do 31 j=1,mp
        jg=(j-1)*nproc+i+1
        if(jg.gt.np) goto 31
               a(n8-1+3*jg-2)=a(n72-1+3*j-2)
        a(n8-1+3*jg-1)=a(n72-1+3*j-1)
        a(n8-1+3*jg)=a(n72-1+3*j)
31          continue
21        continue
 else
          call mpi_send(a(n72),mp*3,mpi_double_precision,0,myid,
     #                  mpi_comm_world,ierr)
 endif
      else
 do 33 jj=0,n-1
   a(n8+jj)=a(n72+jj)
33      continue
      endif

if(myid.eq.0) then
call trdt(a(n1),a(n2),a(n8),ia(1),n)
c get the stress of the boundary nodes
n4=n3+7*np
n5=n4+81
n6=n5+24
call stres(a(1),a(n1),a(n2),a(n3),a(n4),a(n5),a(n6),ia(1))
endif

close(itmp)
    close(its)
close(ipr)
close(inp)

      tend=mpi_wtime()
wtime=tend-tstart
write(*,*) '进程号=',myid,'  总的运行时间=',wtime

      call mpi_finalize(ierr)

end





subroutine capst(stre,pstr)
implicit real*8 (a-h,o-z)
dimension stre(6),torp(6),pstr(3)
ast=(stre(1)+stre(4)+stre(6))/3.0
torp(1)=stre(1)-ast
torp(2)=stre(4)-ast
torp(3)=stre(6)-ast
torp(4)=stre(2)
torp(5)=stre(5)
torp(6)=stre(3)
aj2=(torp(1)**2+torp(2)**2+torp(3)**2)/2+torp(4)**2+
     # torp(5)**2+torp(6)**2
aj3=torp(1)*(torp(2)*torp(3)-torp(5)*torp(5))+torp(4)*
     # (torp(5)*torp(6)-torp(4)*torp(3))
     # +torp(6)*(torp(4)*torp(5)-torp(2)*torp(6))
t=dsqrt(aj2/1.5)
a=aj3*1.414214/t**3
if(dabs(a).gt.1.0) a=dsign(1.0d0,a)
a=dacos(a)/3.0
t=t*1.414214
pstr(1)=t*dcos(a)
a1=3.1415926/3*2
pstr(2)=t*dcos(a+a1)
pstr(3)=t*dcos(a-a1)
do 10 i=2,3
if(pstr(1).gt.pstr(i)) goto 10
b=pstr(1)
pstr(1)=pstr(i)
pstr(i)=b
10    continue
      if(pstr(2).ge.pstr(3)) goto 20
b=pstr(2)
pstr(2)=pstr(3)
pstr(3)=b
20    do 30 i=1,3
30    pstr(i)=pstr(i)+ast
      return
end





subroutine ccf(c1,c2,iep)
implicit real*8 (a-h,o-z)
common /sh/ shp(3,8)
common /xy/ xp(3),xe(3,8),xg(3),xpg(2,3),cni(3),aj
common /tdh/inp,ipr,itmp,its
dimension iep(13)
call shf(shp,c1,c2,iep)
do 10 i=1,3
xg(i)=0
do 10 j=1,4
10    xg(i)=xg(i)+xe(i,j)*shp(3,j)
      do 30 i=1,2
do 30 j=1,3
a1=0
do 20 k=1,4
20    a1=a1+xe(j,k)*shp(i,k)
30    xpg(i,j)=a1
      if(iep(9).le.4) goto 70
do 60 i=5,8
if(iep(i).eq.0) goto 60
do 50 j=1,3
xg(j)=xg(j)+xe(j,i)*shp(3,i)
do 40 k=1,2
40    xpg(k,j)=xpg(k,j)+xe(j,i)*shp(k,i)
50    continue
60    continue
70    g1=xpg(1,2)*xpg(2,3)-xpg(1,3)*xpg(2,2)
      g2=xpg(1,3)*xpg(2,1)-xpg(1,1)*xpg(2,3)
g3=xpg(1,1)*xpg(2,2)-xpg(1,2)*xpg(2,1)
aj=g1*g1+g2*g2+g3*g3
if(aj.le.1e-12) goto 80
aj=sqrt(aj)
a1=1.0/aj
cni(1)=g1*a1
cni(2)=g2*a1
cni(3)=g3*a1
return
80    write(ipr,100) c1,c2,aj,xe
100   format(5x,22h***error of element***/5x,3hc1=,f10.5,5x,3hc2=,
     #f10.5,5x,3haj=,e12.6/8x,1hx,10x,1hy,10x,1hz/(5x,3f11.6))
      stop
end





subroutine cocqf(m1,m2,ik,eps,iep)
implicit real*8 (a-h,o-z)
common /xy/ xp(3),xe(3,8),xgg(13)
dimension r(8),iep(13)
r1=xe(1,4)-xe(1,3)
r2=xe(2,4)-xe(2,3)
r3=xe(3,4)-xe(3,3)
a1=r1*r1+r2*r2+r3*r3
a1=sqrt(a1)
r1=xe(1,3)-xe(1,2)
r2=xe(2,3)-xe(2,2)
r3=xe(3,3)-xe(3,2)
a2=r1*r1+r2*r2+r3*r3
a2=sqrt(a2)
if(ik.eq.0) goto 10
m1=6
m2=m1
goto 120
10    do 20 i=1,4
      r1=xp(1)-xe(1,i)
r2=xp(2)-xe(2,i)
r3=xp(3)-xe(3,i)
r1=r1*r1+r2*r2+r3*r3
20 r(i)=sqrt(r1)
      if(iep(9).le.4) goto 50
do 40 i=5,8
if(iep(i).eq.0) goto 30
r1=xp(1)-xe(1,i)
r2=xp(2)-xe(2,i)
r3=xp(3)-xe(3,i)
r1=r1*r1+r2*r2+r3*r3
r(i)=sqrt(r1)
goto 40
30    r(i)=1.e8
40    continue
50    rmin=r(1)
      k1=8
if(iep(9).le.4) k1=4
do 60 i=2,k1
if(r(i).lt.rmin) rmin=r(i)
60    continue
      m1=6
m2=6
b1=0.25/rmin
r1=b1*a1
r2=b1*a2
do 80 i=2,6
r3=(r1**(2*i))*float(2*i+1)
if(r3.gt.eps) goto 80
m1=i
goto 100
80    continue
100   do 110 i=2,6
      r3=(r2**(2*i))*float(2*i+1)
if(r3.gt.eps) goto 110
m2=i
goto 120
110   continue
120   return
      end





subroutine efg(cofu,coft,u,t,c,ik,iep)
implicit real*8 (a-h,o-z)
common /sh/ shp(3,8)
dimension cofu(3,3,8),coft(3,3,8),u(3,3),t(3,3),iep(13) 
do 50 k=1,8
if(iep(k).eq.0) goto 50
a1=shp(3,k)*c
if(k.eq.ik) goto 20
do 10 i=1,3
do 10 j=1,3
10    coft(i,j,k)=coft(i,j,k)+t(i,j)*a1
20    if(iep(10).eq.0) goto 50
      do 40 i=1,3
if(iep(i+10).eq.0) goto 40
do 30 j=1,3
30    cofu(j,i,k)=cofu(j,i,k)+u(j,i)*a1
40    continue
50    continue
      return
end 




subroutine fbie(n,f,coord,nep,dp,te,nprd,idr)
implicit real*8 (a-h,o-z)
include 'mpif.h'
common /num/np,ne
common /matc/ cc,cc1,cc2,ge,e,pmu,re
common /xy/ xp(3),xe(3,8),xg(3),xpg(2,3),cni(3),aj
common/mpi/myid,nproc
common /qjq/mp 
common /tdh/inp,ipr,itmp,its
dimension a(3,n),f(n),cofu(3,3,8),coft(3,3,8),df(3,8),coord(3,np)
dimension nep(13,ne),dp(3,np),te(3,8,ne),cofd(3,3),nprd(idr)
do 10 i=1,n
f(i)=0.0
10    continue
      mp=0
      do 300 inp=myid+1,np,nproc
mp=mp+1
do 12 i=1,3
do 12 j=1,n
12    a(i,j)=0.0
c write(its,15) inp
c write(*,15) inp
c15    format(1x,10hfbie inp=,i5)
      nrow=3*(inp-1)
      do 20 i=1,3
xp(i)=coord(i,inp)
do 20 j=1,3
20    cofd(i,j)=0.0
      do 200 ine=1,ne
ik=0
do 40 i=1,8
k1=nep(i,ine)
if(k1.eq.0) goto 40
do 30 j=1,3
xe(j,i)=coord(j,k1)
30    df(j,i)=dp(j,k1)
      if(k1.eq.inp) ik=i
40    continue
      call iks(cofu,coft,nep(1,ine),ik)
call fmat(a,n,f,cofu,coft,df,te(1,1,ine),nrow,nep(1,ine))
      do 60 k=1,8
if(nep(k,ine).eq.0) goto 60
do 50 i=1,3
do 50 j=1,3
50    cofd(i,j)=cofd(i,j)-coft(i,j,k)
60    continue
200   continue
      do 240 i=1,3
k1=nrow+i
if(dp(i,inp).lt.1.e-10) goto 240
if(dp(i,inp).gt.1.e7) goto 220
do 210 j=1,3
k2=3*(mp-1)+j
210   f(k2)=f(k2)-cofd(j,i)*dp(i,inp)
      goto 240
220   do 230 j=1,3
      k2=j
230   a(k2,k1)=a(k2,k1)+cofd(j,i)*re
240   continue

c modify the rigid displacement
      do 330 i=1,3
kr=3*(inp-1)+i
do 325 j=1,idr
kl=nprd(j)
if(kr.eq.kl) then
do 320 jj=1,n
a(i,jj)=0.0
320   continue
      a(i,kl)=1.0
f(3*(mp-1)+i)=0.0
else
      a(i,kl)=0.0
endif
325   continue
330   continue

      write(itmp,rec=3*mp-2) (a(1,j),j=1,n)
write(itmp,rec=3*mp-1) (a(2,j),j=1,n)
write(itmp,rec=3*mp) (a(3,j),j=1,n)
300   continue
      return
end






subroutine fmat(a,n,f,cofu,coft,df,tf,n1,iep)
implicit real*8 (a-h,o-z)
common /matc/ cc,cc1,cc2,ge,e,pmu,re
common /qjq/mp
dimension a(3,n),f(n),cofu(3,3,8),coft(3,3,8),df(3,8),tf(3,8)
dimension iep(13)
if(iep(10).eq.0) goto 210
do 60 j=1,3
m1=iep(j+10)
if(m1.eq.0) goto 60
if(m1.eq.1) goto 30
do 20 k=1,8
m1=iep(k)
if(m1.eq.0) goto 20
k1=3*(m1-1)+j
do 10 i=1,3
k2=i
10    a(k2,k1)=a(k2,k1)-cofu(i,j,k)
20    continue
      goto 60
30    do 50 k=1,8
      if(iep(k).eq.0) goto 50
do 40 i=1,3
k2=3*(mp-1)+i
40    f(k2)=f(k2)+cofu(i,j,k)*tf(j,k)/ge
50    continue
60    continue
210   do 240 k=1,8
      k1=iep(k)
if(k1.eq.0) goto 240
k1=3*(k1-1)
do 230 i=1,3
d1=df(i,k)
if(d1.eq.0) goto 230
k3=k1+i
if(d1.lt.1.d7) goto 225
do 220 j=1,3
k2=j
220   a(k2,k3)=a(k2,k3)+coft(j,i,k)*re
      goto 230
225   do 227 j=1,3
      k2=3*(mp-1)+j
227   f(k2)=f(k2)-coft(j,i,k)*d1
230   continue
240   continue
      return
end




subroutine gs4(n,a,b,m)
implicit real*8 (a-h,o-z)
dimension a(n,n),b(n),m(n)
in=n-1
do 10 i=1,n
10    m(i)=i
      do 20 k=1,n
c      write(*,15) k
c15    format(1x,10hgs4*****k=,i5)
      p=0.
do 30 i=k,n
do 30 j=k,n
if(dabs(a(i,j)).le.dabs(p)) goto 30
p=a(i,j)
io=i
jo=j
30    continue
      if(dabs(p)-1.d-13) 200,200,300
200   write(ipr,210) k,p
210   format(1x,30h***** matrix is singular *****,i5,5h   p=e12.4)
      stop
300   if(jo.eq.k) goto 400
      do 40 i=1,n
t=a(i,jo)
a(i,jo)=a(i,k)
40    a(i,k)=t
      j=m(k)
m(k)=m(jo)
m(jo)=j
400   if(io.eq.k) goto 500
      do 50 j=k,n
t=a(io,j)
a(io,j)=a(k,j)
50    a(k,j)=t
      t=b(io)
b(io)=b(k)
b(k)=t
500   p=1.0/p
      if(k.eq.n) goto 600
do 60 j=k,in
60    a(k,j+1)=a(k,j+1)*p
600   b(k)=b(k)*p
      if(k.eq.n) goto 20
do 70 i=k,in
do 80 j=k,in
80    a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
70    b(i+1)=b(i+1)-a(i+1,k)*b(k)
20    continue
      do 90 i1=2,n
i=n+1-i1
do 90 j=i,in
90    b(i)=b(i)-a(i,j+1)*b(j+1)
      do 1 k=1,n
i=m(k)
1     a(1,i)=b(k)
      do 2 k=1,n
2     b(k)=a(1,k)
      return
end






subroutine iks(cofu,coft,iep,ik)
implicit real*8 (a-h,o-z)
common /sh/ shp(3,8)
common /xy/ xp(3),xe(3,8),xg(3),xpg(2,3),cni(3),aj
dimension xgw(8,9),iep(13),cofu(3,3,8),coft(3,3,8),u(3,3),t(3,3)
data xgw /0.0,-0.5773502692,-0.7745966692,-0.8611363116,
     # -0.9061798459,-0.9324695142,-0.9491079123,-0.9602898565,
     #    0.1012285363,0.5773502692,0.0,-0.3399810436,
     #    -0.5384693101,-0.6612093865,-0.7415311856,-0.7966664774,
     #    0.2223810345,0.1294849662,0.7745966692,0.3399810436,
     #    0.0,-0.2386191861,-0.4058451514,-0.5255324099,
     #    0.3137066459,0.2797053915,0.1713244924,0.8611363116,
     #    0.5384693101,0.2386191861,0.0,-0.1834346425,
     #    0.3626837834,0.3818300505,0.3607615731,0.2369268851,
     #    0.9061798459,0.6612093865,0.4058451514,0.1834346425,
     #    0.3626837834,0.4179591837,0.4679139346,0.4786286705,
     #    0.3478548451,0.9324695142,0.7415311856,0.5255324099,
     #    0.3137066459,0.3818300505,0.4679139346,0.5688888889,
     #    0.6521451549,0.5555555556,0.9491079123,0.7966664774,
     #    0.2223810345,0.2797053915,0.3607615731,0.4786286705,
     #    0.6521451549,0.8888888889,1.0,0.9602898565,
     #    0.1012285363,0.1294849662,0.1713244924,0.2369268851,
     #    0.3478548451,0.5555555556,1.0,2.0/
do 10 i=1,3
do 10 j=1,3
do 10 k=1,8
cofu(i,j,k)=0.0
10    coft(i,j,k)=0.0
      eps=1.e-4
call cocqf(ng1,ng2,ik,eps,iep)
do 300 is1=1,ng1
s1=xgw(ng1,is1)
i=9-ng1
j=is1+i
w1=xgw(i,j)
do 300 is2=1,ng2
s2=xgw(ng2,is2)
i=9-ng2
j=is2+i
w2=xgw(i,j)
aa=w1*w2
c1=s1
c2=s2
if(ik.eq.0) goto 200
ac=0.5*(1+s2)
aa=aa*ac
goto (15,20,30,40,50,60,70,80),ik
15    c1=(1+s1)*ac-1.0
      call ker3d(u,t,c1,c2,iep)
c=aa*aj
call efg(cofu,coft,u,t,c,ik,iep)
c1=s2
c2=(1-s1)*ac-1.0
goto 200
20    c1=1-ac*(1-s1)
call ker3d(u,t,c1,c2,iep)
c=aa*aj
call efg(cofu,coft,u,t,c,ik,iep)
c1=-s2
c2=(1+s1)*ac-1.0
goto 200
30    c1=1-ac*(1+s1)
      c2=-s2
      call ker3d(u,t,c1,c2,iep)
c=aa*aj
call efg(cofu,coft,u,t,c,ik,iep)
c1=-s2
c2=1-(1-s1)*ac
goto 200
40    c1=(1-s1)*ac-1.0
      c2=-s2
call ker3d(u,t,c1,c2,iep)
c=aa*aj
call efg(cofu,coft,u,t,c,ik,iep)
c1=s2
c2=1-ac*(1+s1)
goto 200
50    c1=-ac
      c2=(1+s1)*ac-1.0
call ker3d(u,t,c1,c2,iep)
c=aa*aj*0.5
call efg(cofu,coft,u,t,c,ik,iep)
c1=ac
c2=(1-s1)*ac-1.0
call ker3d(u,t,c1,c2,iep)
c=aa*aj*0.5
call efg(cofu,coft,u,t,c,ik,iep)
c1=s1*ac
c2=s2
goto 200
60    c1=1-(1+s1)*ac
      c2=-ac
call ker3d(u,t,c1,c2,iep)
c=aa*aj*0.5
call efg(cofu,coft,u,t,c,ik,iep)
c1=1-(1-s1)*ac
c2=ac
call ker3d(u,t,c1,c2,iep)
c=aa*aj*0.5
call efg(cofu,coft,u,t,c,ik,iep)
c1=-s2
c2=s1*ac
goto 200
70    c1=ac
      c2=1-(1+s1)*ac
call ker3d(u,t,c1,c2,iep)
c=aa*aj*0.5
call efg(cofu,coft,u,t,c,ik,iep)
c1=-ac
c2=1-(1-s1)*ac
call ker3d(u,t,c1,c2,iep)
c=aa*aj*0.5
call efg(cofu,coft,u,t,c,ik,iep)
c1=-s1*ac
c2=-s2
goto 200
80    c1=(1+s1)*ac-1.0
      c2=ac
call ker3d(u,t,c1,c2,iep)
c=aa*aj*0.5
call efg(cofu,coft,u,t,c,ik,iep)
c1=(1-s1)*ac-1.0
c2=-ac
call ker3d(u,t,c1,c2,iep)
c=aa*aj*0.5
call efg(cofu,coft,u,t,c,ik,iep)
c1=s2
c2=-s1*ac
200   call ker3d(u,t,c1,c2,iep)
      c=aa*aj
call efg(cofu,coft,u,t,c,ik,iep)
300   continue
      return
end




      
subroutine input(np,ne,npd,xyz,dp,te,wdp,npe,idp,nprd,idr)
implicit real*8 (a-h,o-z)
dimension xyz(3,np),dp(3,np),te(3,8,ne),wdp(3,npd),npe(13,ne)
     # ,idp(npd),p(3),nprd(3*npd)
common /tdh/inp,ipr,itmp,its
read(inp,*) xyz
write(ipr,220) (i,(xyz(j,i),j=1,3),i=1,np)
read(inp,*) npe
write(ipr,240) (i,(npe(j,i),j=1,13),i=1,ne)
do 30 i=1,3
do 10 j=1,np
10    dp(i,j)=1.e8
      do 20 j=1,8
do 20 k=1,ne
20    te(i,j,k)=0.d0
30    continue
      read(inp,*) idp
read(inp,*) wdp
idr=0
do 33 i=1,npd
do 32 j=1,3
if(wdp(j,i).eq.1.d-12) then
idr=idr+1
nprd(idr)=3*(idp(i)-1)+j
endif
32    continue
33    continue
write(ipr,280) (idp(i),(wdp(j,i),j=1,3),i=1,npd)
do 40 i=1,npd
k1=idp(i)
do 40 j=1,3
40    dp(j,k1)=wdp(j,i)
      write(ipr,290) (i,(dp(j,i),j=1,3),i=1,np)
do 100 i=1,ne
k1=npe(10,i)
if(k1.eq.0) goto 100
goto (50,90,100),k1
50    read(inp,*) p
      do 80 j=1,3
if(npe(j+10,i).ne.1) goto 80
do 70 k=1,8
if(npe(k,i).eq.0) goto 70
te(j,k,i)=p(j)
70    continue
80    continue
      goto 100
90    read(inp,*) ((te(j,k,i),j=1,3),k=1,8)
100   continue
      write(ipr,320)
      do 110 i=1,ne
      write(ipr,330) i,((te(j,k,i),k=1,8),j=1,3)
110   continue
220   format(1h0,10x,16hnodal coordinate//6x,4hnode,5x,1hx,9x,1hy,9x,
     #1hz/(5x,i5,3f10.4))
240   format(1h0,10x,20helement--information//(5x,i5,5x,8i5,5x,5i3))
280   format(1h0,10x,24hknown nodal displacement//6x,4hnode,7x,1hu,11x,
     #1hv,11x,1hw/(5x,i5,3d12.4))
290   format(1h0,10x,30hnodal displacement information//6x,4hnode,7x,
     #1hu,11x,1hv,11x,1hw/(5x,i5,3d12.4))
320   format(1h0,10x,32htraction information in elements)
330   format(1h0,7helement,i5/(5x,8f9.3))
      return
end





subroutine ker3d(u,t,c1,c2,iep)
implicit real*8 (a-h,o-z)
common /matc/ cc,cc1,cc2,ge,e,pmu,zzzz
common /sh/ shp(3,8)
common /xy/ xp(3),xe(3,8),xg(3),xpg(2,3),cni(3),aj
dimension u(3,3),t(3,3),iep(10)
call ccf(c1,c2,iep)
w1=xp(1)-xg(1)
w2=xp(2)-xg(2)
w3=xp(3)-xg(3)
v1=w1*w1
v2=w2*w2
v3=w3*w3
rc=sqrt(v1+v2+v3)
rc=1./rc
w1=w1*rc
w2=w2*rc
w3=w3*rc
c=rc*cc
if(iep(10).eq.0) goto 10
u(1,1)=(cc1+w1*w1)*c
u(2,2)=(cc1+w2*w2)*c
u(3,3)=(cc1+w3*w3)*c
u(1,2)=w1*w2*c
u(2,1)=u(1,2)
u(1,3)=w1*w3*c
u(3,1)=u(1,3)
u(2,3)=w2*w3*c
u(3,2)=u(2,3)
10    rc=2.0*rc*c
      rn=cni(1)*w1+cni(2)*w2+cni(3)*w3
c=rc*rn
t(1,1)=c*(cc2+3*w1*w1)
t(2,2)=c*(cc2+3*w2*w2)
t(3,3)=c*(cc2+3*w3*w3)
c=cc2*(w2*cni(1)-w1*cni(2))
a1=3.*rn*w1*w2
t(1,2)=rc*(a1+c)
t(2,1)=rc*(a1-c)
c=cc2*(w3*cni(1)-w1*cni(3))
a1=3.*rn*w1*w3
t(1,3)=rc*(a1+c)
t(3,1)=rc*(a1-c)
c=cc2*(w3*cni(2)-w2*cni(3))
a1=3.*rn*w2*w3
t(2,3)=rc*(a1+c)
t(3,2)=rc*(a1-c)
return
end






subroutine shf(shp,c1,c2,it)
implicit real*8 (a-h,o-z)
dimension shp(3,8),s(4),t(4),it(10)
data s/-0.5,0.5,0.5,-0.5/,t/-0.5,-0.5,0.5,0.5/
do 10 i=1,4
shp(3,i)=(0.5+s(i)*c1)*(0.5+t(i)*c2)
shp(2,i)=t(i)*(0.5+s(i)*c1)
10    shp(1,i)=s(i)*(0.5+t(i)*c2)
      if(it(9).le.4) return
do 20 i=5,8
do 20 j=1,3
20    shp(j,i)=0.0
      s1=0.5*(1-c1*c1)
      t1=0.5*(1.-c2*c2)
if(it(5).eq.0) goto 30
shp(1,5)=-c1*(1.-c2)
shp(2,5)=-s1
shp(3,5)=s1*(1.-c2)
30    if(it(6).eq.0) goto 40
      shp(1,6)=t1
shp(2,6)=-c2*(1+c1)
shp(3,6)=t1*(1.+c1)
40    if(it(7).eq.0) goto 50
      shp(1,7)=-c1*(1.+c2)
shp(2,7)=s1
shp(3,7)=s1*(1+c2)
50    if(it(8).eq.0) goto 60
      shp(1,8)=-t1
shp(2,8)=-c2*(1.-c1)
shp(3,8)=t1*(1.-c1)
60    k=8
      do 80 i=1,4
l=i+4
do 70 j=1,3
70    shp(j,i)=shp(j,i)-0.5*(shp(j,k)+shp(j,l))
80    k=l
      return
end






subroutine stres(xyz,dp,te,stnp,as,ue,fs,nep)
implicit real*8 (a-h,o-z)
common /matc/ cc,cc1,cc2,ge,e,pmu,re
common /num/ np,ne
common /sh/ shp(3,8)
common /xy/ xp(3),xe(3,8),xg(3),xpg(2,3),cni(3),aj
common /tdh/inp,ipr,itmp,its
dimension xyz(3,np),dp(3,np),te(3,8,ne),stnp(7,np),as(9,9)
      dimension ste(6),dpks(2,8),ue(3,8),fs(9),nep(13,ne),ms(9)
data dpks/-1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,1.0,0.0,
     # -1.0,1.0,0.0,0.0,1.0,-1.0,0.0/
do 10 i=1,7
do 10 j=1,np
10    stnp(i,j)=0.0
      a1=1./cc2
a2=1.-pmu
a3=2.*ge
a4=2*a1*a2
a5=pmu*a1
a6=2.*a5
write(ipr,510)
do 400 ine=1,ne
write(ipr,520) ine
do 30 i=1,8
k1=nep(i,ine)
if(k1.eq.0) goto 30
do 20 j=1,3
xe(j,i)=xyz(j,k1)
20    ue(j,i)=dp(j,k1)
30    continue
      do 300 inp=1,8
if(nep(inp,ine).eq.0) goto 300
do 40 i=1,9
fs(i)=0.0
do 40 j=1,9
40    as(j,i)=0.0
      c1=dpks(1,inp)
c2=dpks(2,inp)
call ccf(c1,c2,nep(1,ine))
do 60 i=1,3
fs(i)=te(i,inp,ine)/ge
do 60 j=1,2
k1=2*i+j+1
a9=0.0
do 50 k=1,8
if(nep(k,ine).eq.0) goto 50
a9=a9+shp(j,k)*ue(i,k)
50    continue
      fs(k1)=a9
60    continue
      do 65 i=1,3
as(4,i)=xpg(1,i)
as(5,i)=xpg(2,i)
as(6,i+3)=as(4,i)
as(7,i+3)=as(5,i)
as(8,i+6)=as(4,i)
65    as(9,i+6)=as(5,i)
      j=1
do 70 i=1,3
as(i,j)=a4*cni(i)
70    j=j+4
      as(1,5)=a6*cni(1)
as(2,1)=a6*cni(2)
as(3,1)=a6*cni(3)
as(1,9)=as(1,5)
as(2,9)=as(2,1)
as(3,5)=as(3,1)
as(1,2)=cni(2)
as(2,2)=cni(1)
as(1,3)=cni(3)
as(3,3)=cni(1)
as(1,4)=cni(2)
as(2,4)=cni(1)
as(2,6)=cni(3)
as(3,6)=cni(2)
as(1,7)=cni(3)
as(3,7)=cni(1)
as(2,8)=cni(3)
as(3,8)=cni(2)
call gs4(9,as,fs,ms)
em=fs(1)+fs(5)+fs(9)
em=a5*em
ste(1)=(fs(1)+em)*a3
ste(4)=(fs(5)+em)*a3
ste(6)=(fs(9)+em)*a3
ste(2)=(fs(2)+fs(4))*ge
ste(5)=(fs(6)+fs(8))*ge
ste(3)=(fs(3)+fs(7))*ge
write(ipr,530) inp,nep(inp,ine),(te(i,inp,ine),i=1,3),ste
if(nep(9,ine).eq.3.and.inp.eq.1) goto 300
k2=nep(inp,ine)
do 80 i=1,6
80    stnp(i,k2)=stnp(i,k2)+ste(i)
      stnp(7,k2)=stnp(7,k2)+1.0
300   continue
400   continue
      do 410 i=1,np
do 410 j=1,6
410   stnp(j,i)=stnp(j,i)/stnp(7,i)
      write(ipr,540) (i,i=1,3)
do 420 i=1,np
call capst(stnp(1,i),xp)
write(ipr,550) i,(stnp(j,i),j=1,6),xp
420   continue
      return
510   format(1h0,19x,40h****************************************/
     #           20x,40h*  the traction components and stresses*/
     #           20x,40h*          in boundary element         */
     #           20x,40h****************************************)
520   format(1h0,11helement NO.,i4/1x,7hNO.node,3x,2htx,6x,2hty,
     #       6x,2htz,6x,4hstxx,4x,4hstxy,4x,4hstxz,4x,4hstyy,4x,4hstyz,
     #       4x,4hstzz)
530   format(1x,i2,i4,9f10.2)
540   format(1h0,19x,40h****************************************/
     #           20x,40h*  stresses and principal stresses     */
     #           20x,40h*           at nodal points            */
     #           20x,40h****************************************//
     #           1x,4hnode,3x,4hstxx,4x,4hstxy,4x,4hstxz,4x,4hstyy,4x
     #           ,4hstyz,4x,4hstzz,3(5x,2hst,i1)) 
550   format(1x,i4,9f10.2)
      end






subroutine trdt(dp,te,f,nep,n)
implicit real*8 (a-h,o-z)
common /num/ np,ne
common /matc/ cc,cc1,cc2,ge,e,pmu,re
common /tdh/inp,ipr,itmp,its
dimension dp(3,np),te(3,8,ne),f(n),nep(13,ne)
do 30 l=1,ne
if(nep(10,l).eq.0) goto 30
do 20 i=1,3
if(nep(i+10,l).ne.-1) goto 20
do 10 j=1,8
k1=nep(j,l)
if(k1.eq.0) goto 10
k2=3*(k1-1)+i
te(i,j,l)=f(k2)*ge
10    continue
20    continue
30    continue
      do 40 j=1,np
do 40 i=1,3
if(dp(i,j).lt.1.d7) goto 40
k1=3*(j-1)+i
dp(i,j)=f(k1)*re
40    continue
      write(ipr,100)
do 50 i=1,np
write(ipr,110) i,(dp(j,i),j=1,3)
50    continue
100   format(1h0,14x,30h******************************/
     #           15x,30h*                            */
     #           15x,30h*     node displacement      */
     #           15x,30h*                            */
     #           15x,30h******************************//
     #           7x,3hNO.,7x,1hu,14x,1hv,14x,1hw)
110   format(5x,i5,3f15.9)
      return
end





      subroutine pargj(mx,b,n)
c gauss-jordan elimination for the same number block of each processor
implicit real*8 (a-h,o-z)
include 'mpif.h'
dimension a(mx,n),b(mx),buf(3,n+1)
integer ierr
common /num/ np,ne
common/mpi/myid,nproc
common /qjq/mp
common /tdh/inp,ipr,itmp,its

      do 5 i=1,mp
irec1=3*i-2
irec2=irec1+1
irec3=irec2+1
      read(itmp,rec=irec1) (a(3*i-2,k),k=1,n)
read(itmp,rec=irec2) (a(3*i-1,k),k=1,n)
read(itmp,rec=irec3) (a(3*i,k),k=1,n)
      write(its,300) (a(3*i-2,k),k=1,n)
write(its,300) (a(3*i-1,k),k=1,n)
write(its,300) (a(3*i,k),k=1,n)
5     continue
      call mpi_barrier(mpi_comm_world,ierr)
      write(its,300) (b(j),j=1,3*mp)
300   format(60e10.3)
ng=np
      ipg=1
ipl=1
200 ig=(ipl-1)*nproc+(myid+1)
      if(myid.eq.0) write(*,*) 'ipg=',ipg
c      write(*,*) 'ipg=',ipg
if(ig.eq.ipg) then
c eliminating N.O. 1 column
c=a(3*ipl-2,3*ipg-2)
do 10 i=3*ipg-1,n
      a(3*ipl-2,i)=a(3*ipl-2,i)/c
10    continue
      b(3*ipl-2)=b(3*ipl-2)/c
      do 40 i=3*ipg-1,n
a(3*ipl-1,i)=a(3*ipl-1,i)-a(3*ipl-1,3*ipg-2)*a(3*ipl-2,i)
40    continue
      b(3*ipl-1)=b(3*ipl-1)-a(3*ipl-1,3*ipg-2)*b(3*ipl-2)
do 45 i=3*ipg-1,n
a(3*ipl,i)=a(3*ipl,i)-a(3*ipl,3*ipg-2)*a(3*ipl-2,i)
45    continue
      b(3*ipl)=b(3*ipl)-a(3*ipl,3*ipg-2)*b(3*ipl-2)
c eliminating N.O. 2 colomn
c=a(3*ipl-1,3*ipg-1)
      do 50 i=3*ipg,n
a(3*ipl-1,i)=a(3*ipl-1,i)/c
50    continue
      b(3*ipl-1)=b(3*ipl-1)/c
      do 60 i=3*ipg,n
a(3*ipl-2,i)=a(3*ipl-2,i)-a(3*ipl-2,3*ipg-1)*a(3*ipl-1,i)
60    continue
      b(3*ipl-2)=b(3*ipl-2)-a(3*ipl-2,3*ipg-1)*b(3*ipl-1)
      do 65 i=3*ipg,n
a(3*ipl,i)=a(3*ipl,i)-a(3*ipl,3*ipg-1)*a(3*ipl-1,i)
65    continue
      b(3*ipl)=b(3*ipl)-a(3*ipl,3*ipg-1)*b(3*ipl-1)
c eliminating N.O. 3 column
c=a(3*ipl,3*ipg)
      do 66 i=3*ipg+1,n
a(3*ipl,i)=a(3*ipl,i)/c
buf(3,i)=a(3*ipl,i)
66    continue
      b(3*ipl)=b(3*ipl)/c
buf(3,n+1)=b(3*ipl)
      do 67 i=3*ipg+1,n
a(3*ipl-2,i)=a(3*ipl-2,i)-a(3*ipl-2,3*ipg)*a(3*ipl,i)
buf(1,i)=a(3*ipl-2,i)
67    continue
      b(3*ipl-2)=b(3*ipl-2)-a(3*ipl-2,3*ipg)*b(3*ipl)
buf(1,n+1)=b(3*ipl-2)
      do 68 i=3*ipg+1,n
a(3*ipl-1,i)=a(3*ipl-1,i)-a(3*ipl-1,3*ipg)*a(3*ipl,i)
      buf(2,i)=a(3*ipl-1,i)
68    continue
      b(3*ipl-1)=b(3*ipl-1)-a(3*ipl-1,3*ipg)*b(3*ipl)
buf(2,n+1)=b(3*ipl-1)      
write(its,*) 'buf0',n,ipg,3*(n-3*ipg)+3
c write(its,300) ((buf(ii,jj),jj=1,n),ii=1,3)
call mpi_bcast(buf(1,3*ipg+1),3*(n-3*ipg)+3,mpi_double_precision
     # ,myid,mpi_comm_world,ierr)
ipl=ipl+1
else
iroot=mod(ipg-1,nproc)
write(its,*) 'buf1',n,ipg,3*(n-3*ipg)+3
call mpi_bcast(buf(1,3*ipg+1),3*(n-3*ipg)+3,mpi_double_precision
     # ,iroot,mpi_comm_world,ierr)
c write(its,*) 'buf'
c write(its,300) ((buf(ii,jj),jj=1,n),ii=1,3)
endif

      do 20 j=1,mp
jg=(j-1)*nproc+myid+1
if(jg.eq.ipg) goto 20
c write(its,*) 'jg=',jg,'mpbl=',mpbl
do 30 k=3*ipg+1,n
a(3*j-2,k)=a(3*j-2,k)-a(3*j-2,3*ipg-2)*buf(1,k)
a(3*j-1,k)=a(3*j-1,k)-a(3*j-1,3*ipg-2)*buf(1,k)
a(3*j,k)=a(3*j,k)-a(3*j,3*ipg-2)*buf(1,k)
30    continue
      do 32 k=3*ipg+1,n
a(3*j-2,k)=a(3*j-2,k)-a(3*j-2,3*ipg-1)*buf(2,k)
a(3*j-1,k)=a(3*j-1,k)-a(3*j-1,3*ipg-1)*buf(2,k)
a(3*j,k)=a(3*j,k)-a(3*j,3*ipg-1)*buf(2,k)
32    continue
      do 33 k=3*ipg+1,n
a(3*j-2,k)=a(3*j-2,k)-a(3*j-2,3*ipg)*buf(3,k)
a(3*j-1,k)=a(3*j-1,k)-a(3*j-1,3*ipg)*buf(3,k)
a(3*j,k)=a(3*j,k)-a(3*j,3*ipg)*buf(3,k)
33    continue
      b(3*j-2)=b(3*j-2)-a(3*j-2,3*ipg-2)*buf(1,n+1)
b(3*j-1)=b(3*j-1)-a(3*j-1,3*ipg-2)*buf(1,n+1)
b(3*j)=b(3*j)-a(3*j,3*ipg-2)*buf(1,n+1)
      b(3*j-2)=b(3*j-2)-a(3*j-2,3*ipg-1)*buf(2,n+1)
b(3*j-1)=b(3*j-1)-a(3*j-1,3*ipg-1)*buf(2,n+1)
b(3*j)=b(3*j)-a(3*j,3*ipg-1)*buf(2,n+1)
      b(3*j-2)=b(3*j-2)-a(3*j-2,3*ipg)*buf(3,n+1)
b(3*j-1)=b(3*j-1)-a(3*j-1,3*ipg)*buf(3,n+1)
b(3*j)=b(3*j)-a(3*j,3*ipg)*buf(3,n+1)
20    continue
ipg=ipg+1
if(ipg.le.ng) goto 200
return
end