下面是我写的一段最简单的MPI代码,绿色部分是MPICH2最基本的CALL语句。我发现用4个核心计算的速度跟串行算法相比没有变化。

我确认用wmpiexec运行EXE文件,确实是4个核心都在算。但速度就是跟一个核心一样,希望高人帮忙。


program main
! MPI driver for a bias-voltage sweep.
!
! The NVB bias values VB are dealt out round-robin to the MPI ranks
! (rank r handles JVB = r+1, r+1+numprocs, ...). For each VB the program
! scans NX energies E from XMUR up to XMUR+VB and, for each energy, NY
! angles beta1 spaced by ALPHH, calling FUC and MATRIXCONST (both defined
! elsewhere) for every (E, beta1) pair.
!
! NOTE(review): the source had three DO statements but a single ENDDO, so
! it could not compile. The two missing END DO statements were added
! directly after the existing one, which keeps the original statement
! order (the WRITE stays inside the innermost loop). Confirm this is the
! intended nesting.
!
! NOTE(review): only rank 0 prints, so CURR values computed on the other
! ranks are never reported, and rank 0 issues one console WRITE per
! (E, beta1) pair. Both points are worth checking when looking for the
! missing parallel speed-up.

!INCLUDE 'link_fnl_shared.h'
!use numerical_libraries
implicit real*8(a-h,o-z)
include 'mpif.h'
integer, parameter :: nm = 1801, nx = 1501, ny = 1501
integer :: jkl
integer :: ierr, numprocs, myid   !@MPI added statement
! TC, U and K are handed to FUC / MATRIXCONST; their types are kept as in
! the original declarations so the argument lists still match.
complex :: tc(2,2), u(nm), k(nm)
common/cnst/a, vf, pi

!OPEN(36,FILE='curr_60_1801.dat')
!OPEN(26,FILE='NAN.dat')

! Double-precision literals: the variables are REAL*8 and PI/VF are shared
! with other program units through COMMON /CNST/.
pi = 3.14159265358979d0
vf = 6.58212d2   !\hbar v_f
a = 0.0d0        !unit: nm
b = 60.0d0       !unit: nm
ul0 = -30
ur0 = +30
! XMU0=(UL0+UR0)/2.0  (defining a separate variable XMU0 would be superfluous)
xh = (b - a)/(nm - 1.0d0)
alphh = 2.0d0*pi/(ny - 1)
nvb = 76

!@MPI-----------------------------------------------------------------------------------------------
call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world, myid, ierr)
call mpi_comm_size(mpi_comm_world, numprocs, ierr)
print *, "Process ", myid, " of ", numprocs, " is alive"
!@MPI-----------------------------------------------------------------------------------------------
! Every rank already sets NVB above; the broadcast from rank 0 is kept from
! the original code.
call mpi_bcast(nvb, 1, mpi_integer, 0, mpi_comm_world, ierr)

! Cyclic distribution of the bias values over the ranks.
do jvb = myid + 1, nvb, numprocs

  vb = (jvb - 1)*2.0d0 - 50.0d0

  xmul = vb/2.0d0     !@ XMUL=XMU0+VB/2.0    !\mu_L
  xmur = -vb/2.0d0    !@ XMUR=XMU0-VB/2.0    !\mu_R
  ! Adding eV lowers the original potential difference (UR-UL) to (UR-UL-eV).
  ur = ur0 + xmur
  ul = ul0 + xmul
  estep1 = vb/(nx - 1)

  do jkm = 1, nx
    e = xmur + (jkm - 1)*estep1     !\int_{\mu_R}^{\mu_L} dE
    do jkl = 1, ny
      ! Angle grid starting at -pi plus a tiny offset (original note: -pi<beta1<pi).
      beta1 = (jkl - 1)*alphh - pi + 0.000000000314159265d0
      yky = (e - ul)/vf*sin(beta1)

      call fuc(nm, yky, e, u, k, b, ul, ur)
      call matrixconst(nm, tc, xh, u, k, curr)

      if (myid == 0) then
        write(*,*) nvb, vb, curr
      end if
      !WRITE(36,14) VB,CURR
    end do
  end do

end do

! The error argument must be INTEGER; the original passed the implicitly
! REAL*8 variable RC.
call mpi_finalize(ierr)

end program main