program matmul_test implicit none integer veclen,nruns,icount,i integer ndim1,ndim2,ndim3 parameter (veclen=80,nruns=100000) parameter (ndim1=4,ndim2=4,ndim3=4) real t, t0, t1 real*8 a(ndim1,ndim3) real*8 b(ndim2,ndim3,veclen) real*8 c(ndim2,ndim1,veclen) real*8 d(ndim2,ndim1,veclen) real*8 diff icount=2*ndim1*ndim2*ndim3*veclen*nruns call makemats(ndim1,ndim2,ndim3,veclen,a,b) c=0.0 call cpu_time(t0) do i=1,nruns call method1(ndim1,ndim2,ndim3,veclen,a,b,c) enddo call cpu_time(t1) t = t1-t0 write(6,*)'speed(1) =',real(icount)/(1000000.*t) d=0.0 call cpu_time(t0) do i=1,nruns call method2(ndim1,ndim2,ndim3,veclen,a,b,d) enddo call cpu_time(t1) t = t1-t0 write(6,*)'speed(2) =',real(icount)/(1000000.*t) call diff2(ndim2*ndim3*veclen,c,d,diff) write(6,*)'discrepancy =',diff stop end program matmul_test subroutine makemats(ndim1,ndim2,ndim3,veclen,a,b) implicit none integer ndim1,ndim2,ndim3,veclen,i,j,k real*8 a(ndim1,ndim3) real*8 b(ndim2,ndim3,veclen) do j=1,ndim3 do i=1,ndim1 a(i,j)=k+i+j enddo enddo do k=1,veclen do j=1,ndim3 do i=1,ndim2 b(i,j,k)=k-i-j enddo enddo enddo return end subroutine makemats subroutine diff2(n,c,d,diff) implicit none integer n,i real*8 c(n), d(n) real*8 diff diff = 0.0 do i=1,n diff = diff + (c(i)-d(i))**2 enddo return end subroutine diff2 subroutine method1(ndim1,ndim2,ndim3,veclen,a,b,c) implicit none integer, intent(in) :: ndim1,ndim2,ndim3,veclen real*8, intent(in) :: a(0:ndim1-1,0:ndim3-1) real*8, intent(in) :: b(0:ndim2-1,0:ndim3-1,0:veclen-1) real*8, intent(inout) :: c(0:ndim2-1,0:ndim1-1,0:veclen-1) integer :: i,n1,n2,n3 do i=0,veclen-1 do n3=0,ndim3-1 do n1=0,ndim1-1 do n2=0,ndim2-1 c(n2,n1,i)=c(n2,n1,i)+a(n1,n3)*b(n2,n3,i) enddo enddo enddo enddo return end subroutine subroutine method2(ndim1,ndim2,ndim3,veclen,a,b,c) implicit none integer, intent(in) :: ndim1,ndim2,ndim3,veclen real*8, intent(in) :: a(ndim1,ndim3) real*8, intent(in) :: b(ndim2,ndim3,veclen) real*8, intent(inout) :: c(ndim2,ndim1,veclen) integer :: i,n1,n2,n3 do i=1,veclen do n3=1,ndim3 do n1=1,ndim1 do n2=1,ndim2 c(n2,n1,i)=c(n2,n1,i)+a(n1,n3)*b(n2,n3,i) enddo enddo enddo enddo return end subroutine