Stephen Pickles wrote:
> Is this a known problem? Should I wait for it to be fixed or should
> I work around it?
Hi Stephen,
This sure seems like a bug to me -- I'm attaching the WHIRL-to-Fortran
output, where it's clear that in method1() no extra computations are
being exposed, while in method2() the compiler is unrolling the outer
loops 4x4.
I will file a bug in our internal tracking system on this. I have no
idea when it might be addressed, however.
Thanks for pointing this out,
John
--
John Baron jbaron@xxxxxxx
SGI Performance Engineering and Math Libraries
C ***********************************************************
C Fortran file translated from WHIRL Tue May 8 13:42:54 2001
C ***********************************************************
PROGRAM MAIN
  ! Benchmark driver.
  !
  ! Builds the inputs A and B with makemats, then times 100000 calls of
  ! method1 (accumulating into C) and 100000 calls of method2
  ! (accumulating into D) on those same inputs.  A speed figure is
  ! printed for each, followed by the summed squared difference between
  ! C and D as computed by diff2.
  IMPLICIT NONE

  ! ---- data ----
  REAL(8) :: A(4_8, 4_8)
  REAL(8) :: B(4_8, 4_8, 80_8)
  REAL(8) :: C(4_8, 4_8, 80_8)
  REAL(8) :: D(4_8, 4_8, 80_8)
  REAL(8) :: DIFF

  ! ---- bookkeeping ----
  INTEGER(4) :: ICOUNT
  INTEGER(4) :: PASS
  REAL(4) :: T_BEGIN
  REAL(4) :: T_END
  REAL(4) :: ELAPSED

  ! Timer routine exactly as named in the translated output; presumably
  ! the lowered form of the CPU_TIME intrinsic for REAL(4) -- TODO
  ! confirm.
  EXTERNAL _CPU_TIME_4

  ! 1024000000 = 100000 passes * (4*4*4*80) inner updates * 2.
  ! Presumably a floating-point operation count, which would make the
  ! printed figure millions of operations per second -- TODO confirm.
  ICOUNT = 1024000000

  CALL makemats(4, 4, 4, 80, A, B)

  ! ---- method1: start from a zeroed accumulator ----
  C = 0.0D00
  CALL _CPU_TIME_4(T_BEGIN)
  DO PASS = 1, 100000
    CALL method1(4, 4, 4, 80, A, B, C)
  END DO
  CALL _CPU_TIME_4(T_END)
  ELAPSED = T_END - T_BEGIN
  WRITE(6, *) 'speed(1) =', REAL(ICOUNT) / (ELAPSED * 1.0E+06)

  ! ---- method2: same inputs, separate zeroed accumulator ----
  D = 0.0D00
  CALL _CPU_TIME_4(T_BEGIN)
  DO PASS = 1, 100000
    CALL method2(4, 4, 4, 80, A, B, D)
  END DO
  CALL _CPU_TIME_4(T_END)
  ELAPSED = T_END - T_BEGIN
  WRITE(6, *) 'speed(2) =', REAL(ICOUNT) / (ELAPSED * 1.0E+06)

  ! 1280 = 4*4*80: C and D are handed to diff2 as flat vectors.
  CALL diff2(1280, C, D, DIFF)
  WRITE(6, *) 'discrepancy =', DIFF

  STOP
END PROGRAM MAIN
SUBROUTINE makemats(NDIM1, NDIM2, NDIM3, VECLEN, A, B)
! Fills the two input arrays A and B.
!   A(i, j)    = DBLE(i + j + K)   for i = 1..NDIM1, j = 1..NDIM3
!   B(i, j, k) = DBLE(k - i - j)   for i = 1..NDIM2, j = 1..NDIM3,
!                                      k = 1..VECLEN
! The loops are in the translator's unrolled form: a main loop with a
! stride greater than 1, followed by remainder loops that start at the
! index value the preceding loop left behind.
!
! NOTE(review): the A formula reads K before any statement in this
! subroutine assigns it (K is first set by the DO K loop that fills
! B), so the values stored in A look undefined -- confirm against the
! original source.
IMPLICIT NONE
INTEGER(4) NDIM1
INTEGER(4) NDIM2
INTEGER(4) NDIM3
INTEGER(4) VECLEN
! Array bounds are written in terms of temporaries that are assigned
! in the statement section: t$5 = NDIM1, t$6 = NDIM3, t$7 = NDIM2,
! t$8 = VECLEN.
REAL(8) A(t$5, t$6)
REAL(8) B(t$7, t$6, t$8)
C
C **** Variables and functions ****
C
INTEGER(8) t$5
INTEGER(8) t$9
INTEGER(8) t$6
INTEGER(8) t$11
INTEGER(8) t$7
INTEGER(8) t$13
INTEGER(8) t$8
INTEGER(8) t$16
INTEGER(4) I
INTEGER(4) J
INTEGER(4) K
INTEGER(8) t$10
INTEGER(8) t$12
INTEGER(8) t$14
INTEGER(8) t$15
INTEGER(8) t$17
C
C **** Temporary variables ****
C
INTEGER(4) tmp0_I_0
INTEGER(4) wd_J
INTEGER(4) I1
INTEGER(4) wd_$wd_J
INTEGER(4) I2
INTEGER(4) J0
INTEGER(4) I0
INTEGER(4) wd_J1
INTEGER(4) I3
INTEGER(4) wd_$wd_J1
INTEGER(4) I4
INTEGER(4) wd_K
INTEGER(4) J1
INTEGER(4) I5
INTEGER(4) wd_J0
INTEGER(4) I6
INTEGER(4) wd_$wd_J0
INTEGER(4) I7
C
C **** statements ****
C
! Bound temporaries.  Only t$5, t$6, t$7 and t$8 are referenced again
! (in the declarations of A and B); t$9..t$17 are assigned here and
! not read anywhere else in this subroutine.
t$5 = NDIM1
t$6 = NDIM3
t$9 = MAX(NDIM1, 0)
t$10 = (MAX(NDIM1, 0) * 2)
t$11 = MAX(NDIM3, 0)
t$12 = (MAX(NDIM1, 0) * MAX(NDIM3, 0))
t$7 = NDIM2
t$8 = VECLEN
t$13 = MAX(NDIM2, 0)
t$14 = (MAX(NDIM2, 0) * 2)
t$15 = ((MAX(NDIM2, 0) * MAX(NDIM3, 0)) * 2)
t$16 = MAX(VECLEN, 0)
t$17 = (MAX(NDIM2, 0) *(MAX(NDIM3, 0) * MAX(VECLEN, 0)))
! ---- A, main loop: eight columns (J .. J+7) per trip ----
DO J = 1, (NDIM3 + -7_8), 8
IF(NDIM1 .GE. 1) THEN
! J + K is hoisted out of the I loop.  K has not been assigned yet at
! this point (see the NOTE at the top).
tmp0_I_0 = (J + K)
DO I = 1, NDIM1, 1
A(I, J) = DBLE((J +(I + K)))
A(I, J + 1) = DBLE(((I + tmp0_I_0) + 1))
A(I, J + 2) = DBLE(((I + tmp0_I_0) + 2))
A(I, J + 3) = DBLE(((I + tmp0_I_0) + 3))
A(I, J + 4) = DBLE(((I + tmp0_I_0) + 4))
A(I, J + 5) = DBLE(((I + tmp0_I_0) + 5))
A(I, J + 6) = DBLE(((I + tmp0_I_0) + 6))
A(I, J + 7) = DBLE(((I + tmp0_I_0) + 7))
END DO
ENDIF
END DO
! ---- A, remainder: two columns per trip, resuming at J's exit value
DO wd_J = J, NDIM3 + -1, 2
DO I1 = 1, NDIM1, 1
A(I1, wd_J) = DBLE((wd_J +(K + I1)))
A(I1, wd_J + 1) = DBLE(((wd_J +(K + I1)) + 1))
END DO
END DO
! ---- A, final remainder: one column per trip, resuming at wd_J ----
DO wd_$wd_J = wd_J, NDIM3, 1
DO I2 = 1, NDIM1, 1
A(I2, wd_$wd_J) = DBLE((wd_$wd_J +(K + I2)))
END DO
END DO
! ---- B, main loop: two K planes (K and K+1) per trip ----
DO K = 1, VECLEN + -1, 2
! Six columns (J0 .. J0+5) per trip.
DO J0 = 1, (NDIM3 + -5_8), 6
DO I0 = 1, NDIM2, 1
B(I0, J0, K) = DBLE(((K - I0) - J0))
B(I0, J0, K + 1) = DBLE((((K - I0) - J0) + 1))
B(I0, J0 + 1, K) = DBLE(((K - I0) -(J0 + 1)))
B(I0, J0 + 1, K + 1) = DBLE((((K - I0) -(J0 + 1)) + 1))
B(I0, J0 + 2, K) = DBLE(((K - I0) -(J0 + 2)))
B(I0, J0 + 2, K + 1) = DBLE((((K - I0) -(J0 + 2)) + 1))
B(I0, J0 + 3, K) = DBLE(((K - I0) -(J0 + 3)))
B(I0, J0 + 3, K + 1) = DBLE((((K - I0) -(J0 + 3)) + 1))
B(I0, J0 + 4, K) = DBLE(((K - I0) -(J0 + 4)))
B(I0, J0 + 4, K + 1) = DBLE((((K - I0) -(J0 + 4)) + 1))
B(I0, J0 + 5, K) = DBLE(((K - I0) -(J0 + 5)))
B(I0, J0 + 5, K + 1) = DBLE((((K - I0) -(J0 + 5)) + 1))
END DO
END DO
! Column remainder: two columns per trip, resuming at J0's exit value.
DO wd_J1 = J0, NDIM3 + -1, 2
DO I3 = 1, NDIM2, 1
B(I3, wd_J1, K) = DBLE(((K - I3) - wd_J1))
B(I3, wd_J1, K + 1) = DBLE((((K - I3) - wd_J1) + 1))
B(I3, wd_J1 + 1, K) = DBLE(((K - I3) -(wd_J1 + 1)))
B(I3, wd_J1 + 1, K + 1) = DBLE((((K - I3) -(wd_J1 + 1)) + 1))
END DO
END DO
! Final column remainder: one column per trip, resuming at wd_J1.
DO wd_$wd_J1 = wd_J1, NDIM3, 1
DO I4 = 1, NDIM2, 1
B(I4, wd_$wd_J1, K) = DBLE(((K - I4) - wd_$wd_J1))
B(I4, wd_$wd_J1, K + 1) = DBLE((((K - I4) - wd_$wd_J1) + 1))
END DO
END DO
END DO
! ---- B, remainder K planes: one plane per trip, resuming at K's
! exit value; same column structure (6, then 2, then 1) as above ----
DO wd_K = K, VECLEN, 1
DO J1 = 1, (NDIM3 + -5_8), 6
DO I5 = 1, NDIM2, 1
B(I5, J1, wd_K) = DBLE(((wd_K - I5) - J1))
B(I5, J1 + 1, wd_K) = DBLE(((wd_K - I5) -(J1 + 1)))
B(I5, J1 + 2, wd_K) = DBLE(((wd_K - I5) -(J1 + 2)))
B(I5, J1 + 3, wd_K) = DBLE(((wd_K - I5) -(J1 + 3)))
B(I5, J1 + 4, wd_K) = DBLE(((wd_K - I5) -(J1 + 4)))
B(I5, J1 + 5, wd_K) = DBLE(((wd_K - I5) -(J1 + 5)))
END DO
END DO
DO wd_J0 = J1, NDIM3 + -1, 2
DO I6 = 1, NDIM2, 1
B(I6, wd_J0, wd_K) = DBLE(((wd_K - I6) - wd_J0))
B(I6, wd_J0 + 1, wd_K) = DBLE(((wd_K - I6) -(wd_J0 + 1)))
END DO
END DO
DO wd_$wd_J0 = wd_J0, NDIM3, 1
DO I7 = 1, NDIM2, 1
B(I7, wd_$wd_J0, wd_K) = DBLE(((wd_K - I7) - wd_$wd_J0))
END DO
END DO
END DO
RETURN
END SUBROUTINE
SUBROUTINE diff2(N, C, D, DIFF)
  ! Sum of squared element-wise differences between two vectors.
  !
  ! Arguments:
  !   N    (in)  number of elements to compare; N <= 0 compares nothing
  !   C    (in)  first vector, treated as a flat array of length N
  !   D    (in)  second vector, treated as a flat array of length N
  !   DIFF (out) sum over i = 1..N of (C(i) - D(i))**2; 0 when N <= 0
  !
  ! The array bounds are taken directly from the dummy argument N so
  ! that the declarations are valid specification expressions.
  IMPLICIT NONE
  INTEGER(4), INTENT(IN) :: N
  REAL(8), INTENT(IN) :: C(N)
  REAL(8), INTENT(IN) :: D(N)
  REAL(8), INTENT(OUT) :: DIFF

  INTEGER(4) :: I

  DIFF = 0.0D00
  ! Explicit left-to-right accumulation keeps the summation order fixed.
  DO I = 1, N, 1
    DIFF = (DIFF +(((C(I) - D(I))) ** 2))
  END DO
  RETURN
END SUBROUTINE diff2
SUBROUTINE method1(NDIM1, NDIM2, NDIM3, VECLEN, A, B, C)
  ! Accumulates into C the products of B with elements of A:
  !
  !   C(n2, n1, i) = C(n2, n1, i) + B(n2, n3, i) * A(n1, n3)
  !
  ! for n1 = 1..NDIM1, n2 = 1..NDIM2, n3 = 1..NDIM3, i = 1..VECLEN.
  ! C is updated in place; the caller is responsible for its initial
  ! contents.
  !
  ! Arguments:
  !   NDIM1, NDIM2, NDIM3, VECLEN (in)  array extents, see below
  !   A (in)     A(NDIM1, NDIM3)
  !   B (in)     B(NDIM2, NDIM3, VECLEN)
  !   C (inout)  C(NDIM2, NDIM1, VECLEN)
  !
  ! The array bounds come straight from the dummy arguments so that the
  ! declarations are valid specification expressions.  The loop nest is
  ! kept in the order the translator emitted (I, N3, N1, N2 from outer
  ! to inner, zero-based counters) with no unrolling.
  IMPLICIT NONE
  INTEGER(4), INTENT(IN) :: NDIM1
  INTEGER(4), INTENT(IN) :: NDIM2
  INTEGER(4), INTENT(IN) :: NDIM3
  INTEGER(4), INTENT(IN) :: VECLEN
  REAL(8), INTENT(IN) :: A(NDIM1, NDIM3)
  REAL(8), INTENT(IN) :: B(NDIM2, NDIM3, VECLEN)
  REAL(8), INTENT(INOUT) :: C(NDIM2, NDIM1, VECLEN)

  INTEGER(4) :: I
  INTEGER(4) :: N1
  INTEGER(4) :: N2
  INTEGER(4) :: N3
  ! Holds A(N1 + 1, N3 + 1), which does not change inside the N2 loop.
  REAL(8) :: mi0

  ! With NDIM2 < 1 the innermost loop would never run, so the whole
  ! nest is skipped.
  IF (NDIM2 .GE. 1) THEN
    DO I = 0, VECLEN + -1, 1
      DO N3 = 0, NDIM3 + -1, 1
        DO N1 = 0, NDIM1 + -1, 1
          mi0 = A(N1 + 1, N3 + 1)
          DO N2 = 0, NDIM2 + -1, 1
            C(N2 + 1, N1 + 1, I + 1) = (C(N2 + 1, N1 + 1, I + 1) + &
                (B(N2 + 1, N3 + 1, I + 1) * mi0))
          END DO
        END DO
      END DO
    END DO
  ENDIF
  RETURN
END SUBROUTINE method1
SUBROUTINE method2(NDIM1, NDIM2, NDIM3, VECLEN, A, B, C)
! Accumulates into C the products of B with elements of A:
!   C(n2, n1, i) = C(n2, n1, i) + B(n2, n3, i) * A(n1, n3)
! for n1 = 1..NDIM1, n2 = 1..NDIM2, n3 = 1..NDIM3, i = 1..VECLEN.
! C is updated in place; nothing in this subroutine zeroes it first.
!
! In this translated form the N3 and N1 loops are each unrolled by 4
! (a 4x4 block of A per trip).  Remainder loops follow and start at
! the index value the unrolled loop left behind.
IMPLICIT NONE
INTEGER(4) NDIM1
INTEGER(4) NDIM2
INTEGER(4) NDIM3
INTEGER(4) VECLEN
! Array bounds are written in terms of temporaries that are assigned
! in the statement section: t$40 = NDIM1, t$41 = NDIM3, t$42 = NDIM2,
! t$43 = VECLEN.
REAL(8) A(t$40, t$41)
REAL(8) B(t$42, t$41, t$43)
REAL(8) C(t$42, t$40, t$43)
C
C **** Variables and functions ****
C
INTEGER(8) t$40
INTEGER(8) t$44
INTEGER(8) t$41
INTEGER(8) t$46
INTEGER(8) t$42
INTEGER(8) t$48
INTEGER(8) t$43
INTEGER(8) t$51
INTEGER(4) I
INTEGER(4) N1
INTEGER(4) N2
INTEGER(4) N3
INTEGER(8) t$45
INTEGER(8) t$47
INTEGER(8) t$49
INTEGER(8) t$50
INTEGER(8) t$52
INTEGER(8) t$53
INTEGER(8) t$54
C
C **** Temporary variables ****
C
! mi1..mi25 each hold one element of A, loaded ahead of an innermost
! loop in which that element does not change.
REAL(8) mi1
REAL(8) mi2
REAL(8) mi3
REAL(8) mi4
REAL(8) mi5
REAL(8) mi6
REAL(8) mi7
REAL(8) mi8
REAL(8) mi9
REAL(8) mi10
REAL(8) mi11
REAL(8) mi12
REAL(8) mi13
REAL(8) mi14
REAL(8) mi15
REAL(8) mi16
INTEGER(4) wd_N
REAL(8) mi17
REAL(8) mi18
REAL(8) mi19
REAL(8) mi20
INTEGER(4) N0
INTEGER(4) wd_N3
INTEGER(4) N
REAL(8) mi21
REAL(8) mi22
REAL(8) mi23
REAL(8) mi24
INTEGER(4) N4
INTEGER(4) wd_N1
REAL(8) mi25
INTEGER(4) N5
C
C **** statements ****
C
! Bound temporaries.  Only t$40..t$43 are referenced again (in the
! declarations of A, B and C); t$44..t$54 are assigned here and not
! read anywhere else in this subroutine.
t$40 = NDIM1
t$41 = NDIM3
t$44 = MAX(NDIM1, 0)
t$45 = (MAX(NDIM1, 0) * 2)
t$46 = MAX(NDIM3, 0)
t$47 = (MAX(NDIM1, 0) * MAX(NDIM3, 0))
t$42 = NDIM2
t$43 = VECLEN
t$48 = MAX(NDIM2, 0)
t$49 = (MAX(NDIM2, 0) * 2)
t$50 = ((MAX(NDIM2, 0) * MAX(NDIM3, 0)) * 2)
t$51 = MAX(VECLEN, 0)
t$52 = (MAX(NDIM2, 0) *(MAX(NDIM3, 0) * MAX(VECLEN, 0)))
t$53 = ((MAX(NDIM1, 0) * MAX(NDIM2, 0)) * 2)
t$54 = (MAX(NDIM2, 0) *(MAX(NDIM1, 0) * MAX(VECLEN, 0)))
DO I = 1, VECLEN, 1
! ---- main part: N3 in steps of 4 ----
DO N3 = 1, (NDIM3 + -3_8), 4
! ---- 4x4 block: N1 in steps of 4 ----
DO N1 = 1, (NDIM1 + -3_8), 4
IF(NDIM2 .GE. 1) THEN
! Load the sixteen elements A(N1..N1+3, N3..N3+3).
mi1 = A(N1, N3)
mi2 = A(N1 + 3, N3 + 3)
mi3 = A(N1 + 3, N3 + 2)
mi4 = A(N1 + 3, N3 + 1)
mi5 = A(N1 + 3, N3)
mi6 = A(N1, N3 + 1)
mi7 = A(N1 + 2, N3 + 3)
mi8 = A(N1 + 2, N3 + 2)
mi9 = A(N1 + 2, N3 + 1)
mi10 = A(N1 + 2, N3)
mi11 = A(N1, N3 + 2)
mi12 = A(N1 + 1, N3 + 3)
mi13 = A(N1 + 1, N3 + 2)
mi14 = A(N1 + 1, N3 + 1)
mi15 = A(N1 + 1, N3)
mi16 = A(N1, N3 + 3)
! Sixteen updates per N2: four C columns (N1..N1+3), each receiving
! the contributions of four B columns (N3..N3+3) in turn.
DO N2 = 1, NDIM2, 1
C(N2, N1, I) = (C(N2, N1, I) +(B(N2, N3, I) * mi1))
C(N2, N1, I) = (C(N2, N1, I) +(B(N2, N3 + 1, I) * mi6))
C(N2, N1, I) = (C(N2, N1, I) +(B(N2, N3 + 2, I) * mi11))
C(N2, N1, I) = (C(N2, N1, I) +(B(N2, N3 + 3, I) * mi16))
C(N2, N1 + 1, I) = (C(N2, N1 + 1, I) +(B(N2, N3, I) * mi15))
C(N2, N1 + 1, I) = (C(N2, N1 + 1, I) +(B(N2, N3 + 1, I) *
mi14))
C(N2, N1 + 1, I) = (C(N2, N1 + 1, I) +(B(N2, N3 + 2, I) *
mi13))
C(N2, N1 + 1, I) = (C(N2, N1 + 1, I) +(B(N2, N3 + 3, I) *
mi12))
C(N2, N1 + 2, I) = (C(N2, N1 + 2, I) +(B(N2, N3, I) * mi10))
C(N2, N1 + 2, I) = (C(N2, N1 + 2, I) +(B(N2, N3 + 1, I) *
mi9))
C(N2, N1 + 2, I) = (C(N2, N1 + 2, I) +(B(N2, N3 + 2, I) *
mi8))
C(N2, N1 + 2, I) = (C(N2, N1 + 2, I) +(B(N2, N3 + 3, I) *
mi7))
C(N2, N1 + 3, I) = (C(N2, N1 + 3, I) +(B(N2, N3, I) * mi5))
C(N2, N1 + 3, I) = (C(N2, N1 + 3, I) +(B(N2, N3 + 1, I) *
mi4))
C(N2, N1 + 3, I) = (C(N2, N1 + 3, I) +(B(N2, N3 + 2, I) *
mi3))
C(N2, N1 + 3, I) = (C(N2, N1 + 3, I) +(B(N2, N3 + 3, I) *
mi2))
END DO
ENDIF
END DO
! ---- N1 remainder for this group of four N3 values: one C column
! per trip, resuming at N1's exit value ----
IF(NDIM2 .GE. 1) THEN
DO wd_N = N1, NDIM1, 1
mi17 = A(wd_N, N3)
mi18 = A(wd_N, N3 + 1)
mi19 = A(wd_N, N3 + 3)
mi20 = A(wd_N, N3 + 2)
DO N0 = 1, NDIM2, 1
C(N0, wd_N, I) = (C(N0, wd_N, I) +(B(N0, N3, I) * mi17))
C(N0, wd_N, I) = (C(N0, wd_N, I) +(B(N0, N3 + 1, I) * mi18))
C(N0, wd_N, I) = (C(N0, wd_N, I) +(B(N0, N3 + 2, I) * mi20))
C(N0, wd_N, I) = (C(N0, wd_N, I) +(B(N0, N3 + 3, I) * mi19))
END DO
END DO
ENDIF
END DO
! ---- N3 remainder: one N3 value per trip, resuming at N3's exit
! value ----
DO wd_N3 = N3, NDIM3, 1
! Four C columns (N..N+3) per trip.
DO N = 1, (NDIM1 + -3_8), 4
IF(NDIM2 .GE. 1) THEN
mi21 = A(N, wd_N3)
mi22 = A(N + 3, wd_N3)
mi23 = A(N + 1, wd_N3)
mi24 = A(N + 2, wd_N3)
DO N4 = 1, NDIM2, 1
C(N4, N, I) = (C(N4, N, I) +(B(N4, wd_N3, I) * mi21))
C(N4, N + 1, I) = (C(N4, N + 1, I) +(B(N4, wd_N3, I) * mi23))
C(N4, N + 2, I) = (C(N4, N + 2, I) +(B(N4, wd_N3, I) * mi24))
C(N4, N + 3, I) = (C(N4, N + 3, I) +(B(N4, wd_N3, I) * mi22))
END DO
ENDIF
END DO
! Remaining C columns, one per trip, resuming at N's exit value.
IF(NDIM2 .GE. 1) THEN
DO wd_N1 = N, NDIM1, 1
mi25 = A(wd_N1, wd_N3)
DO N5 = 1, NDIM2, 1
C(N5, wd_N1, I) = (C(N5, wd_N1, I) +(B(N5, wd_N3, I) * mi25))
END DO
END DO
ENDIF
END DO
END DO
RETURN
END SUBROUTINE
|