pro64-support
[Top] [All Lists]

Re: fortran performance and array indexing

To: Stephen Pickles <s.pickles@xxxxxxxxx>
Subject: Re: fortran performance and array indexing
From: John Baron <jbaron@xxxxxxx>
Date: Tue, 08 May 2001 16:00:06 -0700
Cc: Pro64 Support <pro64-support@xxxxxxxxxxx>
Organization: SGI Compute Intensive Applications
References: <Pine.BSF.4.21.0105081555460.68431-200000@nessie.mcc.ac.uk>
Reply-to: jbaron@xxxxxxx
Sender: owner-pro64-support@xxxxxxxxxxx
Stephen Pickles wrote:

> Is this a known problem? Should I wait for it to be fixed or should
> I work around it?

Hi Stephen,

This sure seems like a bug to me -- I'm attaching the WHIRL-to-Fortran
output, where it's clear that in method1() no extra computations are
being exposed, while in method2() the compiler is unrolling the outer
loops 4x4.

I will file a bug in our internal tracking system on this.  I have no
idea when it might be addressed, however.

Thanks for pointing this out,

John

-- 
John Baron                                              jbaron@xxxxxxx
SGI Performance Engineering and Math Libraries
C ***********************************************************
C Fortran file translated from WHIRL Tue May  8 13:42:54 2001
C ***********************************************************

        PROGRAM MAIN
        IMPLICIT NONE
C
C       Timing driver.  Fills A and B with makemats, then times
C       100000 calls of method1 (accumulating into C) and 100000
C       calls of method2 (accumulating into D), prints a speed
C       figure for each, and finally prints the value returned by
C       diff2 for C versus D.
C
C
C       **** Variables and functions ****
C
        REAL(8) A(4_8, 4_8)
        REAL(8) B(4_8, 4_8, 80_8)
        REAL(8) C(4_8, 4_8, 80_8)
        REAL(8) D(4_8, 4_8, 80_8)
        REAL(8) DIFF
        INTEGER(4) I
        INTEGER(4) ICOUNT
        REAL(4) T
        REAL(4) T0
        REAL(4) T1
        
        
        
        
C       NOTE(review): _CPU_TIME_4 is not defined in this listing;
C       presumably it is the compiler's lowered form of the
C       CPU_TIME intrinsic for a REAL(4) argument -- confirm.
        EXTERNAL _CPU_TIME_4
C
C       **** Temporary variables ****
C
        INTEGER(8) f90li_1_1
        INTEGER(8) f90li_2_1
        INTEGER(8) f90li_0_1
        INTEGER(8) f90li_1_2
        INTEGER(8) f90li_2_2
        INTEGER(8) f90li_0_2
        INTEGER(4) I0
C
C       **** statements ****
C
C       Numerator of the printed speed figures.  Presumably the
C       operation count of one timed section: 100000 calls times
C       4*4*4*80 inner iterations times 2 operations -- confirm.
        ICOUNT = 1024000000
        CALL makemats(4, 4, 4, 80, A, B)
C       Zero all of C.  The middle loop steps by 2 and each pass
C       stores two elements, so first-dimension indices 1..4 are
C       all covered.
        DO f90li_1_1 = 0, 3, 1
          DO f90li_2_1 = 0, 3, 2
            DO f90li_0_1 = 0, 79, 1
              C(f90li_2_1 + 1, f90li_1_1 + 1, f90li_0_1 + 1) = 0.0D00
              C(f90li_2_1 + 2_8, f90li_1_1 + 1, f90li_0_1 + 1) = 0.0D00
            END DO
          END DO
        END DO
C       Timed section 1: method1 accumulates into C.
        CALL _CPU_TIME_4(T0)
        DO I = 1, 100000, 1
          CALL method1(4, 4, 4, 80, A, B, C)
        END DO
        CALL _CPU_TIME_4(T1)
        T = (T1 - T0)
        WRITE(6, *) 'speed(1) =', (REAL(ICOUNT) /((T * 1.0E+06)))
C       Zero all of D, using the same pattern as for C above.
        DO f90li_1_2 = 0, 3, 1
          DO f90li_2_2 = 0, 3, 2
            DO f90li_0_2 = 0, 79, 1
              D(f90li_2_2 + 1, f90li_1_2 + 1, f90li_0_2 + 1) = 0.0D00
              D(f90li_2_2 + 2_8, f90li_1_2 + 1, f90li_0_2 + 1) = 0.0D00
            END DO
          END DO
        END DO
C       Timed section 2: method2 accumulates into D.
        CALL _CPU_TIME_4(T0)
        DO I0 = 1, 100000, 1
          CALL method2(4, 4, 4, 80, A, B, D)
        END DO
        CALL _CPU_TIME_4(T1)
        T = (T1 - T0)
        WRITE(6, *) 'speed(2) =', (REAL(ICOUNT) /((T * 1.0E+06)))
C       1280 = 4*4*80, the number of elements in each of C and D;
C       diff2 receives them as rank-1 arrays of that length.
        CALL diff2(1280, C, D, DIFF)
        WRITE(6, *) 'discrepancy =', DIFF
        STOP
        END

        SUBROUTINE makemats(NDIM1, NDIM2, NDIM3, VECLEN, A, B)
        IMPLICIT NONE
C
C       Fill the input matrices:
C         A(i, j)    = DBLE(j + i + K)   i = 1..NDIM1, j = 1..NDIM3
C         B(i, j, k) = DBLE(k - i - j)   i = 1..NDIM2, j = 1..NDIM3,
C                                        k = 1..VECLEN
C       The loops appear in unrolled form, each followed by
C       remainder loops that start from the value the preceding
C       DO variable was left with.
C
C       NOTE(review): K is read in the A-filling loops before it is
C       assigned anywhere in this subroutine (its first definition
C       is as the DO variable of the later B-filling loop), so the
C       values stored in A depend on an undefined variable.
C
        INTEGER(4) NDIM1
        INTEGER(4) NDIM2
        INTEGER(4) NDIM3
        INTEGER(4) VECLEN
        REAL(8) A(t$5, t$6)
        REAL(8) B(t$7, t$6, t$8)
C
C       **** Variables and functions ****
C
C       t$5..t$8 appear as the array bounds above; the remaining
C       t$ temporaries are assigned below but never read here.
        INTEGER(8) t$5
        INTEGER(8) t$9
        INTEGER(8) t$6
        INTEGER(8) t$11
        INTEGER(8) t$7
        INTEGER(8) t$13
        INTEGER(8) t$8
        INTEGER(8) t$16
        INTEGER(4) I
        INTEGER(4) J
        INTEGER(4) K
        INTEGER(8) t$10
        INTEGER(8) t$12
        INTEGER(8) t$14
        INTEGER(8) t$15
        INTEGER(8) t$17
C
C       **** Temporary variables ****
C
        INTEGER(4) tmp0_I_0
        INTEGER(4) wd_J
        INTEGER(4) I1
        INTEGER(4) wd_$wd_J
        INTEGER(4) I2
        INTEGER(4) J0
        INTEGER(4) I0
        INTEGER(4) wd_J1
        INTEGER(4) I3
        INTEGER(4) wd_$wd_J1
        INTEGER(4) I4
        INTEGER(4) wd_K
        INTEGER(4) J1
        INTEGER(4) I5
        INTEGER(4) wd_J0
        INTEGER(4) I6
        INTEGER(4) wd_$wd_J0
        INTEGER(4) I7
C
C       **** statements ****
C
        t$5 = NDIM1
        t$6 = NDIM3
        t$9 = MAX(NDIM1, 0)
        t$10 = (MAX(NDIM1, 0) * 2)
        t$11 = MAX(NDIM3, 0)
        t$12 = (MAX(NDIM1, 0) * MAX(NDIM3, 0))
        t$7 = NDIM2
        t$8 = VECLEN
        t$13 = MAX(NDIM2, 0)
        t$14 = (MAX(NDIM2, 0) * 2)
        t$15 = ((MAX(NDIM2, 0) * MAX(NDIM3, 0)) * 2)
        t$16 = MAX(VECLEN, 0)
        t$17 = (MAX(NDIM2, 0) *(MAX(NDIM3, 0) * MAX(VECLEN, 0)))
C       Fill A: J loop unrolled by 8.  tmp0_I_0 holds J + K so the
C       unrolled stores only add I and the column offset.
        DO J = 1, (NDIM3 + -7_8), 8
          IF(NDIM1 .GE. 1) THEN
            tmp0_I_0 = (J + K)
            DO I = 1, NDIM1, 1
              A(I, J) = DBLE((J +(I + K)))
              A(I, J + 1) = DBLE(((I + tmp0_I_0) + 1))
              A(I, J + 2) = DBLE(((I + tmp0_I_0) + 2))
              A(I, J + 3) = DBLE(((I + tmp0_I_0) + 3))
              A(I, J + 4) = DBLE(((I + tmp0_I_0) + 4))
              A(I, J + 5) = DBLE(((I + tmp0_I_0) + 5))
              A(I, J + 6) = DBLE(((I + tmp0_I_0) + 6))
              A(I, J + 7) = DBLE(((I + tmp0_I_0) + 7))
            END DO
          ENDIF
        END DO
C       Fill A: remainder columns two at a time, starting at the
C       value J was left with by the loop above.
        DO wd_J = J, NDIM3 + -1, 2
          DO I1 = 1, NDIM1, 1
            A(I1, wd_J) = DBLE((wd_J +(K + I1)))
            A(I1, wd_J + 1) = DBLE(((wd_J +(K + I1)) + 1))
          END DO
        END DO
C       Fill A: any final columns one at a time, starting at the
C       value wd_J was left with.
        DO wd_$wd_J = wd_J, NDIM3, 1
          DO I2 = 1, NDIM1, 1
            A(I2, wd_$wd_J) = DBLE((wd_$wd_J +(K + I2)))
          END DO
        END DO
C       Fill B: K loop unrolled by 2 (planes K and K + 1 are stored
C       together); inside it the J loop is unrolled by 6, followed
C       by J remainder loops of step 2 and step 1.
        DO K = 1, VECLEN + -1, 2
          DO J0 = 1, (NDIM3 + -5_8), 6
            DO I0 = 1, NDIM2, 1
              B(I0, J0, K) = DBLE(((K - I0) - J0))
              B(I0, J0, K + 1) = DBLE((((K - I0) - J0) + 1))
              B(I0, J0 + 1, K) = DBLE(((K - I0) -(J0 + 1)))
              B(I0, J0 + 1, K + 1) = DBLE((((K - I0) -(J0 + 1)) + 1))
              B(I0, J0 + 2, K) = DBLE(((K - I0) -(J0 + 2)))
              B(I0, J0 + 2, K + 1) = DBLE((((K - I0) -(J0 + 2)) + 1))
              B(I0, J0 + 3, K) = DBLE(((K - I0) -(J0 + 3)))
              B(I0, J0 + 3, K + 1) = DBLE((((K - I0) -(J0 + 3)) + 1))
              B(I0, J0 + 4, K) = DBLE(((K - I0) -(J0 + 4)))
              B(I0, J0 + 4, K + 1) = DBLE((((K - I0) -(J0 + 4)) + 1))
              B(I0, J0 + 5, K) = DBLE(((K - I0) -(J0 + 5)))
              B(I0, J0 + 5, K + 1) = DBLE((((K - I0) -(J0 + 5)) + 1))
            END DO
          END DO
          DO wd_J1 = J0, NDIM3 + -1, 2
            DO I3 = 1, NDIM2, 1
              B(I3, wd_J1, K) = DBLE(((K - I3) - wd_J1))
              B(I3, wd_J1, K + 1) = DBLE((((K - I3) - wd_J1) + 1))
              B(I3, wd_J1 + 1, K) = DBLE(((K - I3) -(wd_J1 + 1)))
              B(I3, wd_J1 + 1, K + 1) = DBLE((((K - I3) -(wd_J1 + 1)) + 1))
            END DO
          END DO
          DO wd_$wd_J1 = wd_J1, NDIM3, 1
            DO I4 = 1, NDIM2, 1
              B(I4, wd_$wd_J1, K) = DBLE(((K - I4) - wd_$wd_J1))
              B(I4, wd_$wd_J1, K + 1) = DBLE((((K - I4) - wd_$wd_J1) + 1))
            END DO
          END DO
        END DO
C       Fill B: remaining planes one at a time, starting at the
C       value K was left with, using the same J unroll-by-6 /
C       step-2 / step-1 structure.
        DO wd_K = K, VECLEN, 1
          DO J1 = 1, (NDIM3 + -5_8), 6
            DO I5 = 1, NDIM2, 1
              B(I5, J1, wd_K) = DBLE(((wd_K - I5) - J1))
              B(I5, J1 + 1, wd_K) = DBLE(((wd_K - I5) -(J1 + 1)))
              B(I5, J1 + 2, wd_K) = DBLE(((wd_K - I5) -(J1 + 2)))
              B(I5, J1 + 3, wd_K) = DBLE(((wd_K - I5) -(J1 + 3)))
              B(I5, J1 + 4, wd_K) = DBLE(((wd_K - I5) -(J1 + 4)))
              B(I5, J1 + 5, wd_K) = DBLE(((wd_K - I5) -(J1 + 5)))
            END DO
          END DO
          DO wd_J0 = J1, NDIM3 + -1, 2
            DO I6 = 1, NDIM2, 1
              B(I6, wd_J0, wd_K) = DBLE(((wd_K - I6) - wd_J0))
              B(I6, wd_J0 + 1, wd_K) = DBLE(((wd_K - I6) -(wd_J0 + 1)))
            END DO
          END DO
          DO wd_$wd_J0 = wd_J0, NDIM3, 1
            DO I7 = 1, NDIM2, 1
              B(I7, wd_$wd_J0, wd_K) = DBLE(((wd_K - I7) - wd_$wd_J0))
            END DO
          END DO
        END DO
        RETURN
        END SUBROUTINE

        SUBROUTINE diff2(N, C, D, DIFF)
        IMPLICIT NONE
C
C       Sum of squared element-wise differences of two vectors:
C         DIFF = sum over i = 1..N of (C(i) - D(i))**2
C
C       N    (in)  number of elements compared; N < 1 yields 0.
C       C, D (in)  arrays treated as rank-1 with N elements.
C       DIFF (out) the accumulated sum of squares.
C
C       The dummy arrays are dimensioned directly on N.  The
C       earlier form dimensioned them on a local temporary that
C       was only assigned in the executable part, which is not a
C       valid bound expression, and also computed a second
C       temporary that was never read.
C
        INTEGER(4), INTENT(IN) :: N
        REAL(8), INTENT(IN) :: C(N)
        REAL(8), INTENT(IN) :: D(N)
        REAL(8), INTENT(OUT) :: DIFF
C
C       **** Variables and functions ****
C
        INTEGER(4) I
C
C       **** statements ****
C
        DIFF = 0.0D00
        DO I = 1, N, 1
          DIFF = (DIFF +(((C(I) - D(I))) ** 2))
        END DO
        RETURN
        END SUBROUTINE

        SUBROUTINE method1(NDIM1, NDIM2, NDIM3, VECLEN, A, B, C)
        IMPLICIT NONE
C
C       Accumulate, for every i, n1, n2, the sum over n3 of
C         B(n2, n3, i) * A(n1, n3)
C       into C(n2, n1, i).  The loops run over 0-based counters and
C       add 1 when subscripting.
C
C       NDIM1, NDIM2, NDIM3, VECLEN (in)  array extents.
C       A (in)     extents (NDIM1, NDIM3)
C       B (in)     extents (NDIM2, NDIM3, VECLEN)
C       C (inout)  extents (NDIM2, NDIM1, VECLEN); updated in place.
C
C       The arrays are dimensioned directly on the dummy extents.
C       The earlier form used local temporaries set to (NDIMx - 1)
C       and then bounds of (temporary + 1), i.e. the same extents,
C       but a local assigned in the executable part is not a valid
C       bound expression.  The other size temporaries were never
C       read, and several of their assignments were split across
C       lines without a continuation marker; they are removed.
C
        INTEGER(4), INTENT(IN) :: NDIM1
        INTEGER(4), INTENT(IN) :: NDIM2
        INTEGER(4), INTENT(IN) :: NDIM3
        INTEGER(4), INTENT(IN) :: VECLEN
        REAL(8), INTENT(IN) :: A(NDIM1, NDIM3)
        REAL(8), INTENT(IN) :: B(NDIM2, NDIM3, VECLEN)
        REAL(8), INTENT(INOUT) :: C(NDIM2, NDIM1, VECLEN)
C
C       **** Variables and functions ****
C
        INTEGER(4) I
        INTEGER(4) N1
        INTEGER(4) N2
        INTEGER(4) N3
C
C       **** Temporary variables ****
C
        REAL(8) mi0
C
C       **** statements ****
C
        IF(NDIM2 .GE. 1) THEN
          DO I = 0, VECLEN + -1, 1
            DO N3 = 0, NDIM3 + -1, 1
              DO N1 = 0, NDIM1 + -1, 1
C               A(N1 + 1, N3 + 1) does not depend on N2, so it is
C               loaded once outside the innermost loop.
                mi0 = A(N1 + 1, N3 + 1)
                DO N2 = 0, NDIM2 + -1, 1
                  C(N2 + 1, N1 + 1, I + 1) =
     &              (C(N2 + 1, N1 + 1, I + 1) +
     &              (B(N2 + 1, N3 + 1, I + 1) * mi0))
                END DO
              END DO
            END DO
          END DO
        ENDIF
        RETURN
        END SUBROUTINE

        SUBROUTINE method2(NDIM1, NDIM2, NDIM3, VECLEN, A, B, C)
        IMPLICIT NONE
C
C       Accumulate, for every I, N1, N2, the sum over N3 of
C         B(N2, N3, I) * A(N1, N3)
C       into C(N2, N1, I), using 1-based loops.
C
C       In this listing the N3 and N1 loops are each unrolled by 4,
C       so the main body loads 16 elements of A into mi1..mi16 and
C       performs 16 updates per N2 iteration.  Remainder loops
C       handle leftover N1 values (wd_N), leftover N3 values
C       (wd_N3, with its own N1 unroll-by-4 loop N) and the final
C       leftover N1 values (wd_N1).  Each remainder loop starts
C       from the value the preceding DO variable was left with.
C
C       NOTE(review): several statements in the main unrolled body
C       are split across two lines with no continuation marker in
C       column 6; the second part starts in column 1.  This looks
C       like wrapping in transit -- confirm against the original.
C
        INTEGER(4) NDIM1
        INTEGER(4) NDIM2
        INTEGER(4) NDIM3
        INTEGER(4) VECLEN
        REAL(8) A(t$40, t$41)
        REAL(8) B(t$42, t$41, t$43)
        REAL(8) C(t$42, t$40, t$43)
C
C       **** Variables and functions ****
C
C       t$40..t$43 appear as the array bounds above; the remaining
C       t$ temporaries are assigned below but never read here.
        INTEGER(8) t$40
        INTEGER(8) t$44
        INTEGER(8) t$41
        INTEGER(8) t$46
        INTEGER(8) t$42
        INTEGER(8) t$48
        INTEGER(8) t$43
        INTEGER(8) t$51
        INTEGER(4) I
        INTEGER(4) N1
        INTEGER(4) N2
        INTEGER(4) N3
        INTEGER(8) t$45
        INTEGER(8) t$47
        INTEGER(8) t$49
        INTEGER(8) t$50
        INTEGER(8) t$52
        INTEGER(8) t$53
        INTEGER(8) t$54
C
C       **** Temporary variables ****
C
        REAL(8) mi1
        REAL(8) mi2
        REAL(8) mi3
        REAL(8) mi4
        REAL(8) mi5
        REAL(8) mi6
        REAL(8) mi7
        REAL(8) mi8
        REAL(8) mi9
        REAL(8) mi10
        REAL(8) mi11
        REAL(8) mi12
        REAL(8) mi13
        REAL(8) mi14
        REAL(8) mi15
        REAL(8) mi16
        INTEGER(4) wd_N
        REAL(8) mi17
        REAL(8) mi18
        REAL(8) mi19
        REAL(8) mi20
        INTEGER(4) N0
        INTEGER(4) wd_N3
        INTEGER(4) N
        REAL(8) mi21
        REAL(8) mi22
        REAL(8) mi23
        REAL(8) mi24
        INTEGER(4) N4
        INTEGER(4) wd_N1
        REAL(8) mi25
        INTEGER(4) N5
C
C       **** statements ****
C
        t$40 = NDIM1
        t$41 = NDIM3
        t$44 = MAX(NDIM1, 0)
        t$45 = (MAX(NDIM1, 0) * 2)
        t$46 = MAX(NDIM3, 0)
        t$47 = (MAX(NDIM1, 0) * MAX(NDIM3, 0))
        t$42 = NDIM2
        t$43 = VECLEN
        t$48 = MAX(NDIM2, 0)
        t$49 = (MAX(NDIM2, 0) * 2)
        t$50 = ((MAX(NDIM2, 0) * MAX(NDIM3, 0)) * 2)
        t$51 = MAX(VECLEN, 0)
        t$52 = (MAX(NDIM2, 0) *(MAX(NDIM3, 0) * MAX(VECLEN, 0)))
        t$53 = ((MAX(NDIM1, 0) * MAX(NDIM2, 0)) * 2)
        t$54 = (MAX(NDIM2, 0) *(MAX(NDIM1, 0) * MAX(VECLEN, 0)))
        DO I = 1, VECLEN, 1
C         Main body: N3 and N1 both advance in steps of 4.
          DO N3 = 1, (NDIM3 + -3_8), 4
            DO N1 = 1, (NDIM1 + -3_8), 4
              IF(NDIM2 .GE. 1) THEN
C               Load the 4x4 tile A(N1..N1+3, N3..N3+3) once; none
C               of these elements depends on N2.
                mi1 = A(N1, N3)
                mi2 = A(N1 + 3, N3 + 3)
                mi3 = A(N1 + 3, N3 + 2)
                mi4 = A(N1 + 3, N3 + 1)
                mi5 = A(N1 + 3, N3)
                mi6 = A(N1, N3 + 1)
                mi7 = A(N1 + 2, N3 + 3)
                mi8 = A(N1 + 2, N3 + 2)
                mi9 = A(N1 + 2, N3 + 1)
                mi10 = A(N1 + 2, N3)
                mi11 = A(N1, N3 + 2)
                mi12 = A(N1 + 1, N3 + 3)
                mi13 = A(N1 + 1, N3 + 2)
                mi14 = A(N1 + 1, N3 + 1)
                mi15 = A(N1 + 1, N3)
                mi16 = A(N1, N3 + 3)
                DO N2 = 1, NDIM2, 1
                  C(N2, N1, I) = (C(N2, N1, I) +(B(N2, N3, I) * mi1))
                  C(N2, N1, I) = (C(N2, N1, I) +(B(N2, N3 + 1, I) * mi6))
                  C(N2, N1, I) = (C(N2, N1, I) +(B(N2, N3 + 2, I) * mi11))
                  C(N2, N1, I) = (C(N2, N1, I) +(B(N2, N3 + 3, I) * mi16))
                  C(N2, N1 + 1, I) = (C(N2, N1 + 1, I) +(B(N2, N3, I) * mi15))
                  C(N2, N1 + 1, I) = (C(N2, N1 + 1, I) +(B(N2, N3 + 1, I) * 
mi14))
                  C(N2, N1 + 1, I) = (C(N2, N1 + 1, I) +(B(N2, N3 + 2, I) * 
mi13))
                  C(N2, N1 + 1, I) = (C(N2, N1 + 1, I) +(B(N2, N3 + 3, I) * 
mi12))
                  C(N2, N1 + 2, I) = (C(N2, N1 + 2, I) +(B(N2, N3, I) * mi10))
                  C(N2, N1 + 2, I) = (C(N2, N1 + 2, I) +(B(N2, N3 + 1, I) * 
mi9))
                  C(N2, N1 + 2, I) = (C(N2, N1 + 2, I) +(B(N2, N3 + 2, I) * 
mi8))
                  C(N2, N1 + 2, I) = (C(N2, N1 + 2, I) +(B(N2, N3 + 3, I) * 
mi7))
                  C(N2, N1 + 3, I) = (C(N2, N1 + 3, I) +(B(N2, N3, I) * mi5))
                  C(N2, N1 + 3, I) = (C(N2, N1 + 3, I) +(B(N2, N3 + 1, I) * 
mi4))
                  C(N2, N1 + 3, I) = (C(N2, N1 + 3, I) +(B(N2, N3 + 2, I) * 
mi3))
                  C(N2, N1 + 3, I) = (C(N2, N1 + 3, I) +(B(N2, N3 + 3, I) * 
mi2))
                END DO
              ENDIF
            END DO
C           N1 remainder for this group of four N3 values, starting
C           at the value N1 was left with by the loop above.
            IF(NDIM2 .GE. 1) THEN
              DO wd_N = N1, NDIM1, 1
                mi17 = A(wd_N, N3)
                mi18 = A(wd_N, N3 + 1)
                mi19 = A(wd_N, N3 + 3)
                mi20 = A(wd_N, N3 + 2)
                DO N0 = 1, NDIM2, 1
                  C(N0, wd_N, I) = (C(N0, wd_N, I) +(B(N0, N3, I) * mi17))
                  C(N0, wd_N, I) = (C(N0, wd_N, I) +(B(N0, N3 + 1, I) * mi18))
                  C(N0, wd_N, I) = (C(N0, wd_N, I) +(B(N0, N3 + 2, I) * mi20))
                  C(N0, wd_N, I) = (C(N0, wd_N, I) +(B(N0, N3 + 3, I) * mi19))
                END DO
              END DO
            ENDIF
          END DO
C         N3 remainder, one value at a time, starting at the value
C         N3 was left with; N1 is again unrolled by 4 (loop N).
          DO wd_N3 = N3, NDIM3, 1
            DO N = 1, (NDIM1 + -3_8), 4
              IF(NDIM2 .GE. 1) THEN
                mi21 = A(N, wd_N3)
                mi22 = A(N + 3, wd_N3)
                mi23 = A(N + 1, wd_N3)
                mi24 = A(N + 2, wd_N3)
                DO N4 = 1, NDIM2, 1
                  C(N4, N, I) = (C(N4, N, I) +(B(N4, wd_N3, I) * mi21))
                  C(N4, N + 1, I) = (C(N4, N + 1, I) +(B(N4, wd_N3, I) * mi23))
                  C(N4, N + 2, I) = (C(N4, N + 2, I) +(B(N4, wd_N3, I) * mi24))
                  C(N4, N + 3, I) = (C(N4, N + 3, I) +(B(N4, wd_N3, I) * mi22))
                END DO
              ENDIF
            END DO
C           Final N1 remainder for this wd_N3, starting at the
C           value N was left with.
            IF(NDIM2 .GE. 1) THEN
              DO wd_N1 = N, NDIM1, 1
                mi25 = A(wd_N1, wd_N3)
                DO N5 = 1, NDIM2, 1
                  C(N5, wd_N1, I) = (C(N5, wd_N1, I) +(B(N5, wd_N3, I) * mi25))
                END DO
              END DO
            ENDIF
          END DO
        END DO
        RETURN
        END SUBROUTINE
<Prev in Thread] Current Thread [Next in Thread>