/******************************************************************************** * * Livermoore Loop no 8 * * A. D. I. Integration * Parallel Algorithm * ********************************************************************************/ use StdIO: all; use Array: all; use Hiding: all; #define N 99 inline double[+] second( double[+] a, double[+] b) { return( b); } /* * The superfluous argument iv to Loop8 has been added in order to prevent LIR * from moving the Loop1 call from the WL in main! For the same reason, Loop8 * is NOT inlined! */ double[+] Loop8( int[*] iv, int n, double[+] U1, double[+] U2, double[+] U3, double[+] U, double[10] data) { A11=data[0]; A12=data[1]; A13=data[2]; A21=data[3]; A22=data[4]; A23=data[5]; A31=data[6]; A32=data[7]; A33=data[8]; SIG=data[9]; DU1 = shift([0,0,-1], U1) - shift([0,0,1], U1); DU2 = shift([0,0,-1], U2) - shift([0,0,1], U2); DU3 = shift([0,0,-1], U3) - shift([0,0,1], U3); V = U + A11 * DU1 + A12 * DU2 + A13 * DU3 + SIG * ( shift([-1,0,0], U) - 2d * U + shift([1,0,0], U)); res = tile( [2,1,N], [1,0,1], V); return( res); } double[.,.,.] gen3dArray(int l, int w, int d) { A = hideValue(0, tod(1 + iota(l*w*d))); B = reshape([l, w, d], A); return(B); } /* * The original Sisal program computes U1, U2, and U3 within the same for-loop. * Unfortunately, this is not possible in SAC since with-loops always have a * single result. * One drawback is that DU1, DU2, and DU3 have to be re-computed several times, * whereas the Sisal implementations shares the computation within the for-loop. * The only way out for SAC is to implement with-loop fusion as an additional * optimization and to, thus, fuse the three consecutive with-loops. * * 2006-11-09: Fused with-loops below into multi-operator loops. -fbu */ int main () { iterations = (:int) FibreScanIntArray(); n = (:int) FibreScanIntArray(); A11 = FibreScanDoubleArray( ); A12 = FibreScanDoubleArray( ); A13 = FibreScanDoubleArray( ); A21 = FibreScanDoubleArray( ); A22 = FibreScanDoubleArray( ); A23 = FibreScanDoubleArray( ); A31 = FibreScanDoubleArray( ); A32 = FibreScanDoubleArray( ); A33 = FibreScanDoubleArray( ); SIG = FibreScanDoubleArray( ); U1in = gen3dArray(5, 2, 101); U2in = gen3dArray(5, 2, 101); U3in = gen3dArray(5, 2, 101); data = genarray([10], 0.0d); data[0] = A11; data[1] = A12; data[2] = A13; data[3] = A11; data[4] = A12; data[5] = A13; data[6] = A11; data[7] = A12; data[8] = A13; data[9] = SIG; if( n -1 == N ) { #ifdef SCALAR_ARG /* * This flag decides whether the index variable is given to Loop8() as * a vector or as a scalar. * * Although it seems to be clear that the latter is faster, experiments * show the contrary. Analysis of partly compiled code reveals no insight * why. So, we must leave it subject to future investigations. */ U1, U2, U3 = with { ( [1] <= [i] <= [iterations] ) { u1res = Loop8( i, n, U1in, U2in, U3in, U1in, data); u2res = Loop8( i, n, U1in, U2in, U3in, U2in, data); u3res = Loop8( i, n, U1in, U2in, U3in, U3in, data); } : ( u1res, u2res, u3res ); } : ( fold( second, take( [2,1,N], U1in ) ), fold( second, take( [2,1,N], U2in ) ), fold( second, take( [2,1,N], U3in ) ) ); #else /* SCALAR_ARG */ U1, U2, U3 = with { ( [1] <= iv <= [iterations] ) { u1res = Loop8( iv, n, U1in, U2in, U3in, U1in, data); u2res = Loop8( iv, n, U1in, U2in, U3in, U2in, data); u3res = Loop8( iv, n, U1in, U2in, U3in, U3in, data); } : ( u1res, u2res, u3res ); } : ( fold( second, take( [2,1,N], U1in ) ), fold( second, take( [2,1,N], U2in ) ), fold( second, take( [2,1,N], U3in ) ) ); #endif /* SCALAR_ARG */ FibrePrint(U1); FibrePrint(U2); FibrePrint(U3); } else { printf( "wrong n (%d) read; should be (%d)!\n", n, N); } return(0); }