Variable is used before its value is set (related to MMV)
I've been having a go today at getting an instrumentation tool called Score_P (https://www.vi-hps.org/projects/score-p) working with sac2c-compiled CUDA code, and ran into a funny problem.
With the following example (based on matmul from demos):
/* Problem sizes used by matmul() and main(). Each macro defaults to 1000
 * unless it has already been defined before this point. */
#ifndef SIZE1
#define SIZE1 1000
#endif
#ifndef SIZE2
#define SIZE2 1000
#endif
#ifndef SIZE3
#define SIZE3 1000
#endif
/* Build a double array of shape `shp` using a with-loop that has no
 * generators, so every element takes the default value `def`. */
inline double[+] genarray (int[.] shp, double def)
{
return with {} : genarray (shp, def);
}
/* Thin wrapper: returns the shape vector of `a` via the _shape_A_ primitive. */
inline int[.] shape(double[*] a) { return _shape_A_ (a); }
/* Selection on int arrays with a (possibly partial) index vector `idx`.
 * The result has the shape of `array` with its first len(idx) entries
 * dropped; each result element at `iv` is array[idx ++ iv]. */
inline int[*] sel( int[.] idx, int[*] array)
{
/* _sel_VxA_([0], _shape_A_(idx)) is the length of idx; that many leading
 * entries are dropped from the shape of `array`. */
new_shape = _drop_SxV_( _sel_VxA_( [0], _shape_A_(idx)),
_shape_A_(array));
res = with {
( . <= iv <= . ) {
/* Full index into `array`: the given prefix followed by the loop index. */
new_idx = _cat_VxV_( idx, iv);
} : _sel_VxA_(new_idx, array);
} : genarray( new_shape, 0);
return( res);
}
/* Selection on double arrays with a (possibly partial) index vector `idx`.
 * Same structure as the int overload, with 0d as the genarray default:
 * the result has the shape of `array` with its first len(idx) entries
 * dropped; each result element at `iv` is array[idx ++ iv]. */
inline double[*] sel( int[.] idx, double[*] array)
{
/* _sel_VxA_([0], _shape_A_(idx)) is the length of idx; that many leading
 * entries are dropped from the shape of `array`. */
new_shape = _drop_SxV_( _sel_VxA_( [0], _shape_A_(idx)),
_shape_A_(array));
res = with {
( . <= iv <= . ) {
/* Full index into `array`: the given prefix followed by the loop index. */
new_idx = _cat_VxV_( idx, iv);
} : _sel_VxA_(new_idx, array);
} : genarray( new_shape, 0d);
return( res);
}
/* Scalar-index convenience overload: wraps the int `shp` (used as an index,
 * despite its name) in a one-element vector and forwards to the vector sel. */
inline double[*] sel(int shp, double[*] a) { return sel([shp], a); }
/* Scalar-index convenience overload for int arrays: wraps `shp` in a
 * one-element vector and forwards to the vector sel. */
inline int[*] sel(int shp, int[*] a) { return sel([shp], a); }
/* Scalar int less-than, implemented with the _lt_SxS_ primitive. */
inline bool <(int a, int b) { return _lt_SxS_ (a,b); }
/* Scalar int less-than-or-equal, implemented with the _le_SxS_ primitive. */
inline bool <=(int a, int b) { return _le_SxS_ (a,b); }
/* Scalar double multiplication, implemented with the _mul_SxS_ primitive. */
inline double *(double a, double b) { return _mul_SxS_ (a, b); }
/* Element-wise product of two double arrays.
 * The iteration space and the result shape are taken from `a` only (the
 * result is a modarray of `a`); the shape of `b` is not checked here. */
inline double[+] *(double[+] a, double[+] b)
{
return with {
/* Lower bound: shape(a) scaled by 0, i.e. an all-zero index vector of the
 * same length as shape(a). Upper bound (exclusive): shape(a). */
(_mul_SxV_ (0, _shape_A_ (a)) <= iv < _shape_A_(a))
: _sel_VxA_ (iv, a) * _sel_VxA_ (iv, b);
} : modarray (a);
}
/* Scalar double addition, implemented with the _add_SxS_ primitive.
 * Also used as the fold operator in sum(). */
inline double +(double a, double b) { return _add_SxS_ (a, b); }
/* Element-wise sum of two double arrays.
 * The iteration space and the result shape are taken from `a` only (the
 * result is a modarray of `a`); the shape of `b` is not checked here. */
inline double[+] +(double[+] a, double[+] b)
{
return with {
/* Lower bound: shape(a) scaled by 0, i.e. an all-zero index vector of the
 * same length as shape(a). Upper bound (exclusive): shape(a). */
(_mul_SxV_ (0, _shape_A_ (a)) <= iv < _shape_A_(a))
: _sel_VxA_ (iv, a) + _sel_VxA_ (iv, b);
} : modarray (a);
}
/** set this to noinline to get rid of the error **/
/* Sum of all elements of `a`: a fold with-loop over every index of `a`,
 * using `+` as the fold operator and 0.0d as the neutral element.
 * The neutral element of this fold is the value the report below finds
 * uninitialised in the accumulator once the function has been inlined
 * into matmul(). */
inline double[*] sum (double[+] a)
{
return with {
/* Iterate from the all-zero index (shape(a) scaled by 0) up to, but not
 * including, shape(a). */
( _mul_SxV_(0, _shape_A_(a)) <= iv < _shape_A_(a) )
: _sel_VxA_(iv, a);
} : fold( +, 0.0d);
}
/* Reverse a double array along its first axis: position i of the result
 * receives arr_a[[maxidx0 - i]], where maxidx0 is the last valid index of
 * axis 0. The result is a modarray of arr_a, so it keeps arr_a's shape. */
inline double[+] reverse( double[+] arr_a)
{
/* Last valid index along axis 0: shape(arr_a)[0] - 1. */
maxidx0 = _sub_SxS_( _sel_VxA_( [0], _shape_A_( arr_a)), 1);
res = with {
( [0] <= iv = [i] <= [maxidx0] ) : arr_a[[_sub_SxS_(maxidx0, i)]];
} : modarray( arr_a );
return( res);
}
/* Reverse an int array along its first axis: position i of the result
 * receives arr_a[[maxidx0 - i]], where maxidx0 is the last valid index of
 * axis 0. transpose() uses this overload to reverse index and shape vectors. */
inline int[+] reverse( int[+] arr_a)
{
/* Last valid index along axis 0: shape(arr_a)[0] - 1. */
maxidx0 = _sub_SxS_( _sel_VxA_( [0], _shape_A_( arr_a)), 1);
res = with {
( [0] <= iv = [i] <= [maxidx0] ) : arr_a[[_sub_SxS_(maxidx0, i)]];
} : modarray( arr_a );
return( res);
}
/* Transpose by reversing the order of all axes: the result has the reversed
 * shape of arr_a, and the element at index iv is arr_a[reverse(iv)].
 * Unlike the helpers above, this function is not declared inline. */
double[+] transpose( double[+] arr_a)
{
res = with {
( . <= iv <= . ) : arr_a[ reverse( iv)];
} : genarray( reverse( _shape_A_( arr_a)), 1.0);
return( res);
}
/* Matrix product of A (SIZE1 x SIZE2) and B (SIZE2 x SIZE3).
 * B is transposed first so that each result element C[i,j] is computed as
 * sum( A[[i]] * BT[[j]] ), i.e. the sum of the element-wise product of
 * row i of A and row j of BT.
 * This is the function in which the inlined sum() produces the
 * "used before its value is set" warning described in this report. */
double[.,.] matmul( double[SIZE1,SIZE2] A, double[SIZE2,SIZE3] B)
{
BT = transpose( B);
C = with {
//#pragma wlcomp BvL0( [32,32], Default)
( . <= [i,j] <= .) : sum( A[[i]] * BT[[j]]);
}: genarray( [ shape(A)[[0]], shape(B)[[1]] ], 1.0);
return( C);
}
/* Driver: builds A filled with 1.0 and B filled with 2.0, multiplies them
 * and prints C[0,0]. With these inputs every entry of C is mathematically
 * 2.0 * SIZE2. Always returns 0. */
int main()
{
A = genarray( [SIZE1,SIZE2], 1.0);
B = genarray( [SIZE2,SIZE3], 2.0);
C = matmul( A, B);
StdIO::printf( "C[0,0] = %f\n", C[0,0]);
return 0;
}
I get the following error message:
/tmp/sac-what/ci.out.cu(1562): warning #549-D: variable "SACp_pinl_818__flat_19" is used before its value is set
...
It is coming from the matmul function...
If we look at the code at phase 20 (PC), we get this for the matmul function:
void _MAIN::SACf__MAIN__matmul__d_1000_1000__d_1000_1000( double[1000,1000] { ,NN } , double[1000,1000] SACl_A { ,NN } , double[1000,1000] SACl_B { ,NN } )
{
double SACp_emal_3476__pinl_1299__flat_82 { , NN } = 1.0;
int SACp_emal_3474__ivesli_3316 { , NN, USAGE: 2 } ;
double SACp_emal_3473__pinl_1304__flat_22 { , NN, USAGE: 3 } ;
double[1000,1000] SACp_emal_3472_BT { , NN, USAGE: 2 } ;
double SACp_emal_3471__pinl_818__flat_19 { , NN } = 0.0;
... // vardecs omitted
double{0.0...} SACp_pinl_818__flat_19 { , NN } ;
int SACp_pinl_878__eat_172 { , NN, USAGE: 1 } ;
int[1] SACp_pinl_876_iv { , NN } ;
double SACp_flat_126 { , NN, USAGE: 4 } ;
int SACl_j { , NN, USAGE: 1 } ;
int SACl_i { , NN, USAGE: 1 } ;
int[2] SACp_flat_125 { , NN } ;
... // some code is omitted here
SACp_emal_3457_C = with2 (SACp_flat_125=[SACl_i, SACl_j] (IDXS:SACp_wlidx_3288_C))
/********** operators: **********/
op_0 =
{
_inc_rc_( SACp_emal_3471__pinl_818__flat_19, 1); // XXX
SACp_emal_3466__ivesli_3311 = _alloc_( 1, 0, [:int]);
SACp_emal_3466__ivesli_3311 = _idxs2offset_( [ 1000, 1000 ], SACl_j, SACp_emal_3470__iveras_3429);
SACp_emal_3465__ivesli_3306 = _alloc_( 1, 0, [:int]);
SACp_emal_3465__ivesli_3306 = _idxs2offset_( [ 1000, 1000 ], SACl_i, SACp_emal_3469__iveras_3430);
SACp_pinl_878__eat_172 = _alloc_( 1, 0, [:int]);
SACp_flat_126 = with2 (SACp_pinl_876_iv=[SACp_pinl_878__eat_172])
/********** operators: **********/
op_0 =
{
SACp_flat_126 = _accu_( SACp_pinl_876_iv, SACp_pinl_818__flat_19); // XXX WTF?!
SACp_emal_3464__ivesli_3313 = _alloc_( 1, 0, [:int]);
SACp_emal_3464__ivesli_3313 = _idxs2offset_( [ 1000, 1000 ], SACp_emal_3468__iveras_3431, SACp_pinl_878__eat_172);
SACp_emal_3463__ivesli_3314 = _alloc_( 1, 0, [:int]);
SACp_emal_3463__ivesli_3314 = _add_SxS_( SACp_emal_3466__ivesli_3311, SACp_emal_3464__ivesli_3313);
SACp_emal_3462__pinl_821__flat_22__SSA7_1 = _alloc_( 1, 0, [:int]);
SACp_emal_3462__pinl_821__flat_22__SSA7_1 = _idx_sel_( SACp_emal_3463__ivesli_3314, SACp_emal_3472_BT);
_free_( SACp_emal_3463__ivesli_3314);
SACp_emal_3464__ivesli_3313 = _add_SxS_( SACp_emal_3465__ivesli_3306, SACp_emal_3464__ivesli_3313);
SACp_emal_3460__pinl_839__flat_22__SSA5_2 = _alloc_( 1, 0, [:int]);
SACp_emal_3460__pinl_839__flat_22__SSA5_2 = _idx_sel_( SACp_emal_3464__ivesli_3313, SACl_A);
_free_( SACp_emal_3464__ivesli_3313);
SACp_emal_3462__pinl_821__flat_22__SSA7_1 = _mul_SxS_( SACp_emal_3462__pinl_821__flat_22__SSA7_1, SACp_emal_3460__pinl_839__flat_22__SSA5_2);
_free_( SACp_emal_3460__pinl_839__flat_22__SSA5_2);
SACp_emal_3462__pinl_821__flat_22__SSA7_1 = _add_SxS_( SACp_emal_3462__pinl_821__flat_22__SSA7_1, SACp_flat_126);
_dec_rc_( SACp_flat_126, 1);
SACp_ufiv_3433__pinl_881__flat_38 = _unshare_( SACp_emal_3462__pinl_821__flat_22__SSA7_1, SACp_pinl_876_iv);
SACp_flat_126 = SACp_ufiv_3433__pinl_881__flat_38;
} : SACp_flat_126 ;
/********** segment 0: **********
* index domain: [ 0 ] -> [ 1000 ]
* bv: [ 1 ], [ 1 ], [ 1 ]
* ubv: [ 1 ]
* sv: [ 1 ]
* homsv: [ 1 ]
*/
(0 -> 1000), step0[0] 1
(0 --> 1): op_0
/********** conexpr: **********/
fold( _MAIN::SACf__MAIN___PL__d__d(), SACp_emal_3471__pinl_818__flat_19);
_free_( SACp_pinl_878__eat_172);
_free_( SACp_emal_3465__ivesli_3306);
_free_( SACp_emal_3466__ivesli_3311);
SACp_flat_126 = _wl_assign_( SACp_flat_126, SACp_emal_3457_C, SACp_flat_125, SACp_wlidx_3288_C);
_dec_rc_( SACp_flat_126, 1);
} : SACp_flat_126 ;
/********** segment 0: **********
* index domain: [ 0, 0 ] -> [ 1000, 1000 ]
* bv: [ 1, 1 ], [ 1, 1 ], [ 1, 1 ]
* ubv: [ 1, 1 ]
* sv: [ 1, 1 ]
* homsv: [ 1, 1 ]
*/
(0 -> 1000), step0[0] 1
(0 --> 1):
(0 -> 1000), step0[1] 1
(0 --> 1): op_0
/********** conexpr: **********/
genarray( [ 1000, 1000 ], SACp_emal_3476__pinl_1299__flat_82, SACp_emal_3457_C, IDX(SACp_wlidx_3288_C));
_free_( SACp_emal_3476__pinl_1299__flat_82);
_free_( SACp_emal_3472_BT);
_free_( SACl_i);
_free_( SACl_j);
_free_( SACp_emal_3471__pinl_818__flat_19);
_free_( SACp_wlidx_3288_C);
_free_( SACp_emal_3470__iveras_3429);
_free_( SACp_emal_3469__iveras_3430);
_free_( SACp_emal_3468__iveras_3431);
_dec_rc_( SACl_A, 1);
return( SACp_emal_3457_C);
}
Notice that we have two *pinl_818__flat_19 variables declared (SACp_emal_3471__pinl_818__flat_19 and SACp_pinl_818__flat_19), but only the first of them is actually initialised.
Also note that in the inner with2 (the fold operation) the accumulator is using the uninitialised SACp_pinl_818__flat_19 ... this is where our warning is coming from.
If we step through the optimisation in PC, we find that before MMV we have the following in the function:
_emal_3471__pinl_818__flat_19 = _alloc_( 1, 0, [:int]);
_pinl_818__flat_19 = _fill_( 0.0, _emal_3471__pinl_818__flat_19);
After MMV, the _fill_() is gone; instead, all references to _pinl_818__flat_19
are replaced with _emal_3471__pinl_818__flat_19,
except for the one in the accumulator (_accu_) in the with2.
Initially I thought this was specific to the CUDA backend, but this happens with ALL BACKENDS for the given example!!!
A quick solution is to have MMV also traverse the 2nd argument of the prf_accu (_accu_),
but I'm unsure whether this is the correct thing to do in general.
It's really funny that only NVCC actually issues a message about this, GCC/CLANG make no noise (I wonder if we are disabling some warning