Constant folding does not optimise sel(jv, {iv -> expr(iv) | iv < ub})
Consider the following program.
/*
 * slide: returns the window of x that starts at offset i and has shape n1.
 * Element iv of the result is x[iv + i], for every iv < n1.
 *
 * The signature's type patterns give i and n1 length d, x shape m, and the
 * result shape n1; the trailing constraint requires all(n1 + i <= m).
 * Marked inline, so the tensor comprehension below is what ends up in the
 * caller's body.
 */
inline
int[d:n1] slide(int[d] i, int[d:m] x, int[d] n1) | all(n1 + i <= m)
{
return {iv -> _sel_VxA_(_add_VxV_(iv, i), x)
| iv < n1};
}
/*
 * Scalar integer addition, defined directly on the _add_SxS_ primitive.
 * main uses it as the operator of its fold with-loop.
 */
int +(int a, int b)
{
return _add_SxS_(a, b);
}
/*
 * Identity on arrays of any shape: returns its argument unchanged.
 * Declared noinline; presumably this keeps the optimiser from seeing
 * the constant contents of the array passed through it.
 */
noinline
int[*] id(int[*] x)
{
return x;
}
/*
 * Reproducer: builds a 28x28 array of 2s, passes it through the noinline id,
 * then computes bla, where each element x22 is the sum over x21 in
 * [0,0] <= x21 < [5,5] of a selection from inp. Returns bla[[0, 0]].
 *
 * The two preprocessor branches express the selection differently:
 *   #if 1  -- selects x22 from slide(x21, inp, [23, 23]) (the reported case);
 *   #else  -- selects inp[x22 + x21] directly (the expected optimised form).
 *
 * NOTE(review): in the #if 1 branch x22 ranges up to [24, 24] while the
 * slide result has shape [23, 23], so x22 = 23 appears to index out of
 * bounds and the two branches are not obviously equivalent -- confirm
 * whether [24, 24] was intended as the window shape.
 */
int main()
{
inp = id(with {}: genarray([28, 28], 2));
#if 1
/* Version under report: selection from the inlined slide comprehension. */
bla = {x22 -> with {
([0, 0] <= x21 < [5, 5]): _sel_VxA_(x22, slide(x21, inp, [23, 23]));
}: fold(+, 0)
| x22 < [24, 24]};
#else
/* Hand-simplified version: direct selection at x22 + x21. */
bla = {x22 -> with {
([0, 0] <= x21 < [5, 5]): _sel_VxA_(_add_VxV_(x22, x21), inp);
}: fold(+, 0)
| x22 < [24, 24]};
#endif
res = _sel_VxA_([0, 0], bla);
return res;
}
Compiling the above example with `sac2c_d -bopt -printfun main`
gives:
/****************************************************************************
* _MAIN::main(...) [ body ]
****************************************************************************/
int _MAIN::main()
/*
* main :: ---
*/
{
int _ivesli_2836 { , NN } ;
int _ivesli_2835 { , NN } ;
int _ivesli_2834 { , NN } ;
int _ivesli_2832 { , NN } ;
int _wlidx_2816__flat_91 { , NN } ;
int _wlidx_2815_bla { , NN } ;
int _wlidx_2814__flat_33 { , NN } ;
int _pinl_405__eat_107 { , NN } ;
int _pinl_404__eat_106 { , NN } ;
int _pinl_403__mose_8__SSA0_1 { , NN } ;
int[2] _pinl_402_iv { , NN } ;
int _ea_327__flat_90 { , NN } ;
int _ea_326__mose_9__SSA0_1 { , NN } ;
int _eat_105 { , NN } ;
int _eat_104 { , NN } ;
int _eat_103 { , NN } ;
int _eat_102 { , NN } ;
int _eat_99 { , NN } ;
int _eat_98 { , NN } ;
int _mose_9__SSA0_1 { , NN } ;
int[2] x21__SSA0_1 { , NN } ;
int res { , NN } ;
int[24,24] bla { , NN } ;
int[2] x22 { , NN } ;
int[28,28] inp { , NN } ;
int[2] _hzgwl_12 { , NN } ;
int[23,23] _flat_91 { , NN } ;
int _flat_90 { , NN } ;
int{0} _flat_39 { , NN } ;
int{2} _flat_37 { , NN } ;
int[28,28] _flat_33 { , NN } ;
_flat_39 = 0;
_flat_37 = 2;
_flat_33 = with /** FOLDABLE (all gen's const) **/
/** REFERENCED: 1 (total num refs) **/
{
/* Partn */
([ 0, 0 ] <= _hzgwl_12=[_eat_99, _eat_98] (IDXS:_wlidx_2814__flat_33) < [ 28, 28 ] genwidth [ 28, 28 ])
{
} : _flat_37 ;
} :
genarray( [ 28, 28 ], _flat_37, IDX(_wlidx_2814__flat_33));
inp = _MAIN::id( _flat_33) ;
bla = with /** FOLDABLE (all gen's const) **/
/** REFERENCED: 1 (total num refs) **/
{
/* Partn */
([ 0, 0 ] <= x22=[_eat_103, _eat_102] (IDXS:_wlidx_2815_bla) < [ 24, 24 ] genwidth [ 24, 24 ])
{
_ivesli_2836 = _idxs2offset_( [ 23, 23 ], _eat_103, _eat_102);
_mose_9__SSA0_1 = with /** FOLDABLE (all gen's const) **/
/** REFERENCED: 1 (total num refs) **/
{
/* Partn */
([ 0, 0 ] <= x21__SSA0_1=[_eat_105, _eat_104] < [ 5, 5 ] genwidth [ 5, 5 ])
{
_ea_326__mose_9__SSA0_1 = _accu_( x21__SSA0_1, _flat_39);
_ivesli_2832 = _idxs2offset_( [ 28, 28 ], _eat_105, _eat_104);
_flat_91 = with /** FOLDABLE (all gen's const) **/
/** REFERENCED: 1 (total num refs) **/
{
/* Partn */
([ 0, 0 ] <= _pinl_402_iv=[_pinl_405__eat_107, _pinl_404__eat_106] (IDXS:_wlidx_2816__flat_91) < [ 23, 23 ] genwidth [ 23, 23 ])
{
_ivesli_2834 = _idxs2offset_( [ 28, 28 ], _pinl_405__eat_107, _pinl_404__eat_106);
_ivesli_2835 = _add_SxS_( _ivesli_2832, _ivesli_2834);
_pinl_403__mose_8__SSA0_1 = _idx_sel_( _ivesli_2835, inp);
} : _pinl_403__mose_8__SSA0_1 ;
} :
genarray( [ 23, 23 ], _flat_39, IDX(_wlidx_2816__flat_91));
_flat_90 = _idx_sel_( _ivesli_2836, _flat_91);
_ea_327__flat_90 = _add_SxS_( _ea_326__mose_9__SSA0_1, _flat_90);
} : _ea_327__flat_90 ;
} :
fold( _MAIN::+(), _flat_39);
} : _mose_9__SSA0_1 ;
} :
genarray( [ 24, 24 ], _flat_39, IDX(_wlidx_2815_bla));
res = _idx_sel_( _flat_39, bla);
return( res);
}
/*-----------------------------------------------*/
So `slide` is recomputed in every iteration of the fold-loop (the inner genarray with-loop that produces `_flat_91`),
whereas I would have expected a simple selection `inp[x21 + x22]`,
as in the disabled `#else` version.