Missed opportunity for memory reuse
Consider the following program:
/*
 * rotl: rotate every element of the 4-element vector x left by k bits.
 *
 * x - vector of four ulong values
 * k - rotation distance in bits
 *
 * Returns (x << k) XOR (x >> (64 - k)), built from the vector-by-scalar
 * shift primitives.
 * NOTE(review): this equals a true rotate only if ulong is 64 bits wide and
 * 0 < k < 64, so that the two shifted parts share no bit positions. What a
 * shift by 64 (k == 0) yields is not visible here - confirm before reusing
 * this helper with other distances.
 */
inline
ulong[4] rotl(ulong[4] x, int k)
{
/* Both k and the constant 64 are converted to ulong before subtracting. */
return _bitxor_VxV_(_shl_VxS_(x, _toul_S_(k)),
_shr_VxS_(x, _sub_SxS_(_toul_S_(64), _toul_S_(k))));
}
/*
 * xoshiro256p: perform one state update on four generator state vectors.
 *
 * s0, s1, s2, s3 - the current state, each a vector of four ulong values
 *
 * Returns five vectors: (result, s0, s1, s2, s3), i.e. the output value
 * followed by the updated state. The update sequence below follows the
 * xoshiro256+ reference step (shift by 17, four XORs, XOR with t, rotate
 * by 45).
 * NOTE: result is currently just the incoming s0, not s0 + s3; see the
 * comment on that line.
 */
inline
ulong[4], ulong[4], ulong[4], ulong[4], ulong[4]
xoshiro256p(ulong[4] s0, ulong[4] s1, ulong[4] s2, ulong[4] s3)
{
/* Should be s0 + s3, but see issue 2469 */
result = s0;//_add_VxV_(s0, s3);
/* t is taken from the incoming s1, before s1 is overwritten below. */
t = _shl_VxS_(s1, _toul_S_(17));
/*
 * Order matters in the next four statements: s2 and s3 are updated from the
 * incoming s0 and s1, then s1 and s0 read the already-updated s2 and s3.
 */
s2 = _bitxor_VxV_(s2, s0);
s3 = _bitxor_VxV_(s3, s1);
s1 = _bitxor_VxV_(s1, s2);
s0 = _bitxor_VxV_(s0, s3);
/* Second update of s2, mixing in the shifted copy of the old s1. */
s2 = _bitxor_VxV_(s2, t);
s3 = rotl(s3, 45);
return (result, s0, s1, s2, s3);
}
/*
 * main: driver for the reproduction case.
 *
 * Seeds four 4-element ulong state vectors with fixed literals, applies
 * xoshiro256p to them 10000000 times, and returns element 0 of the final s0
 * converted to int.
 */
int main()
{
/* Fixed seeds; each state vector holds four ulong values. */
s0 = [1902348243ul, 92919034ul, 23942043ul, 229034ul];
s1 = [11902348243ul, 192919034ul, 123942043ul, 1229034ul];
s2 = [21902348243ul, 392919034ul, 223942043ul, 2229034ul];
s3 = [21902348243ul, 392919034ul, 323942043ul, 3229034ul];
for (i = 0; _lt_SxS_(i, 10000000); i = _add_SxS_(i, 1)) {
/*
 * The first return value is bound to `unused` and never read; the four
 * state vectors are rebound to their updated values on every iteration.
 */
unused, s0, s1, s2, s3 = xoshiro256p(s0, s1, s2, s3);
}
/*
 * Make the exit value depend on the final state.
 * NOTE(review): presumably this keeps the loop from being optimised away - confirm.
 */
return _toi_S_(_sel_VxA_([0], s0));
}
The updates in xoshiro256p can be done in place, but the compiler allocates and fills a fresh array for every one of them. Excerpt of the -bmem output:
_emal_2337__pinl_186_t = _alloc_( 1, 1, [ 4 ]);
_pinl_186_t = _fill_( _shl_VxS_( s1, 17ul), _emal_2337__pinl_186_t);
_emal_2336__pinl_187_s2__SSA0_1 = _alloc_( 1, 1, [ 4 ]);
_pinl_187_s2__SSA0_1 = _fill_( _bitxor_VxV_( s2, s0), _emal_2336__pinl_187_s2__SSA0_1);
_dec_rc_( s2, 1);
_emal_2335__pinl_189_s3__SSA0_1 = _alloc_( 1, 1, [ 4 ]);
_pinl_189_s3__SSA0_1 = _fill_( _bitxor_VxV_( s3, s1), _emal_2335__pinl_189_s3__SSA0_1);
_dec_rc_( s3, 1);
_emal_2334__pinl_191_s1__SSA0_1 = _alloc_( 1, 1, [ 4 ]);
_pinl_191_s1__SSA0_1 = _fill_( _bitxor_VxV_( s1, _pinl_187_s2__SSA0_1), _emal_2334__pinl_191_s1__SSA0_1);
_dec_rc_( s1, 1);
_emal_2333__pinl_193_s0__SSA0_1 = _alloc_( 1, 1, [ 4 ]);
_pinl_193_s0__SSA0_1 = _fill_( _bitxor_VxV_( s0, _pinl_189_s3__SSA0_1), _emal_2333__pinl_193_s0__SSA0_1);
_dec_rc_( s0, 1);
_emal_2332__pinl_195_s2__SSA0_3 = _alloc_( 1, 1, [ 4 ]);
_pinl_195_s2__SSA0_3 = _fill_( _bitxor_VxV_( _pinl_187_s2__SSA0_1, _pinl_186_t), _emal_2332__pinl_195_s2__SSA0_3);
_free_( _pinl_186_t);
_free_( _pinl_187_s2__SSA0_1);