Skip to content
GitLab
  • Menu
Projects Groups Snippets
  • /
  • Help
    • Help
    • Support
    • Community forum
    • Submit feedback
    • Contribute to GitLab
  • Sign in / Register
  • sac2c sac2c
  • Project information
    • Project information
    • Activity
    • Labels
    • Members
  • Repository
    • Repository
    • Files
    • Commits
    • Branches
    • Tags
    • Contributors
    • Graph
    • Compare
  • Issues 393
    • Issues 393
    • List
    • Boards
    • Service Desk
    • Milestones
  • Merge requests 16
    • Merge requests 16
  • Deployments
    • Deployments
    • Releases
  • Wiki
    • Wiki
  • External wiki
    • External wiki
  • Activity
  • Graph
  • Create a new issue
  • Commits
  • Issue Boards
Collapse sidebar
  • sac-group
  • sac2csac2c
  • Issues
  • #2472
Closed
Open
Created Apr 28, 2025 by Thomas Koopman@thomasDeveloper

Missed opportunity for memory reuse

Consider the following program:

/*
 * Rotate helper: xors x shifted left by k with x shifted right by (64 - k).
 * NOTE(review): assumes ulong is 64 bits wide (per the constant 64) and
 * 0 < k < 64; under that assumption the two shifted parts share no set
 * bits, so the xor gives the same result as an or would.
 */
inline
ulong[4] rotl(ulong[4] x, int k)
{
	return _bitxor_VxV_(_shl_VxS_(x, _toul_S_(k)),
                      _shr_VxS_(x, _sub_SxS_(_toul_S_(64), _toul_S_(k))));
}

/*
 * One state-update step over the four state vectors s0..s3.
 * Returns five ulong[4] values: the output `result` followed by the
 * updated s0, s1, s2 and s3.
 */
inline
ulong[4], ulong[4], ulong[4], ulong[4], ulong[4]
xoshiro256p(ulong[4] s0, ulong[4] s1, ulong[4] s2, ulong[4] s3)
{
  /* Should be s0 + s3, but see issue 2469 */
	result = s0;//_add_VxV_(s0, s3);

	/* Taken from the incoming s1, before s1 is rebound below. */
	t = _shl_VxS_(s1, _toul_S_(17));

	/*
	 * Each state vector is rebound to the xor of itself with another one.
	 * Order matters: s1 and s0 read the already-updated s2 and s3.
	 */
	s2 = _bitxor_VxV_(s2, s0);
	s3 = _bitxor_VxV_(s3, s1);
	s1 = _bitxor_VxV_(s1, s2);
	s0 = _bitxor_VxV_(s0, s3);

	/* Second update of s2, mixing in the shifted copy of the old s1. */
	s2 = _bitxor_VxV_(s2, t);

	s3 = rotl(s3, 45);

  return (result, s0, s1, s2, s3);
}

/*
 * Driver for the reproducer: builds four constant ulong[4] state vectors,
 * applies xoshiro256p to them 10000000 times, and returns element 0 of the
 * final s0 converted to int.
 */
int main()
{
  /* Initial state: four 4-element ulong vectors of literal values. */
  s0 = [1902348243ul, 92919034ul, 23942043ul, 229034ul];
  s1 = [11902348243ul, 192919034ul, 123942043ul, 1229034ul];
  s2 = [21902348243ul, 392919034ul, 223942043ul, 2229034ul];
  s3 = [21902348243ul, 392919034ul, 323942043ul, 3229034ul];

  /* The state is threaded through the loop; the first result is ignored. */
  for (i = 0; _lt_SxS_(i, 10000000); i = _add_SxS_(i, 1)) {
    unused, s0, s1, s2, s3 = xoshiro256p(s0, s1, s2, s3);
  }

  /* Return a value that depends on the final state. */
  return _toi_S_(_sel_VxA_([0], s0));
}

The updates in xoshiro256p could all be done in place, but the compiler allocates and fills a fresh array for every one of them. Excerpt of the -bmem output:

  _emal_2337__pinl_186_t = _alloc_( 1, 1, [ 4 ]); 
  _pinl_186_t = _fill_( _shl_VxS_( s1, 17ul), _emal_2337__pinl_186_t); 
  _emal_2336__pinl_187_s2__SSA0_1 = _alloc_( 1, 1, [ 4 ]); 
  _pinl_187_s2__SSA0_1 = _fill_( _bitxor_VxV_( s2, s0), _emal_2336__pinl_187_s2__SSA0_1); 
  _dec_rc_( s2, 1); 
  _emal_2335__pinl_189_s3__SSA0_1 = _alloc_( 1, 1, [ 4 ]); 
  _pinl_189_s3__SSA0_1 = _fill_( _bitxor_VxV_( s3, s1), _emal_2335__pinl_189_s3__SSA0_1); 
  _dec_rc_( s3, 1); 
  _emal_2334__pinl_191_s1__SSA0_1 = _alloc_( 1, 1, [ 4 ]); 
  _pinl_191_s1__SSA0_1 = _fill_( _bitxor_VxV_( s1, _pinl_187_s2__SSA0_1), _emal_2334__pinl_191_s1__SSA0_1); 
  _dec_rc_( s1, 1); 
  _emal_2333__pinl_193_s0__SSA0_1 = _alloc_( 1, 1, [ 4 ]); 
  _pinl_193_s0__SSA0_1 = _fill_( _bitxor_VxV_( s0, _pinl_189_s3__SSA0_1), _emal_2333__pinl_193_s0__SSA0_1); 
  _dec_rc_( s0, 1); 
  _emal_2332__pinl_195_s2__SSA0_3 = _alloc_( 1, 1, [ 4 ]); 
  _pinl_195_s2__SSA0_3 = _fill_( _bitxor_VxV_( _pinl_187_s2__SSA0_1, _pinl_186_t), _emal_2332__pinl_195_s2__SSA0_3); 
  _free_( _pinl_186_t); 
  _free_( _pinl_187_s2__SSA0_1);
To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information
Assignee
Assign to
Time tracking