For example x = _ulong_as_double_S_(y) corresponds to C-code unsigned long y; double x = *(double *)&y;
This is useful for implementing numerical code, see for example the sincospi.c code I wrote for FFTs: to port this to SaC, we need type punning for finding the quadrant of the integer-part of the argument. Additionally, it is the most efficient way to convert random numbers to double. Not only is the conversion instruction (vcvtsi2sd) expensive, there exists no SIMD-variant of it, which also destroys the SIMD-ification of subsequent math functions on the result.
sincospi.c:
/**
* Algorithm:
* We write a = 0.5 k + r with k an integer, and r in [-0.25, 0.25].
*
* We can compute cospi(a), sinpi(a) from cospi(r), sinpi(r) exactly
* using trig-formulas.
*
* We approximate cospi(r) with a polynomial of the form Q(x^2) using
* the Remez algorithm (https://github.com/samhocevar/lolremez)
* and sinpi(r) with a polynomial of the form x Q(x^2).
*
* We can write k as 4m + 2l + j with 0 <= j, l < 2. We disregard m
* as sinpi and cospi have period 4. Note that j is the least significant
* bit, and l is the second-least significant bit of r.
*
* This gives
* sinpi(r + 0.5 (2l + j)) = sin(t * pi + l * pi + 0.5 * j * pi)
* cospi(r + 0.5 (2l + j)) = cos(t * pi + l * pi + 0.5 * j * pi)
*
* As
* sin(r + pi) = -sin(r),
* cos(r + pi) = -cos(r),
* sin(r + pi / 2) = cos(r),
* cos(r + pi / 2) = -sin(r),
*
* we negate sin if l = 1 and r = 0, negate cos if l = 1, and
* swap cos, sin if r = 1.
**/
#include <stdint.h>
#include <math.h>
union union_id {
int64_t i[LANES];
double d[LANES];
};
static inline void
sincospi_LANES(double a[LANES],
double * restrict s_out, double * restrict c_out)
{
double s [LANES];
double c [LANES];
double r [LANES];
double rr[LANES];
union union_id k;
#pragma omp simd
for (int l = 0; l < LANES; l++) {
/* Range reduction */
k.d[l] = nearbyint(a[l] + a[l]);
r[l] = fma(-0.5, k.d[l], a[l]);
rr[l] = r[l] * r[l];
k.d[l] = k.d[l] + (0x1p51 + 0x1p52); // https://stackoverflow.com/questions/17035464
/* Horner's scheme */
c[l] = 0x1.f3dbf26b691b1p-10 ;
c[l] = fma(c[l], rr[l], -0x1.a6c9c229ef09bp-6 );
c[l] = fma(c[l], rr[l], 0x1.e1f4fb6119954p-3 );
c[l] = fma(c[l], rr[l], -0x1.55d3c7dbfe0ep+0 );
c[l] = fma(c[l], rr[l], 0x1.03c1f081b078ep+2 );
c[l] = fma(c[l], rr[l], -0x1.3bd3cc9be458bp+2 );
c[l] = fma(c[l], rr[l], 0x1p+0 );
s[l] = 0x1.e398872bf8382p-12 ;
s[l] = fma(s[l], rr[l], -0x1.e2ff49f811cf3p-8 );
s[l] = fma(s[l], rr[l], 0x1.50782e686844bp-4 );
s[l] = fma(s[l], rr[l], -0x1.32d2cce12a10dp-1 );
s[l] = fma(s[l], rr[l], 0x1.466bc6775679ep+1 );
s[l] = fma(s[l], rr[l], -0x1.4abbce625be21p+2 );
/* Polynomial is of the form r Q(r^2), merged with the last step of
* Horner, gives s = r * (s * r^2 + 0x1.921fb54442d18p+1). */
s[l] = fma(s[l], rr[l], 0x1.921fb54442d18p+1 );
s[l] = s[l] * r[l];
/**
* Recover unreduced result.
* (Both gcc 15.2.0 and clang 14.0.6 generate bitmasks, blends,
* and flips of the sign-bit from this, which is absolutely insane.)
**/
if (k.i[l] & 2) {
s[l] = -s[l];
c[l] = -c[l];
}
if (k.i[l] & 1) {
r[l] = -s[l];
s[l] = c[l];
c[l] = r[l];
}
s_out[l] = s[l];
c_out[l] = c[l];
}
}