Complexity problem in IVD
IVD checks for each N_spids
whether a variable declaration exists, inserting one if necessary. This search is a linear traversal of the list of variable declarations. If we have n
variables without declaration, that means we check sum_{i = 1}^n i = n(n + 1)/2 \in O(n^2)
times. For the program below, the phase FLAT generates about 100K variables without declaration, leading to this phase taking about 20 minutes.
use Array: all;
use Math: all;
use StdIO: all;
#define CATEGORIES 10
inline
float[n:shpi] sumOuter(int m, float[m:shpo,n:shpi] x)
{
return {iv -> sum({jv -> x[jv++iv] | jv < shpo}) | iv < shpi};
}
inline
float[d:n1] slide(int[d] i, float[d:m] x, int[d] n) | all(n1 == n + 1)
| all(n + 1 + i <= m)
{
return {iv -> x[iv + i] | iv < n + 1};
}
inline
float[d:mn] backslide(int[d] i, float[d:n1] y, int[d] mn)
| all(i < 1 + mn - n1)
{
return {iv -> 0f | iv < i;
iv -> y[iv - i] | i <= iv < n1 + i;
iv -> 0f | n1 + i <= iv < mn};
}
inline
float[*] logistics(float[*] x)
{
return {iv -> 1f / (1f + exp(-x[iv]))};
}
inline
float[n:oshp,n:ishp] block(float[d:shp] x, int[n] ishp)
| all(shp % ishp == 0)
| all(oshp * ishp == shp)
| (2 * n == d)
{
return {iv -> tile(ishp, iv * ishp, x) | iv < shp / ishp};
}
/* TODO: type patterns */
inline
float[*] unblock(float[*] a, int[.] bshp)
{
shp = drop(-shape(bshp), shape(a)) * bshp;
return {iv -> a[(iv / bshp) ++ (iv % bshp)] | iv < shp};
}
inline
float[n:ishp] selb(float[d:shp] x, int[n] iv, int[n] ishp)
| (2 * n == d)
| all(iv < shp / ishp)
{
return block(x, ishp)[iv];
}
int main()
{
/* Variables */
inp = {[i, j] -> tof(i * j) | [i, j] < [28, 28]};
k1 = genarray( [6,5,5], 1f/25f);
b1 = genarray( [6], 1f/6f);
k2 = genarray( [12,6,5,5], 1f/150f);
b2 = genarray( [12], 1f/12f);
fc = genarray( [CATEGORIES,12,1,4,4], 1f/192f);
b = genarray( [CATEGORIES], 1f/tof(CATEGORIES));
/* Generated code with minor adjustments:
UTF8 -> ASCII
4 -> 4f (as SaC does not coerce types)
one -> 1f
backlide -> backslide
*/
/* Shape [6, 24, 24] */
c11 = { x16 -> (sumOuter(2, { x17 -> (slide(x17, inp, [23, 23])) * ({ x18 -> ((k1)[x16])[x17] | x18 < [24, 24] }) | x17 < [5, 5]})) + ({ x19 -> (b1)[x16] | x19 < [24, 24] }) | x16 < [6] };
printf("c11\n");
print(c11);
/* Shape [6, 24, 24] */
c1 = logistics(c11);
printf("c1\n");
print(c1);
/* Shape [6, 12, 12] */
s1 = { x13 -> { x14 -> (sumOuter(2, { x15 -> (selb((c1)[x13], x14, [2, 2]))[x15] | x15 < [2, 2]})) / 4f | x14 < [12, 12] } | x13 < [6] };
printf("s1\n");
print(s1);
/* Shape [12, 1, 8, 8] */
c21 = { x9 -> (sumOuter(3, { x10 -> (slide(x10, s1, [0, 7, 7])) * ({ x11 -> ((k2)[x9])[x10] | x11 < [1, 8, 8] }) | x10 < [6, 5, 5]})) + ({ x12 -> (b2)[x9] | x12 < [1, 8, 8] }) | x9 < [12] };
printf("c21\n");
print(c21);
/* Shape [12, 1, 8, 8] */
c2 = logistics(c21);
printf("c2\n");
print(c2);
/* Shape [12, 1, 4, 4] */
s2 = { x5 -> { x6 -> { x7 -> (sumOuter(2, { x8 -> (selb(((c2)[x5])[x6], x7, [2, 2]))[x8] | x8 < [2, 2]})) / 4f | x7 < [4, 4] } | x6 < [1] } | x5 < [12] };
printf("s2\n");
print(s2);
/* Shape [10, 1, 1, 1, 1] */
r1 = { x1 -> (sumOuter(4, { x2 -> (slide(x2, s2, [0, 0, 0, 0])) * ({ x3 -> ((fc)[x1])[x2] | x3 < [1, 1, 1, 1] }) | x2 < [12, 1, 4, 4]})) + ({ x4 -> (b)[x1] | x4 < [1, 1, 1, 1] }) | x1 < [10] };
printf("r1\n");
print(r1);
/* Shape [10, 1, 1, 1, 1] */
r = logistics(r1);
printf("r\n");
print(r);
ddr = 1f;
/* Shape [10, 1, 1, 1, 1] */
ddr1 = ddr * (r1 * (1f + -r1));
printf("ddr1\n");
print(ddr1);
/* Shape [12, 1, 4, 4] */
dds2 = sumOuter(1, { x1 -> sumOuter(4, { x2 -> backslide(x2, { x3 -> (((ddr1)[x1])[x3]) * (((fc)[x1])[x2]) | x3 < [1, 1, 1, 1] }, [12, 1, 4, 4]) | x2 < [12, 1, 4, 4]}) | x1 < [10]});
printf("dds2\n");
print(dds2);
/* Shape [12, 1, 8, 8] */
ddc2 = { x1 -> { x2 -> unblock({ x3 -> { x4 -> ((((dds2)[x1])[x2])[x3]) / 4f | x4 < [2, 2] } | x3 < [4, 4] }, [2, 2]) | x2 < [1] } | x1 < [12] };
printf("ddc2\n");
print(ddc2);
/* Shape [12, 1, 8, 8] */
ddc21 = ddc2 * (c21 * (1f + -c21));
printf("ddc21\n");
print(ddc21);
/* Shape [6, 12, 12] */
dds1 = sumOuter(1, { x1 -> sumOuter(3, { x2 -> backslide(x2, { x3 -> (((ddc21)[x1])[x3]) * (((k2)[x1])[x2]) | x3 < [1, 8, 8] }, [6, 12, 12]) | x2 < [6, 5, 5]}) | x1 < [12]});
printf("dds1\n");
print(dds1);
/* Shape [6, 24, 24] */
ddc1 = { x1 -> unblock({ x2 -> { x3 -> (((dds1)[x1])[x2]) / 4f | x3 < [2, 2] } | x2 < [12, 12] }, [2, 2]) | x1 < [6] };
printf("ddc1\n");
print(ddc1);
/* Shape [6, 24, 24] */
ddc11 = (ddc1) * (c11 * (1f + -c11));
printf("ddc11\n");
print(ddc11);
ddb = { x1 -> sumOuter(4, { x2 -> ((ddr1)[x1])[x2] | x2 < [1, 1, 1, 1]}) | x1 < [10] };
printf("ddb\n");
print(ddb);
ddfc = { x1 -> { x2 -> sumOuter(4, { x3 -> (((ddr1)[x1])[x3]) * ((slide(x2, s2, [0, 0, 0, 0]))[x3]) | x3 < [1, 1, 1, 1]}) | x2 < [12, 1, 4, 4] } | x1 < [10] };
printf("ddfc\n");
print(ddfc);
ddb2 = { x1 -> sumOuter(3, { x2 -> ((ddc21)[x1])[x2] | x2 < [1, 8, 8]}) | x1 < [12] };
printf("ddb2\n");
print(ddb2);
ddk2 = { x1 -> { x2 -> sumOuter(3, { x3 -> (((ddc21)[x1])[x3]) * ((slide(x2, s1, [0, 7, 7]))[x3]) | x3 < [1, 8, 8]}) | x2 < [6, 5, 5] } | x1 < [12] };
printf("ddk2\n");
print(ddk2);
ddb1 = { x1 -> sumOuter(2, { x2 -> ((ddc11)[x1])[x2] | x2 < [24, 24]}) | x1 < [6] };
printf("ddb1\n");
print(ddb1);
ddk1 = { x1 -> { x2 -> sumOuter(2, { x3 -> (((ddc11)[x1])[x3]) * ((slide(x2, inp, [23, 23]))[x3]) | x3 < [24, 24]}) | x2 < [5, 5] } | x1 < [6] };
printf("ddk1\n");
print(ddk1);
ddinp = sumOuter(1, { x1 -> sumOuter(2, { x2 -> backslide(x2, { x3 -> (((ddc11)[x1])[x3]) * (((k1)[x1])[x2]) | x3 < [24, 24] }, [28, 28]) | x2 < [5, 5]}) | x1 < [6]});
printf("ddinp\n");
print(ddinp);
return 0;
}