nbody_seq.c 3.26 KB
Newer Older
Thomas Koopman's avatar
Thomas Koopman committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sys/time.h>
#include <omp.h>

#define EPSILON2 0x1p-53

#define min(a, b) ((a) < (b) ? a : b)

typedef struct {
    double x;
    double y;
    double z;
} Point;

typedef struct {
    double *x;
    double *y;
    double *z;
} Points;

Points alloc_points(int n)
{
    Points p;
    p.x = malloc(n * sizeof(double));
    p.y = malloc(n * sizeof(double));
    p.z = malloc(n * sizeof(double));
    return p;
}

void free_points(Points p)
{
    free(p.x);
    free(p.y);
    free(p.z);
}

void init(Points positions, Points velocities, double *masses, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        positions.x[i] = i;
        positions.y[i] = 2.0 * i;
        positions.z[i] = 3.0 * i;
        velocities.x[i] = 0.0;
        velocities.y[i] = 0.0;
        velocities.z[i] = 0.0;
        masses[i] = 1.0;
    }
}

double pow3(double x)
{
    return x * x * x;
}

/* Advances the n-bodies in place. accel is a buffer, does not need to be
 * initialized.
 * 19n^2 + 12n flops */
void advance(Points positions, Points velocities, double *masses,
             double dt, size_t n)
{
    #pragma omp parallel for
    for (size_t i = 0; i < n; i++) {
        double ax = 0.0;
        double ay = 0.0;
        double az = 0.0;
        /* Loop body is 19 flops (assuming masses[j] / norm is computed
         * only once) */
        for (size_t j = 0; j < n; j++) {
            double bufx = positions.x[j] - positions.x[i];
            double bufy = positions.y[j] - positions.y[i];
            double bufz = positions.z[j] - positions.z[i];
            /* norm = ||positions[i] - positions[j]||^3 */
            double norm = pow3(sqrt(bufx * bufx +
                                    bufy * bufy +
                                    bufz * bufz) + EPSILON2);
            ax += bufx * masses[j] / norm;
            ay += bufy * masses[j] / norm;
            az += bufz * masses[j] / norm;
        }
        velocities.x[i] += ax * dt;
        velocities.y[i] += ay * dt;
        velocities.z[i] += az * dt;
        positions.x[i] += velocities.x[i] * dt;
        positions.y[i] += velocities.y[i] * dt;
        positions.z[i] += velocities.z[i] * dt;
    }
}

int main(int argc, char **argv)
{
    if (argc != 3) {
        printf("Usage: n, iterations\n");
        return EXIT_FAILURE;
    }

    size_t n = atol(argv[1]);
    int iterations = atoi(argv[2]);

    Points positions = alloc_points(n);
    Points velocities = alloc_points(n);
    double *masses = (double *)malloc(n * sizeof(double));

    init(positions, velocities, masses, n);

    struct timeval tv1, tv2;
    gettimeofday(&tv1, NULL);
    for (int i = 0; i < iterations; i++) {
        advance(positions, velocities, masses, 0.01, n);
    }
    gettimeofday(&tv2, NULL);
    double duration = (double) (tv2.tv_usec - tv1.tv_usec) / 1e6 +
                      (double) (tv2.tv_sec - tv1.tv_sec);

    fprintf(stderr, "nbody with %zu bodies, %d iterations.\n"
                    "This took %lfs.\n"
                    "Compute rate in Gflops/s: ",
                    n, iterations, duration);
    printf("%lf\n", (19.0 * n * n + 12.0 * n) * iterations / 1e9 / duration);

    free_points(positions);
    free_points(velocities);
    free(masses);

    return EXIT_SUCCESS;
}