1 /*
2 N-Body integrator using leapfrog scheme
3
4 C99 version, based on C++ version
5 Jeroen Bédorf
6 4-Dec-2010
7
8 Compile as:
9
10 g++ -O4 nbabel.c -o nbabel
11 or
12 gcc -std=c99 nbabel.c -o nbabel -lm
13
14
15 Run as:
16
17 more input128 | ./nbabel
18
19 */
20
21
22 #include <math.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
26
27 typedef double real;
28 typedef real real3[3];
29
30 //Global variables
31 int n; //Number of particles
32 real *m; //Masses array
33 real3 *r; //Positions
34 real3 *v; //Velocities
35 real3 *a; //Accelerations
36 real3 *a0; //Prev accelerations
37
38
39 void acceleration()
40 {
41 for(int i=0; i < n; i++)
42 {
43 a[i][0] = a[i][1] = a[i][2] = 0; //Reset acceleration
44 }
45
46 //For each star
47 for(int i=0; i < n; i++)
48 {
49 real rij[3];
50
51 //For each remaining star
52 //for(int j=i+1; j < n; j++)
53 for(int j=0; j < n; j++)
54 {
55 if(i != j) //prevent self interaction
56 {
57 //Distance between the two stars
58 rij[0] = r[i][0] - r[j][0];
59 rij[1] = r[i][1] - r[j][1];
60 rij[2] = r[i][2] - r[j][2];
61
62 real RdotR = (rij[0]*rij[0]) + (rij[1]*rij[1]) + (rij[2]*rij[2]);
63 real apre = 1.0 / sqrt(RdotR*RdotR*RdotR);
64
65 //Update acceleration
66 a[i][0] -= m[j]*apre*rij[0];
67 a[i][1] -= m[j]*apre*rij[1];
68 a[i][2] -= m[j]*apre*rij[2];
69 }//end i!=j
70 }//end for j
71 }//end for i
72 }//end acceleration
73
74
75 //Update positions
76 void updatePositions(real dt)
77 {
78 for(int i=0; i < n; i++)
79 {
80 //Update the positions, based on the calculated accelerations and velocities
81 a0[i][0] = a[i][0]; a0[i][1] = a[i][1]; a0[i][2] = a[i][2];
82
83 //for each axis (x/y/z)
84 r[i][0] += dt*v[i][0] + 0.5*dt*dt*a0[i][0];
85 r[i][1] += dt*v[i][1] + 0.5*dt*dt*a0[i][1];
86 r[i][2] += dt*v[i][2] + 0.5*dt*dt*a0[i][2];
87 }
88 }
89
90 //Update velocities based on previous and new accelerations
91 void updateVelocities(real dt)
92 {
93 //Update the velocities based on the previous and old accelerations
94 for(int i=0; i < n; i++)
95 {
96 v[i][0] += 0.5*dt*(a0[i][0]+a[i][0]);
97 v[i][1] += 0.5*dt*(a0[i][1]+a[i][1]);
98 v[i][2] += 0.5*dt*(a0[i][2]+a[i][2]);
99
100 a0[i][0] = a[i][0]; a0[i][1] = a[i][1]; a0[i][2] = a[i][2];
101 }
102 }
103
104 //Compute the energy of the system,
105 //contains an expensive O(N^2) part which can be moved to the acceleration part
106 //where this is already calculated
107 void energies(real *EKin, real *EPot)
108 {
109 real rij[3];
110 real tempKin = 0;
111 real tempPot = 0;
112
113
114 //Kinetic energy
115 for(int i=0; i < n; i++)
116 {
117 real tempV = v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2];
118 tempKin += 0.5*m[i]*tempV;
119 }
120
121 //Potential energy
122 for(int i=0; i < n; i++)
123 {
124 for(int j=i+1; j < n; j++)
125 {
126 //Distance between the two stars
127 rij[0] = r[i][0] - r[j][0];
128 rij[1] = r[i][1] - r[j][1];
129 rij[2] = r[i][2] - r[j][2];
130
131 real tempRij = (rij[0]*rij[0]) + (rij[1]*rij[1]) + (rij[2]*rij[2]);
132 tempPot -= m[i]*m[j]/sqrt(tempRij);
133 }//end for j
134 }//end for i
135
136 *EKin = tempKin;
137 *EPot = tempPot;
138 }//end energies
139
140
141 int main() {
142
143 int nAlloc = 16384; //Allocated memory size
144
145 m = (real* )malloc(sizeof(real )*nAlloc);
146 r = (real3*)malloc(sizeof(real3)*nAlloc);
147 v = (real3*)malloc(sizeof(real3)*nAlloc);
148 a = (real3*)malloc(sizeof(real3)*nAlloc);
149 a0 = (real3*)malloc(sizeof(real3)*nAlloc);
150
151 real mass;
152 int dummy;
153
154 real rx,ry,rz,vx,vy,vz;
155
156 int count = 0;
157 while(scanf("%d %lf %lf %lf %lf %lf %lf %lf\n", &dummy, &mass,
158 &rx, &ry, &rz, &vx, &vy, &vz) != EOF)
159 {
160 m[count] = mass;
161 r[count][0] = rx;
162 r[count][1] = ry;
163 r[count][2] = rz;
164 v[count][0] = vx;
165 v[count][1] = vy;
166 v[count][2] = vz;
167 //printf("%d %d %f %f %f %f %f %f %f\n", count++, dummy, m,
168 // rx, ry, rz, vx, vy, vz);
169 count++;
170
171 //See if we need to allocate more memory
172 if(count == nAlloc)
173 {
174 nAlloc += 1024;
175 m = (real *)realloc(m, sizeof(real )*nAlloc);
176 r = (real3*)realloc(r, sizeof(real3)*nAlloc);
177 v = (real3*)realloc(r, sizeof(real3)*nAlloc);
178 a = (real3*)realloc(r, sizeof(real3)*nAlloc);
179 a0 = (real3*)realloc(r, sizeof(real3)*nAlloc);
180 }
181 }
182
183 //Set number of items in the system
184 n = count;
185
186 //Compute initial energu of the system
187 real kinEnergy, potEnergy, totEnergy0;
188 energies(&kinEnergy, &potEnergy);
189 printf("Energies: %lf %lf %lf \n", kinEnergy+potEnergy, kinEnergy, potEnergy);
190 totEnergy0 = kinEnergy+potEnergy;
191
192 //Start time, end time and simulation step
193 real t = 0.0;
194 real tend = 1.0;
195 real dt = 1e-3;
196 int k = 0;
197
198 //Initialize the accelerations
199 acceleration();
200
201 //Start the main loop
202 while (t<tend)
203 {
204 //Update positions based on velocities and accelerations
205 updatePositions(dt);
206
207 //Get new accelerations
208 acceleration();
209
210 //Update velocities
211 updateVelocities(dt);
212
213 t += dt;
214 k += 1;
215 if (k%10==0)
216 {
217 energies(&kinEnergy, &potEnergy);
218 real totEnergy = kinEnergy+potEnergy;
219 real dE = (totEnergy-totEnergy0)/totEnergy0;
220 printf("t= %lg E=: %lg %lg %lg dE = %lg\n",t, totEnergy, kinEnergy,
221 potEnergy, dE);
222 }
223 } //end while
224
225 return 1;
226 } //end program
227
228
229
NBabel.cpp
Date: Dec 2010
Author: Jeroen Bédorf (Sterrewacht Leiden, bedorf@strw.leidenuniv.nl)
Integration scheme: Predictor-corrector leapfrog
Compiler: gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5)
Operating system: Ubuntu Linux 10.10 (2.6.35-30-generic)
Hardware: Intel Core I7 CPU M640 2.80GHz
Input file: inputx (Plummer distribution of x equal mass particles)
Time step: constant and shared 1.e-3 N-body unit
Integration from t=0 to t=1.0
Performance:
N Optimizaltion tCPU dE/E
16 - 0.02 0.0082104
16 O4 0.0 0.0082104
32 - 0.05 7.33935e-07
32 O4 0.01 7.33935e-07
64 - 0.16 3.59673e-06
64 O4 0.08 3.59673e-06
128 - 0.64 1.72732e-06
128 O4 0.3 1.72732e-06
256 - 2.58 -0.000668772
256 O4 1.17 -0.000668772
512 - 10.25 -0.0422243
512 O4 4.56 -0.0422243
1024 - 40.84 -0.021317
1024 O4 18.7 -0.021317
2048 - 162.73 -0.00857556
2048 O4 74.83 -0.00857556
4096 - 654.96 -0.00257437
4096 O4 295.77 -0.00257437