Description of the Code:

This program numerically simulates n particles with random positions and random charges, and calculates the net force acting on each particle. The code has two parts: the first runs on process 0, and the second runs on all other processes. The part on process 0 generates the point positions and charges, writes them to a file, and sends them to the other processors; after the calculation is done, it receives the resulting force vectors and writes them to a file. The second part receives the point positions and charges, calculates the force vectors, and sends them back to process 0.
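Concretely, for each particle i assigned to it, a worker accumulates the pairwise Coulomb-style force contributions from all other particles (no physical constants are applied, matching the code):

    F_i = sum over j != i of (q_i * q_j / d_ij^2) * u_ij

where d_ij is the distance between particles i and j, and u_ij is the unit vector pointing from particle i toward particle j. This is exactly what the inner loop of the worker code below evaluates.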

The program was written in C using only the standard C library and MPICH. Because of the size of the data, dynamic memory allocation was used instead of static arrays, and all math and array access was done through pointers. Care was taken to avoid allocating duplicate or unused memory.

This code is far from perfect, and several things could be done to improve its efficiency. For instance, the random number generation and the I/O could both have been done in parallel. The send and receive portions of the process-0 code are simple loops using blocking sends and receives; these could be replaced with collective MPI operations such as MPI_Bcast and MPI_Gather, which move the data more efficiently. Even so, it is clear that the majority of the work done by this program is in the force calculation performed on the other processors.
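Here is a minimal sketch (not drop-in code) of that collective version of the data movement. The names chunk and allvect are introduced purely for illustration: chunk would be n/(np-1), and allvect a buffer of n*3 floats on the root. Note that MPI_Gather as written assumes every rank, including process 0, contributes a chunk, so using it in this program would require either giving process 0 a share of the math or gathering over a sub-communicator that excludes it.

/* one broadcast replaces the loop of blocking MPI_Sends from process 0 */
MPI_Bcast(position, n * 3, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(charge, n, MPI_FLOAT, 0, MPI_COMM_WORLD);

/* ... each rank fills vect with its chunk of force vectors ... */

/* one gather replaces the loop of blocking MPI_Recvs on process 0 */
MPI_Gather(vect, chunk * 3, MPI_FLOAT,
           allvect, chunk * 3, MPI_FLOAT, 0, MPI_COMM_WORLD);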

The Code:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    int id, np, k, i, n, a, b, w, t;
    float r, finalx, finaly, finalz;
    float *position;
    float *vect;
    float *charge;
    FILE *initial;
    FILE *final;
    time_t end;
    time_t begin;
    float tempx, tempy, tempz, pointx, pointy, pointz, length;
    float tempunitx, tempunity, tempunitz, magf, pointc, tempc;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &id);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    if (argc < 2) {
        if (id == 0)
            printf("usage: %s n\n", argv[0]);
        MPI_Finalize();
        return 1;
    }
    n = atoi(argv[1]);

    position = malloc(sizeof(float) * n * 3);
    charge = malloc(sizeof(float) * n);
    vect = malloc(sizeof(float) * (n / (np - 1)) * 3);

    for (i = 0; i < (n / (np - 1)) * 3; i++)
        *(vect + i) = 0;

    if (id == 0) {
        begin = time(NULL);

        /* generate random positions in [0,10]^3 and charges in [-1,1],
           and write them to the file "initial" */
        initial = fopen("initial", "w");
        fprintf(initial, "x y z charge\n");
        k = 0;
        for (i = 0; i < n; i++) {
            r = rand();
            *(position + k) = (r / RAND_MAX) * 10;
            r = rand();
            *(position + (k + 1)) = (r / RAND_MAX) * 10;
            r = rand();
            *(position + (k + 2)) = (r / RAND_MAX) * 10;
            r = rand();
            *(charge + i) = r / RAND_MAX * 2 - 1;
            fprintf(initial, "%f %f %f %f\n", *(position + k),
                    *(position + k + 1), *(position + k + 2), *(charge + i));
            k = k + 3;
        }
        fclose(initial);

        /* send the full position and charge arrays to every worker */
        printf("sending position and charge ");
        for (i = 1; i < np; i++) {
            printf(".");
            MPI_Send(position, n * 3, MPI_FLOAT, i, 666, MPI_COMM_WORLD);
            MPI_Send(charge, n, MPI_FLOAT, i, 667, MPI_COMM_WORLD);
        }

        /* receive each worker's block of force vectors and
           write them to the file "final" */
        printf("\n Receiving");
        final = fopen("final", "w");
        for (i = 1; i < np; i++) {
            MPI_Recv(vect, (n / (np - 1)) * 3, MPI_FLOAT, i, 668,
                     MPI_COMM_WORLD, &status);
            b = 0;
            for (t = 0; t < n / (np - 1); t++) {
                fprintf(final, "%f %f %f \n", *(vect + b),
                        *(vect + (b + 1)), *(vect + (b + 2)));
                b = b + 3;
            }
        }
        fclose(final);

        end = time(NULL);
        printf("it took %f seconds \n", difftime(end, begin));
    }
    else {
        MPI_Recv(position, n * 3, MPI_FLOAT, 0, 666, MPI_COMM_WORLD, &status);
        MPI_Recv(charge, n, MPI_FLOAT, 0, 667, MPI_COMM_WORLD, &status);

        /* w is the offset of this worker's first point in the
           position array (three floats per point) */
        w = ((id - 1) * n / (np - 1)) * 3;
        b = 0;
        finalx = 0;
        finaly = 0;
        finalz = 0;
        for (i = 0; i < n / (np - 1); i++) {
            pointx = *(position + w);
            pointy = *(position + (w + 1));
            pointz = *(position + (w + 2));
            pointc = *(charge + w / 3);
            t = 0;
            printf(".");
            for (a = 0; a < n; a++) {
                /* skip the point's interaction with itself */
                if (t != w) {
                    tempx = *(position + t);
                    tempy = *(position + (t + 1));
                    tempz = *(position + (t + 2));
                    tempc = *(charge + a);

                    /* find the distance between the points */
                    length = sqrt((tempx - pointx) * (tempx - pointx) +
                                  (tempy - pointy) * (tempy - pointy) +
                                  (tempz - pointz) * (tempz - pointz));

                    /* find the unit direction vector */
                    tempunitx = (tempx - pointx) / length;
                    tempunity = (tempy - pointy) / length;
                    tempunitz = (tempz - pointz) / length;

                    /* find the force vector between point and temp */
                    magf = (pointc * tempc) / (length * length);
                    tempunitx = tempunitx * magf;
                    tempunity = tempunity * magf;
                    tempunitz = tempunitz * magf;

                    /* add it to the running force total for point */
                    finalx = finalx + tempunitx;
                    finaly = finaly + tempunity;
                    finalz = finalz + tempunitz;
                }
                t = t + 3;
            }
            w = w + 3;

            /* store the finished force vector and reset the
               accumulators for the next point */
            *(vect + b) = finalx;
            *(vect + (b + 1)) = finaly;
            *(vect + (b + 2)) = finalz;
            b = b + 3;
            finalx = 0;
            finaly = 0;
            finalz = 0;
        }
        MPI_Send(vect, (n / (np - 1)) * 3, MPI_FLOAT, 0, 668, MPI_COMM_WORLD);
    }

    free(position);
    free(charge);
    free(vect);
    MPI_Finalize();
    return 0;
}
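The program takes the particle count n as its only command-line argument, and it assumes np > 1 with n evenly divisible by np - 1. A typical MPICH invocation (the binary name nbody is just an example; launcher flags vary by installation) would be:

    mpirun -np 5 ./nbody 8000

which starts process 0 plus four workers, each computing the force vectors for 2000 particles.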

The Result:

The following is a table of the times, in seconds, that it took np processors to complete a problem of size n particles. Runs used 2, 3, 5, and 9 processors instead of 1, 2, 4, and 8 because in this program process 0 does not take part in the math computation. Due to time constraints and processor availability, the runs with 2, 3, and 5 processors were done on processors that ran at 600 MHz, while the runs with 9 processors were done on 800 MHz processors. This makes the 9-processor times lower than they would otherwise be and accounts for the jump in the speedup curves. Below are the table and the speedup curves; drawn in are the predicted curves had all runs been done on the same type of processor. The 9-processor runs do not mean much without comparison against runs on the same type of processor, but the runs with 2, 3, and 5 processors follow the general trend of parallel speedup curves.

Also attached are pictures attempting to show the results. It was hard to display the vectors graphically; using MATLAB, I was only able to show them as lines in 2D. The first picture shows the random points in 3D; the second shows all the force vectors projected onto the x,y plane.

   n\np |   2 |   3 |   5 |   9
 -------+-----+-----+-----+-----
    400 |   0 |   0 |   0 |   0
    800 |   0 |   0 |   0 |   1
   4000 |  10 |   5 |   3 |   1
   8000 |  41 |  21 |  11 |   4
  16000 | 174 |  82 |  42 |  18
  32000 | 680 | 343 | 171 |  72
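As a quick check on the trend, speedup can be read off the n = 32000 row using the np = 2 run (one worker) as the baseline: two workers give 680/343, or about 2.0, and four workers give 680/171, or about 4.0, which is essentially ideal scaling; eight workers give 680/72, or about 9.4, above ideal only because those runs used the faster 800 MHz processors.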