Veera Muangsin
http://space.cp.eng.chula.ac.th/documents/parallelguide.html
Revision 1.01
25 April 2003
This document is a user guide for students and faculty staff who want to use the parallel computing facilities at the department. It was originally written for students of the 2110412 and 2110732 Parallel Computing courses. It is divided into the following sections:
To run VNC,
Edit ~/.vnc/xstartup. It looks like this:
xrdb $HOME/.Xresources
xsetroot -solid grey
xterm -geometry 80x24+10+10 -ls -title "$VNCDESKTOP Desktop" &
twm &
Change the last line to
/usr/dt/bin/dtwm &
#!/bin/bash
#PBS -l cput=00:01:00
#PBS -o test.out
#PBS -e test.err
#PBS -m ea
# Sample PBS job script: report where the job runs, then start the program.
# Standard output is already collected in test.out by the "-o" directive
# above, so the echo below must not redirect into that same file itself.
echo Working directory is $PBS_O_WORKDIR
cd $PBS_O_WORKDIR
# Append a timestamp, the execution host and the current directory to 'output'.
echo `date` `hostname` `pwd` >> output
./myprogram
Here is the explanation, line by line.
#!/bin/bash
As in every shell script, the first line names the shell that will interpret the script.
In this case, it is the Bash shell.
#PBS -l cput=00:01:00
A line that begins with #PBS is a command for PBS. If the
script is run directly on a shell, it is interpreted as a comment line.
This line tells PBS that your job is expected to run for one minute. It
is required by PBS to determine the size of your job.
You should estimate the duration of your job as accurately as possible
for efficient scheduling. If you underestimate, your job will be terminated
by PBS when the time runs out. If you overestimate, your job may be started
later than it should be.
#PBS -o test.out
#PBS -e test.err
These two lines redirect standard output (-o) and standard
error (-e) to the specified files. Since the job will be run in
the background, you need to write the screen output into a file instead.
#PBS -m ea
This tells PBS to send you an e-mail when the job ends (e)
or aborts (a).
echo Working directory is $PBS_O_WORKDIR
cd $PBS_O_WORKDIR
PBS sets several environment variables for each job. For example, $PBS_O_WORKDIR
is the directory from which the job was submitted (where qsub was run).
echo `date` `hostname` `pwd` >> output
The output is written into a file named 'output'.
Job id           Name             User             Time Use S Queue
---------------- ---------------- ---------------- -------- - -----
1233.apollo1     hahaha.pbs       user001          01:25:05 R short
1234.apollo1     test.pbs         user002          00:00:10 R short
1235.apollo1     hohoho.pbs       user003                 0 Q short
It shows that the jobs of user001 and user002 are running (state R), and the job of user003 is waiting in the queue (state Q).
ssh-keygen -t dsa
When you are asked to enter a passphrase, just hit return.
cp ~/.ssh/id_dsa.pub ~/.ssh/authorized_keys2
For example, to make MPICH allocate a process on each processor of the Sun machines (each machine has four processors), edit a machine file named machines.sun which contains:
zeus
zeus
zeus
zeus
athena
athena
athena
athena
For the Apollo cluster, create a file named machines.apollo which contains:
apollo2
apollo3
apollo4
apollo5
apollo6
Frequently used command:
mpirun -machinefile <machine_file> -np <number_of_processes> <executable_program>
For example:
mpirun -machinefile machines.sun -np 8 myprog.sun
mpirun -machinefile machines.apollo -np 8 myprog.linux
A Simple MPI Program: Just say hi (greeting.c)
#include <stdio.h>
#include <mpi.h>
main(int argc, char** argv) {
int my_rank;
/* Rank of process */
int p;
/* Number of processes */
int source;
/* Rank of sender */
int dest;
/* Rank of receiver */
int tag = 50;
/* Tag for messages */
char message[100]; /* Storage for the
message */
char name[32];
/* Processor name */
int name_len;
MPI_Status status; /* Return status for
receive */
printf("start\n");
MPI_Init(&argc, &argv);
printf("MPI_Init OK\n");
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Get_processor_name(name, &name_len);
if (my_rank != 0) {
sprintf(message, "Greetings
from process %d at %s!", my_rank, name);
dest = 0;
MPI_Send(message, strlen(message)+1,
MPI_CHAR, dest,
tag, MPI_COMM_WORLD);
} else {
for (source = 1; source
< p; source++) {
MPI_Recv(message, 100, MPI_CHAR, source, tag,
MPI_COMM_WORLD, &status);
printf("%s\n", message);
}
}
MPI_Finalize();
} /* main */
Another Program: Summation (sum.c)
#include "mpi.h"
#include <stdio.h>
#include <math.h>
#define MAXSIZE 1000
/* Busy-work helper defined after main; declared here so the call in main
   is not an implicit declaration. */
int compute(void);

/*
 * sum.c -- rank 0 fills an array with ones and broadcasts it; every process
 * adds up its own slice, and the partial sums are reduced onto rank 0, which
 * prints the total and the elapsed wall-clock time.
 *
 * Returns 0 on completion.
 */
int main(int argc, char **argv)
{
    int myid, numprocs;
    int data[MAXSIZE], i, n, x, low, high;
    int myresult = 0;           /* must start at zero: it is accumulated into */
    int result = 0;
    double start = 0.0, stop;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    n = MAXSIZE;
    if (myid == 0) {            /* initialize data */
        for (i = 0; i < n; i++) {
            data[i] = 1;
        }
    }
    if (myid == 0) {
        start = MPI_Wtime();
    }

    /* broadcast data */
    MPI_Bcast(data, n, MPI_INT, 0, MPI_COMM_WORLD);

    /* Add my portion of data.  The last rank also takes the elements left
       over when n is not a multiple of numprocs, so nothing is skipped. */
    x = n / numprocs;
    low = myid * x;
    high = (myid == numprocs - 1) ? n : low + x;
    for (i = low; i < high; i++) {
        compute();              /* Do some computation. */
        myresult += data[i];
    }
    printf("I got %d from %d\n", myresult, myid);

    /* Compute global sum */
    MPI_Reduce(&myresult, &result, 1, MPI_INT,
               MPI_SUM, 0, MPI_COMM_WORLD);

    if (myid == 0) {
        printf("The sum is %d.\n", result);
        stop = MPI_Wtime();
        printf("parallel section time %f\n", stop - start);
    }

    MPI_Finalize();
    return 0;
}
/*
 * compute -- spin through one million loop iterations.
 * Nothing useful. Just to make timing more interesting.
 *
 * Returns 0 always; callers may ignore the value.
 */
int compute(void)
{
    /* volatile keeps the otherwise empty loop from being optimized away. */
    volatile int i;

    for (i = 0; i < 1000000; i++) {
        /* intentionally empty */
    }
    return 0;
}
One more: Calculating Pi (pi.c)
#include "mpi.h"
#include <stdio.h>
#include <math.h>
double f( double );

/*
 * f -- the integrand 4 / (1 + a^2) evaluated at a.
 */
double f( double a )
{
    const double denominator = 1.0 + a*a;

    return 4.0 / denominator;
}
/*
 * pi.c -- approximate pi by summing f() at the midpoints of n equal
 * sub-intervals of [0, 1].  The midpoints are dealt out to the processes in
 * round-robin order and the partial results are reduced onto rank 0, which
 * prints the approximation, its error and the elapsed wall-clock time.
 *
 * Rank 0 picks n = 100 for the first pass and n = 0 for the second; n == 0
 * ends the loop on every rank.  (The interactive prompt for n is disabled.)
 */
int main( int argc, char *argv[])
{
    const double reference_pi = 3.141592653589793238462643;
    int rank, nprocs, host_len, k;
    int intervals = 0;
    double width, partial, mid, local_pi, global_pi;
    double t_begin = 0.0, t_end;
    char host[MPI_MAX_PROCESSOR_NAME];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(host, &host_len);
    fprintf(stderr, "Process %d on %s\n", rank, host);

    for (;;) {
        if (rank == 0) {
            /* Toggle: 100 intervals on the first pass, 0 (quit) on the next. */
            intervals = (intervals == 0) ? 100 : 0;
            t_begin = MPI_Wtime();
        }
        MPI_Bcast(&intervals, 1, MPI_INT, 0, MPI_COMM_WORLD);
        if (intervals == 0) {
            break;
        }

        width = 1.0 / (double) intervals;
        partial = 0.0;
        k = rank + 1;
        while (k <= intervals) {
            mid = width * ((double)k - 0.5);
            partial += f(mid);
            k += nprocs;
        }
        local_pi = width * partial;

        MPI_Reduce(&local_pi, &global_pi, 1, MPI_DOUBLE, MPI_SUM, 0,
                   MPI_COMM_WORLD);

        if (rank == 0) {
            printf("pi is approximately %.16f, Error is %.16f\n",
                   global_pi, fabs(global_pi - reference_pi));
            t_end = MPI_Wtime();
            printf("wall clock time = %f\n", t_end - t_begin);
        }
    }

    MPI_Finalize();
    return 0;
}