Parallel Programming on the Cluster

  • You may use either /opt/mpich2-1.2.1p1 or /opt/mpich2 (not both at the same time)
  • You need parallel source code in C or Fortran (demo programs are provided below)
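
Each installation ships its own compiler wrappers and runtime libraries. As a quick sanity check (these paths assume the layout used in the examples below), you can list what each provides:

          ls /opt/mpich2-1.2.1p1/bin /opt/mpich2/gnu/bin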

Running the demo C program on the cluster

pi.c

#include "mpi.h"
#include <stdio.h>
#include <math.h>
int main( int argc, char *argv[] )
{

int i, n, myid, numprocs;
double PI25DT = 3.141592653589793238462643;
double mypi, pi, h, sum, x;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
for(n=300000;n>=0;n--){
//while (1) {
if (myid == 0) {
printf("Enter the number of intervals: (0 quits) ");
//scanf("%d",&n);
// for(n=123; n<30000000; n++);
// n = 0;
}

MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (n == 0)
break;
else {
h   = 1.0 / (double) n;
sum = 0.0;
for (i = myid + 1; i <= n; i += numprocs) {
x = h * ((double)i - 0.5);
sum += (4.0 / (1.0 + x*x));
}
mypi = h * sum;
MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0,
MPI_COMM_WORLD);
if (myid == 0) 
printf("pi is approximately %.16f, Error is %.16f\n",
pi, fabs(pi - PI25DT));
}
}
MPI_Finalize();
return 0;
}
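
The demo approximates π by midpoint-rule numerical integration, with each rank summing every numprocs-th term; in LaTeX notation:

          \pi = \int_0^1 \frac{4}{1+x^2}\,dx
              \approx h \sum_{i=1}^{n} \frac{4}{1 + x_i^2},
          \qquad h = \frac{1}{n}, \quad x_i = h\,(i - 0.5)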

Compile the program with the mpicc wrapper of whichever installation you chose, e.g. against /opt/mpich2-1.2.1p1:

          mpicc -I/opt/mpich2-1.2.1p1/include -g -o newmpich2_out pi.c

or against /opt/mpich2:

          mpicc -I/opt/mpich2/gnu/include -g -o pi2_notinscalemp_out pi.c
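
To confirm which installation the wrapper on your PATH resolves to, mpicc can print the underlying compiler command (the exact output will vary with your environment):

          which mpicc
          mpicc -show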

The following PBS script runs the /opt/mpich2-1.2.1p1 build (newmpich2_out); submit it with qsub:

#!/bin/sh
#name the job
#PBS -N testing_mpich2

#Request 4 nodes with 10 processors each (40 total) for 00:10:00 walltime
#PBS -l nodes=4:ppn=10,walltime=00:10:00

# The output of stdout is sent to outputFile
#PBS -o outputFile
# The output of stderr is sent to errorFile
#PBS -e errorFile

#the following two lines ensure you are notified by email when the job finishes
#PBS -M au_user_id@auburn.edu
#PBS -m e

#load library path
export LD_LIBRARY_PATH=/opt/mpich2-1.2.1p1/lib:$LD_LIBRARY_PATH
#load executable path
export PATH=/opt/mpich2-1.2.1p1/bin:$PATH

#print host name to see which node is computing
/bin/hostname > out

#initial timestamp
date >> out

#generate the node file (one unique host per line)
sort -u $PBS_NODEFILE > mpd_nodes
#assign variable nhosts to the number of hosts
nhosts=`cat mpd_nodes | wc -l`

#generate the processor file and count the total processor slots
sort $PBS_NODEFILE > conf_file
proc=`cat conf_file | wc -l`

#start mpd on number of hosts(=nhosts) given by file mpd_nodes
mpdboot -n $nhosts -v -f mpd_nodes

#run the program with mpiexec on $proc processes
mpiexec -np $proc ./newmpich2_out >> out

#stop mpd
mpdallexit

The same script adjusted for the /opt/mpich2 installation (pi2_notinscalemp_out):

#!/bin/sh
#name the job
#PBS -N testing_mpich2

#Request 4 nodes with 10 processors each (40 total) for 00:10:00 walltime
#PBS -l nodes=4:ppn=10,walltime=00:10:00

# The output of stdout is sent to outputFile
#PBS -o outputFile
# The output of stderr is sent to errorFile
#PBS -e errorFile

#the following two lines ensure you are notified by email when the job finishes
#PBS -M au_user_id@auburn.edu
#PBS -m e

#load library path
export LD_LIBRARY_PATH=/opt/mpich2/gnu/lib:$LD_LIBRARY_PATH
#load executable path
export PATH=/opt/mpich2/gnu/bin:$PATH

#print host name to see which node is computing
/bin/hostname > out

#initial timestamp
date >> out

#generate the node file (one unique host per line)
sort -u $PBS_NODEFILE > mpd_nodes
#assign variable nhosts to the number of hosts
nhosts=`cat mpd_nodes | wc -l`

#generate the processor file and count the total processor slots
sort $PBS_NODEFILE > conf_file
proc=`cat conf_file | wc -l`

#start mpd on number of hosts(=nhosts) given by file mpd_nodes
mpdboot -n $nhosts -v -f mpd_nodes

#run the program with mpiexec on $proc processes
mpiexec -np $proc ./pi2_notinscalemp_out >> out

#stop mpd
mpdallexit
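
Save the script of your choice and submit it from the head node (run_c.sh is just an example name):

          qsub ./run_c.sh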

  • Type "showq" to see your job is running
  • Type "pbsnodes -a" to see it was assigned to multiple nodes according to your request

Running the demo Fortran program on the cluster

fortran.f

      program main
      include "mpif.h"
      double precision  PI25DT
      parameter        (PI25DT = 3.141592653589793238462643d0)
      double precision  mypi, pi, h, sum, x, f, a
      integer n, myid, numprocs, i, ierr
c                                 function to integrate
      f(a) = 4.d0 / (1.d0 + a*a)

      call MPI_INIT(ierr)
      call MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)
      call MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)

      if ( myid .eq. 0 ) then
c                                 batch jobs cannot read from stdin,
c                                 so use a fixed interval count in
c                                 place of read(*,*) n
         n = 3
      endif
c                                 broadcast n to all ranks
      call MPI_BCAST(n,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
c                                 calculate the interval size
      h = 1.0d0/n
      sum  = 0.0d0
c                                 each rank sums its share of terms
      do 20 i = myid+1, n, numprocs
         x = h * (dble(i) - 0.5d0)
         sum = sum + f(x)
 20   continue
      mypi = h * sum
c                                 collect all the partial sums
      call MPI_REDUCE(mypi,pi,1,MPI_DOUBLE_PRECISION,MPI_SUM,0,
     &                  MPI_COMM_WORLD,ierr)
c                                 rank 0 prints the answer
      if (myid .eq. 0) then
         print *, 'pi is ', pi, ' Error is', abs(pi - PI25DT)
      endif
      call MPI_FINALIZE(ierr)
      stop
      end

Compile with either Fortran wrapper (note the source file at the end of the command):

          mpif77 -I/opt/mpich2-1.2.1p1/include -g -o fortran_out fortran.f

    or    mpif90 -I/opt/mpich2-1.2.1p1/include -g -o fortran_out fortran.f

  • Submit the job using "qsub ./run_fortran.sh", where run_fortran.sh is the following script:

#!/bin/sh
#name the program
#PBS -N testing_fortran_mpich2

#Request 1 node with 10 processors (10 total) for 00:10:00 walltime
#PBS -l nodes=1:ppn=10,walltime=00:10:00

# The output of stdout is sent to outputFile
#PBS -o outputFile
# The output of stderr is sent to errorFile
#PBS -e errorFile

#the following two lines ensure you are notified by email when the job finishes
#PBS -M au_user_id@auburn.edu
#PBS -m e

#set the job's working directory in the following line
#(use the path printed by pwd in your source directory)
#PBS -d /home/au_user_id/fortran/

#load library path
export LD_LIBRARY_PATH=/opt/mpich2-1.2.1p1/lib:$LD_LIBRARY_PATH
#load executable path
export PATH=/opt/mpich2-1.2.1p1/bin:$PATH

#print host name to see which node is computing
/bin/hostname > out

#initial timestamp
date >> out

#generate the node file (one unique host per line)
sort -u $PBS_NODEFILE > mpd_nodes
#assign variable nhosts to the number of hosts
nhosts=`cat mpd_nodes | wc -l`

#generate the processor file and count the total processor slots
sort $PBS_NODEFILE > conf_file
proc=`cat conf_file | wc -l`

#start mpd on number of hosts(=nhosts) given by file mpd_nodes
mpdboot -n $nhosts -v -f mpd_nodes

#run the program with mpiexec on $proc processes
mpiexec -np $proc ./fortran_out >> out

#stop mpd
mpdallexit
date >> out

  • Type "showq" to see your job is running
  • Type "pbsnodes -a" to see it was assigned to multiple/single nodes according to your request

Running two or more parallel programs simultaneously

  • Use either /opt/mpich2-1.2.1p1 or /opt/mpich2 for both jobs (do not mix the two installations)
  • Make two scripts, one per job
  • In the first script rename mpd_nodes to mpd_nodes1 and nhosts to nhosts1; in the second use mpd_nodes2 and nhosts2 (a consolidated sketch follows this list):

           sort -u $PBS_NODEFILE > mpd_nodes1
           nhosts1=`cat mpd_nodes1 | wc -l`

  • Likewise rename conf_file to conf_file1/conf_file2 and proc to proc1/proc2
  • In both scripts, comment out the mpdboot line (mpd will be started manually instead):

           mpdboot -n $nhosts -v -f mpd_nodes

  • Also comment out the last line:

           mpdallexit

  • Using a text editor such as vi, create an mpd_nodes file with the following four lines and save it:

           compute-1
           compute-2
           compute-3
           compute-4

  • ssh to compute-1 using

          ssh compute-1

  • To start mpd manually, execute the following command:

          /opt/mpich2-1.2.1p1/bin/mpdboot -n 4 -v -f mpd_nodes
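
  • You can verify that the ring is up with mpdtrace, which lists the hosts running mpd:

          /opt/mpich2-1.2.1p1/bin/mpdtrace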

  • Type "exit" to exit from compute-1
  • Submit your jobs using "qsub ./script.sh"

           multiple_mpich2_script.sh

  • When all of your jobs are done, ssh to compute-1 and run "/opt/mpich2-1.2.1p1/bin/mpdallexit" to stop mpd manually
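
Putting the modifications together, the node/processor setup in the first job's script would look roughly like this (a sketch only; mpd_nodes1, conf_file1, and the executable name follow from the steps above):

#this job's node and processor files
sort -u $PBS_NODEFILE > mpd_nodes1
nhosts1=`cat mpd_nodes1 | wc -l`
sort $PBS_NODEFILE > conf_file1
proc1=`cat conf_file1 | wc -l`

#mpd is booted manually on compute-1, so do not start or stop it here
#mpdboot -n $nhosts1 -v -f mpd_nodes1

mpiexec -np $proc1 ./newmpich2_out >> out

#mpdallexit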

For details about parallel programming, please visit

http://www.mcs.anl.gov/research/projects/mpi/usingmpi/examples/simplempi/main.htm

http://static.msi.umn.edu/tutorial/scicomp/general/intro_parallel_prog/content.html

https://computing.llnl.gov/tutorials/parallel_comp/