#!/bin/bash 

# Command-line handling.
#
# Arguments:
#   $1  nodes     number of nodes to request (positive integer)
#   $2  ppn       MPI processes per node (positive integer)
#   $3  walltime  run-time limit, inserted unchanged after "#BSUB -W"
#
# Sets: nodes, ppn, walltime, nmpi (nodes * ppn), ompthreads (20 / ppn, min 1).
if [ $# -ne 3 ]; then
    # All three arguments are mandatory, so the usage text lists walltime too.
    echo "$0: usage: nvprof.run.sh.gtc (#nodes) (#process per node) (walltime)" >&2
    exit 1
fi

nodes=$1
ppn=$2
walltime=$3

# Only plain positive decimal integers are accepted: this rules out ppn=0
# (division by zero below), leading zeros (read as octal in arithmetic) and
# arbitrary text that arithmetic evaluation would otherwise try to interpret.
for count in "$nodes" "$ppn"; do
    if ! [[ $count =~ ^[1-9][0-9]*$ ]]; then
        echo "$0: error: '$count' is not a positive integer" >&2
        exit 1
    fi
done

if [ -z "$walltime" ]; then
    echo "$0: error: walltime must not be empty" >&2
    exit 1
fi

# Per-node thread budget that is divided among the processes on a node.
# NOTE(review): 20 is presumably the CPU core count of a node - confirm.
thread_budget=20

nmpi=$(( nodes * ppn ))
ompthreads=$(( thread_budget / ppn ))

# Integer division yields 0 once ppn exceeds the budget; a thread count of
# zero is not meaningful, so fall back to one thread per process.
if (( ompthreads < 1 )); then
    echo "$0: warning: ppn=$ppn exceeds $thread_budget; using 1 OpenMP thread per process" >&2
    ompthreads=1
fi
#--------------------------------------
# Emit the text of the LSF job script on stdout.
#
# The here-document delimiter is unquoted, so ${ppn}, ${nmpi}, ${walltime} and
# ${ompthreads} are substituted now, while this generator runs. Lines starting
# with '#' inside the here-document are job-script content, not comments of
# this file. The %q{OMPI_COMM_WORLD_RANK} tokens are not shell syntax and are
# written out literally for nvprof to handle at run time.
write_batch_job() {
    cat <<EOF
#BSUB -o %J.out
#BSUB -e %J.err
#BSUB -R "span[ptile=${ppn}]"
#BSUB -n ${nmpi}
#BSUB -q batch
#BSUB -P csc190gtc
#BSUB -env "all, JOB_FEATURE=gpumps"
#BSUB -W ${walltime} 
#---------------------------------
export OMP_NUM_THREADS=${ompthreads}
#mpirun -gpu -np ${nmpi} ./set_device.sh ./gtc
ulimit -s 10240

mkdir -p restart_dir1
mkdir -p restart_dir2
mkdir -p restart_dir

mkdir -p phi_dir
mkdir -p trackp_dir

mpirun --gpu --bind-to core -np ${nmpi} ./set_device.sh ./mpi_bind.sh nvprof --process-name "MPI Rank %q{OMPI_COMM_WORLD_RANK}" --context-name "MPI Rank %q{OMPI_COMM_WORLD_RANK}" -o timeline.%q{OMPI_COMM_WORLD_RANK}.nvprof ./gtc
EOF
}

# (Re)create batch.job in the current directory from the template above.
write_batch_job >batch.job
#---------------------------------------
# Submit the generated job script; bsub reads it from standard input.
# A failed submission is reported on stderr and its exit status is propagated,
# so callers do not see a success summary for a job that was never queued.
if ! bsub <batch.job; then
    status=$?
    echo "$0: error: bsub failed to submit batch.job (exit status $status)" >&2
    exit "$status"
fi

# Summary of the layout that was requested.
echo "total MPI ranks: $nmpi"
echo "OpenMP threads per process: $ompthreads"
