-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsubmit_2.sh
43 lines (33 loc) · 918 Bytes
/
submit_2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/sh
# PBS batch job header. Lines beginning with "#PBS" are scheduler directives;
# lines beginning with "##PBS" are disabled alternatives kept for reference.
#
# Resources: two chunks with system=polaris; place=scatter asks the scheduler
# to put the chunks on different hosts.
#PBS -l select=2:system=polaris
#PBS -l place=scatter
# Walltime: the 1-hour request is active, the 4-hour request is disabled.
##PBS -l walltime=4:00:00
#PBS -l walltime=1:00:00
# Filesystems requested for the job (presumably the ones it reads/writes --
# the module config sourced later lives under /lus/eagle).
#PBS -l filesystems=home:eagle
# Queue: "debug" is active, "preemptable" is disabled.
##PBS -q preemptable
#PBS -q debug
# Account (project) string and job name.
#PBS -A datascience
#PBS -N GNN_DDP_2
# Job body: move to the PBS working directory, load the software environment,
# work out how many GPUs the allocation provides, and launch one process per
# GPU through mpiexec.

# Change to working directory (PBS_O_WORKDIR). Abort if that fails, otherwise
# the relative paths used by the launch command below would be resolved
# against the wrong directory.
cd "${PBS_O_WORKDIR}" || {
  echo "ERROR: cannot cd to PBS_O_WORKDIR='${PBS_O_WORKDIR}'" >&2
  exit 1
}

TSTAMP=$(date "+%Y-%m-%d-%H%M%S")
echo "Job started at: ${TSTAMP}"

# Load modules. '.' is the POSIX spelling of 'source', which keeps this line
# valid under any /bin/sh, not only bash.
. /lus/eagle/projects/datascience/sbarwey/codes/ml/pytorch_geometric/module_config

# Get number of nodes (counted as lines in the PBS node file)
NUM_NODES=$(wc -l < "${PBS_NODEFILE}")

# Get number of GPUs per node (one line of 'nvidia-smi -L' output per GPU,
# measured on the node running this script)
NGPUS_PER_NODE=$(nvidia-smi -L | wc -l)

# Get total number of GPUs
NGPUS=$((NUM_NODES * NGPUS_PER_NODE))

# Print
echo "${NUM_NODES} ${NGPUS_PER_NODE} ${NGPUS}"

# Refuse to launch with zero ranks (e.g. nvidia-smi missing or an empty node
# file); the message states the cause up front.
if [ "${NGPUS}" -le 0 ]; then
  echo "ERROR: no GPUs detected (nodes=${NUM_NODES}, gpus/node=${NGPUS_PER_NODE})" >&2
  exit 1
fi

# run: one rank per GPU, NGPUS_PER_NODE ranks on each node; the wrapper script
# is invoked in front of python3 for every rank.
mpiexec \
  --verbose \
  --envall \
  -n "${NGPUS}" \
  --ppn "${NGPUS_PER_NODE}" \
  --hostfile="${PBS_NODEFILE}" \
  --cpu-bind none \
  ./set_affinity_gpu_polaris.sh python3 main.py seed=65 use_noise=True topk_rf=8