-
Notifications
You must be signed in to change notification settings - Fork 1
/
mpi-launch.sh
executable file
·147 lines (126 loc) · 3.46 KB
/
mpi-launch.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env bash
# The relative paths used below (../setup-hpcx.sh, ./config) are resolved from
# the directory containing this script, so switch there first. Abort if that
# fails: continuing would source whatever happens to sit next to the caller's
# working directory instead.
cd "$(dirname "$0")" || exit 1

# Check if MPI environment is loaded: when no executable mpirun can be found,
# load the environment from the setup script one directory up.
if ! [ -x "$(command -v mpirun)" ]; then
  source ../setup-hpcx.sh
fi

# Local settings. HOSTS_PATH and BENCH_EXEC_DIR are read further down and are
# not assigned anywhere in this script, so they are presumably defined here.
source ./config
# Option defaults. MODE deliberately has no default here: it keeps whatever
# value it already has unless -m/--mode is given.
YALLA=false
UCX=false
PROGRAM="benchmark"

# Consume the positional parameters one option at a time; options that take a
# value consume two parameters.
while (( $# > 0 )); do
  opt=$1
  case "$opt" in
    -y | --yalla)
      YALLA=true
      shift
      ;;
    -u | --ucx)
      UCX=true
      shift
      ;;
    -m | --mode)
      MODE=$2
      # Two single shifts, not `shift 2`: if the value is missing, `shift 2`
      # would leave the parameters untouched and this loop would never end.
      shift
      shift
      ;;
    -p | --program)
      PROGRAM=$2
      # Only the three known launch targets are accepted.
      case "$PROGRAM" in
        roter | benchmark | osu) ;;
        *)
          echo "Invalid program \"$PROGRAM\"" >&2
          exit 2
          ;;
      esac
      shift
      shift
      ;;
    *)
      echo "Unrecognized option $opt" >&2
      exit 2
      ;;
  esac
done
# Config
# HOSTS_PATH names a file listing one host per line. Refuse to continue without
# a readable file: with an empty path the host list would otherwise be read
# from standard input.
hostfile=$HOSTS_PATH
if [[ -z "$hostfile" || ! -r "$hostfile" ]]; then
  >&2 echo "Host file \"$hostfile\" is missing or unreadable (check HOSTS_PATH)"
  exit 1
fi

# Read the file once and derive both the comma-separated host list and the
# process count from the same entries, so the two can never disagree. Blank
# lines are skipped and a last line without a trailing newline is still used.
host_list=()
while IFS= read -r host_entry || [[ -n "$host_entry" ]]; do
  if [[ "$host_entry" =~ [^[:space:]] ]]; then
    host_list+=("$host_entry")
  fi
done < "$hostfile"

if (( ${#host_list[@]} == 0 )); then
  >&2 echo "Host file \"$hostfile\" contains no hosts"
  exit 1
fi

# Join in a subshell so the IFS change does not affect the rest of the script.
hosts=$(IFS=,; printf '%s' "${host_list[*]}")
np=${#host_list[@]}

# Machine-dependent variables
# Second field of the hca_id line(s) printed by ibv_devinfo. In this script the
# value is only referenced from the disabled MXM flag section.
hcaid=$(ibv_devinfo | awk '/hca_id/ { print $2 }')
# From https://community.mellanox.com/docs/DOC-3076#jive_content_id_Running_MPI
# Disabled MXM / yalla / UCX flag set, kept for reference only; none of it runs.
# NOTE(review): this disabled section is the only place in this script where
# YALLA and UCX are consulted, so -y/--yalla and -u/--ucx currently have no
# effect on the launch flags -- confirm whether that is intended.
#
#   HCAS="$hcaid:1"
#   FLAGS+="-mca btl_openib_warn_default_gid_prefix 0 "
#   FLAGS+="-mca btl_openib_warn_no_device_params_found 0 "
#   FLAGS+="--report-bindings --allow-run-as-root -bind-to core "
#   FLAGS+="-mca coll_fca_enable 0 -mca coll_hcoll_enable 0 "
#   if $YALLA; then
#   FLAGS+="-mca pml yalla "
#   fi
#   if $UCX; then
#   FLAGS+="-mca pml ucx -mca osc ucx "
#   fi
#   FLAGS+="-mca mtl_mxm_np 0 -x MXM_TLS=ud,shm,self -x MXM_RDMA_PORTS=$HCAS "
#   FLAGS+="-x MXM_LOG_LEVEL=ERROR -x MXM_IB_PORTS=$HCAS "
#   FLAGS+="-x MXM_IB_MAP_MODE=round-robin -x MXM_IB_USE_GRH=y "

# From Max
#FLAGS+="-mca btl_openib_receive_queues P,65536,256,192,128:S,128,256,192,128:S,2048,1024,1008,64:S,12288,1024,1008,64:S,65536,1024,1008,64 "

# Mellanox recommended flags
# Each entry is appended to FLAGS followed by one space, so FLAGS stays a
# single space-separated string that the launch functions word-split.
mellanox_flags=(
  "--map-by node"
  # "-mca pml yalla"
  "-mca coll_hcoll_enable 0"
  "-mca pml ob1"
  "--mca btl openib,self,vader"
  "--mca btl_openib_cpc_include rdmacm"
  "--mca btl_openib_rroce_enable 1"
  "-mca btl_openib_receive_queues P,65536,256,192,128:S,128,256,192,128:S,2048,1024,1008,64:S,12288,1024,1008,64:S,65536,1024,1008,64"
)
for flag_group in "${mellanox_flags[@]}"; do
  FLAGS+="$flag_group "
done
#######################################
# Launch the rotor_test executable under mpirun.
# Globals:   np, hosts, FLAGS (read)
# Arguments: none
# Returns:   exit status of mpirun
#######################################
run_roter_test() {
  local execname
  # Plain assignment so the leading ~ expands to the home directory.
  execname=~/Source/rotornet-mpi/rlb_v1/rotor_test
  # FLAGS is a space-separated option string and is left unquoted on purpose
  # so that it splits into individual mpirun arguments.
  # shellcheck disable=SC2086
  mpirun -np "$np" --host "$hosts" $FLAGS "$execname"
}
#######################################
# Run the mpi_exec micro-benchmark under mpirun once per message length,
# doubling the length from its start value up to and including the limit.
# Globals:   MODE, BENCH_EXEC_DIR, np, hosts, FLAGS (read)
# Arguments: none
# Outputs:   per length, a "Length = N ..." line, whatever mpirun prints and
#            an empty line on stdout; a command trace on stderr
# Returns:   exits the script with status 2 when MODE is neither "latency"
#            nor "throughput"
#######################################
run_microbenchmark() {
  local execname="$BENCH_EXEC_DIR/mpi_exec"
  local length limit count repeat direction warmup mr_count
  local -a execflags
  local trace_was_on=false

  # Executable flags
  length=$((1 * 32))
  #limit=1024
  limit=$((1 * 32))
  #limit=$((1024*1024*1024))

  # Iteration counts depend on what is being measured.
  case "$MODE" in
    latency)
      count=1
      repeat=1
      ;;
    throughput)
      count=1000
      repeat=10
      ;;
    *)
      >&2 echo "Invalid mode \"$MODE\""
      exit 2
      ;;
  esac

  direction="1-N"
  warmup=0
  mr_count=1

  # Trace every launch, but remember whether tracing was already active so the
  # caller's setting can be put back afterwards.
  case $- in *x*) trace_was_on=true ;; esac
  set -x
  while (( length <= limit )); do
    echo "Length = $length ..."
    # An array keeps each option and value a separate argument without
    # relying on word splitting.
    execflags=(
      -b "$length" -c "$count" -r "$repeat" -m "$MODE" -w "$warmup"
      "--mr_count=$mr_count" "--direction=$direction"
    )
    # FLAGS is a space-separated option string and is left unquoted on purpose
    # so that it splits into individual mpirun arguments.
    # shellcheck disable=SC2086
    mpirun -np "$np" --host "$hosts" $FLAGS "$execname" "${execflags[@]}"
    length=$((length * 2))
    echo ""
  done
  if ! $trace_was_on; then
    # Group + redirect keeps the "set +x" line itself out of the trace.
    { set +x; } 2>/dev/null
  fi
}
#######################################
# Launch the OSU osu_bw benchmark under mpirun.
# Globals:   np, hosts, FLAGS (read)
# Arguments: none
# Returns:   exit status of mpirun
#######################################
run_osu_benchmark() {
  local execname=/usr/mpi/gcc/openmpi-3.1.1rc1/tests/osu-micro-benchmarks-5.3.2/osu_bw
  # FLAGS is a space-separated option string and is left unquoted on purpose
  # so that it splits into individual mpirun arguments.
  # shellcheck disable=SC2086
  mpirun -np "$np" --host "$hosts" $FLAGS "$execname"
}
# Launch MPI job
# errexit is switched on only here, for the launch itself.
set -e
if [[ $PROGRAM == roter ]]; then
  run_roter_test
elif [[ $PROGRAM == benchmark ]]; then
  run_microbenchmark
elif [[ $PROGRAM == osu ]]; then
  run_osu_benchmark
else
  # Any value other than the three known programs ends up here.
  echo "Program not specified ..." >&2
  exit 2
fi