-
Notifications
You must be signed in to change notification settings - Fork 0
/
sparkcluster_and_driver.job
65 lines (53 loc) · 1.46 KB
/
sparkcluster_and_driver.job
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/bin/bash
# Grid Engine job script: prepares the environment for a Spark cluster that
# is started on the allocated nodes further down in this file.
# Grid Engine options (lines prefixed with #$)
#$ -N Spark_Cluster_And_Query
#$ -cwd
#$ -l h_rt=00:59:00
#$ -l h_vmem=4G
#$ -pe mpi 48
#$ -R y
# The script relies on bash arrays, so request bash as the job shell
# explicitly instead of depending on what /bin/sh or the queue default is.
#$ -S /bin/bash
# Initialise the environment modules
. /etc/profile.d/modules.sh
# Load Python
module load python/2.7.10
# Run the program
export SPARK_HOME="${HOME}/spark-2.4.0-bin-hadoop2.7"
# The helper scripts are invoked relative to this directory and the log
# files are written into it, so stop if it cannot be entered.
cd "$HOME/bash_scripts" || {
  echo "cannot change directory to $HOME/bash_scripts" >&2
  exit 1
}
# Remove the log files left behind by a previous run.
rm -f machines.log machines_uniq.log master.log driver.log worker.log
# Slot count granted by Grid Engine; kept for reference, it is not read
# again in this script.
NUM_NODES=$NSLOTS

# Print the host name (first column) of every non-blank line of a hostfile.
# Arguments: $1 - path to the hostfile
# Outputs:   one host name per line on stdout
_hostfile_hosts() {
  # Input is redirected rather than passed as an operand so that an empty
  # path produces an error instead of making awk read standard input.
  awk 'NF { print $1 }' < "$1"
}

# Record the allocated hosts in machines.log. The hostfile is read in a
# single pass: it holds one line per host, not one per slot, so indexing it
# by slot number only repeated its last line.
if [[ -r "${PE_HOSTFILE:-}" ]]; then
  _hostfile_hosts "$PE_HOSTFILE" > machines.log
else
  echo "PE_HOSTFILE is unset or unreadable; no hosts recorded" >&2
  : > machines.log
fi

# Distinct host names in sorted order. Word splitting of the command
# substitution is intentional: host names contain no whitespace.
# shellcheck disable=SC2207
nodes=($(sort -u machines.log))
# Print the host that should run the Spark driver.
# Arguments: $1   - master host name
#            $2.. - candidate host names
# Outputs:   the last candidate that differs from the master; when there is
#            no such candidate, the master itself
_pick_driver() {
  local master=$1 chosen=$1 host
  shift
  for host in "$@"; do
    if [[ "$host" != "$master" ]]; then
      chosen=$host
    fi
  done
  printf '%s\n' "$chosen"
}

# start resource manager only once
# master.log is written before start_master.sh runs, as before
# (presumably the helper reads it - confirm).
hostname > master.log
./start_master.sh
mastername=$(cat "master.log")
echo "Started master on" "$mastername"

# getting the driver node - only one
# NOTE(review): this is a plain string comparison, so the output of
# `hostname` must use the same naming as the hostfile for the master to be
# excluded - confirm on the target cluster.
drivername=$(_pick_driver "$mastername" "${nodes[@]}")
if [[ "$drivername" == "$mastername" ]]; then
  # Previously driver.log was never written in this case and the driver
  # name ended up empty; fall back to the master host instead.
  echo "No node other than the master is available; using $mastername as driver" >&2
fi
echo "$drivername" > driver.log
# Without both host names every ssh call below would be malformed (an empty
# driver name makes ssh treat the command string as the host), so stop early.
if [[ -z "${mastername:-}" || -z "${drivername:-}" ]]; then
  echo "master or driver host name is empty; cannot start the Spark cluster" >&2
  exit 1
fi

# Launch start_worker.sh on every allocated node, in the background. The
# loop itself skips no host; the master and driver names are handed to
# start_worker.sh (presumably so it can treat those hosts specially -
# confirm against that script).
for worker_host in "${nodes[@]}"; do
  echo "$worker_host"
  ssh "$worker_host" "cd $HOME/bash_scripts; ./start_worker.sh $mastername $drivername" &
done

######### submitting Spark PI application from the driver ##########
# Runs in the foreground; the worker launches above are not waited for.
ssh "$drivername" "./spark-Pi.sh $mastername"

######### submitting a Spark text mining query from the driver ##########
# ssh $drivername "./spark-textming.sh $mastername"

# Keep the job running after the submission returns. The requested h_rt is
# 00:59:00, so the scheduler's run-time limit is reached before this sleep
# finishes.
sleep 1h