This repository has been archived by the owner on Jan 8, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
submit.sh
executable file
·116 lines (104 loc) · 3.29 KB
/
submit.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/bash
# Default settings, used unless the matching command line option is given.

# Spark resources and endpoints
MEMORY='4g'
SPARK_MASTER='local[*]'
CASSANDRA_HOST='localhost'

# Currency and Cassandra keyspaces
CURRENCY='BTC'
RAW_KEYSPACE='btc_raw'
TGT_KEYSPACE='btc_transformed'

# Job parameters
BUCKET_SIZE=25000
BECH32_PREFIX=''

# Local file system by default; use an hdfs:// URI in cluster mode.
CHECKPOINT_DIR='file:///tmp/spark-checkpoint'
# Abort early when the Spark installation cannot be located: SPARK_HOME is
# used further down to find bin/spark-submit.
if [[ -z "${SPARK_HOME:-}" ]]; then
  # Write to the inherited stderr descriptor (>&2) instead of reopening
  # /dev/stderr: reopening truncates a log file that stderr was redirected
  # to and can fail when the device node is not accessible.
  echo "Cannot find Apache Spark. Set the SPARK_HOME environment variable." >&2
  exit 1
fi
EXEC=$(basename "$0")
USAGE="Usage: $EXEC [-h] [-m MEMORY_GB] [-c CASSANDRA_HOST] [-s SPARK_MASTER] [--currency CURRENCY] [--raw_keyspace RAW_KEYSPACE] [--tgt_keyspace TGT_KEYSPACE] [--bucket_size BUCKET_SIZE] [--bech32-prefix BECH32_PREFIX] [--checkpoint-dir CHECKPOINT_DIR] [--coinjoin-filtering]"

#######################################
# Parse the command line and store the option values in the settings
# variables.
# Globals:
#   EXEC, USAGE (read)
#   CASSANDRA_HOST, MEMORY, SPARK_MASTER, CURRENCY, RAW_KEYSPACE,
#   TGT_KEYSPACE, BUCKET_SIZE, BECH32_PREFIX, CHECKPOINT_DIR (written)
# Arguments:
#   The script's command line arguments ("$@").
# Outputs:
#   Usage text on stdout for -h; diagnostics on stderr.
# Returns:
#   0 on success. Exits the script with 0 after -h and with 1 on an
#   unknown option, a missing option value or a stray positional argument.
#######################################
parse_args() {
  local args
  # A trailing ':' marks a long option that requires a value. The three
  # options that the usage text spells with hyphens are registered in both
  # the hyphenated and the underscore spelling.
  local long_opts="raw_keyspace:,tgt_keyspace:,bucket_size:,currency:"
  long_opts+=",bech32_prefix:,bech32-prefix:"
  long_opts+=",checkpoint_dir:,checkpoint-dir:"
  long_opts+=",coinjoin_filtering,coinjoin-filtering"

  # getopt prints its own diagnostic (prefixed with the script name via -n).
  # Stop here instead of carrying on with default values.
  if ! args=$(getopt -n "$EXEC" -o hc:m:s: --long "$long_opts" -- "$@"); then
    echo "$USAGE" >&2
    exit 1
  fi
  # getopt emits a shell-quoted argument list; eval re-splits it safely.
  eval set -- "$args"

  while true; do
    case "$1" in
      -h)
        echo "$USAGE"
        exit 0
        ;;
      -c)
        CASSANDRA_HOST="$2"
        shift 2
        ;;
      -m)
        # The value is a number of gigabytes; append the unit suffix.
        MEMORY=$(printf "%dg" "$2")
        shift 2
        ;;
      -s)
        SPARK_MASTER="$2"
        shift 2
        ;;
      --currency)
        CURRENCY="$2"
        shift 2
        ;;
      --raw_keyspace)
        RAW_KEYSPACE="$2"
        shift 2
        ;;
      --tgt_keyspace)
        TGT_KEYSPACE="$2"
        shift 2
        ;;
      --bucket_size)
        BUCKET_SIZE="$2"
        shift 2
        ;;
      --bech32_prefix | --bech32-prefix)
        BECH32_PREFIX="$2"
        shift 2
        ;;
      --checkpoint_dir | --checkpoint-dir)
        CHECKPOINT_DIR="$2"
        shift 2
        ;;
      --coinjoin_filtering | --coinjoin-filtering)
        # Flag without a value. The job is always submitted with
        # --coinjoin-filtering, so there is nothing to record here.
        shift
        ;;
      --) # end of all options
        shift
        # Positional arguments are not supported.
        if [[ -n "$*" ]]; then
          echo "$EXEC: Error - unknown argument \"$*\"" >&2
          exit 1
        fi
        break
        ;;
      -*)
        echo "$EXEC: Unrecognized option \"$1\". Use -h flag for help." >&2
        exit 1
        ;;
      *) # no more options
        break
        ;;
    esac
  done
}

# parse command line options
parse_args "$@"
#######################################
# Print the effective settings before the job is submitted.
# Globals:
#   CASSANDRA_HOST, SPARK_MASTER, MEMORY, CURRENCY, RAW_KEYSPACE,
#   TGT_KEYSPACE, BUCKET_SIZE, BECH32_PREFIX, CHECKPOINT_DIR (read)
# Outputs:
#   One headline plus one " - name: value" line per setting on stdout.
#######################################
print_settings() {
  # Values are passed through %s so that backslashes in user-supplied
  # settings are shown literally rather than expanded as escape sequences.
  printf 'Starting on %s with master %s and %s memory ...\n' \
    "$CASSANDRA_HOST" "$SPARK_MASTER" "$MEMORY"
  # printf reuses the format for every name/value pair.
  printf ' - %s: %s\n' \
    "currency" "$CURRENCY" \
    "raw keyspace" "$RAW_KEYSPACE" \
    "target keyspace" "$TGT_KEYSPACE" \
    "bucket size" "$BUCKET_SIZE" \
    "BECH32 prefix" "$BECH32_PREFIX" \
    "checkpoint dir" "$CHECKPOINT_DIR"
}

print_settings
# Package coordinates handed to spark-submit via --packages.
packages=(
  "com.datastax.spark:spark-cassandra-connector_2.12:3.2.0"
  "graphframes:graphframes:0.8.2-spark3.2-s_2.12"
  "org.rogach:scallop_2.12:4.1.0"
  "joda-time:joda-time:2.10.10"
)
# --packages takes one comma-separated word; join inside a subshell so the
# IFS change does not leak into the rest of the script.
package_list=$(IFS=,; printf '%s' "${packages[*]}")

# Options for spark-submit itself (everything before the application jar).
spark_opts=(
  --class "org.graphsense.TransformationJob"
  --master "$SPARK_MASTER"
  --conf "spark.executor.memory=$MEMORY"
  --conf "spark.cassandra.connection.host=$CASSANDRA_HOST"
  --conf "spark.sql.session.timeZone=UTC"
  --conf "spark.sql.extensions=com.datastax.spark.connector.CassandraSparkExtensions"
  --packages "$package_list"
)

# Arguments for the transformation job (everything after the jar).
job_args=(
  --currency "$CURRENCY"
  --raw-keyspace "$RAW_KEYSPACE"
  --target-keyspace "$TGT_KEYSPACE"
  --bucket-size "$BUCKET_SIZE"
  --coinjoin-filtering
  --bech32-prefix "$BECH32_PREFIX"
  --checkpoint-dir "$CHECKPOINT_DIR"
)

# The jar path is relative to the current working directory.
"$SPARK_HOME/bin/spark-submit" "${spark_opts[@]}" \
  "target/scala-2.12/graphsense-transformation_2.12-1.5.0.jar" \
  "${job_args[@]}"
# Hand the exit status of spark-submit back to the caller.
exit $?