-
Notifications
You must be signed in to change notification settings - Fork 1
/
sparql-generate.sh
executable file
·208 lines (200 loc) · 6.93 KB
/
sparql-generate.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/bin/bash
# This script assumes you have java installed in your system and the sparql-generate jars in /usr/local/lib/
# Prints the usage summary of this script on stdout.
# Takes no arguments; reads $0 to show the name the script was invoked with.
# The defaults quoted in the text mirror the values assigned in the defaults
# section of this script (javaparameters, outputextension, fquery, jarpath).
function showhelp {
  echo
  echo "Script to automate sparql-generate executions. Version 0.1, 2020-01-31"
  echo
  echo "Usage $0 [OPTION]"
  echo
  # echo " -c, --config=<path> path for configuration file"
  echo " -e, --inputextension <iext> extension of input files; adds input file with the same name as the query file, replacing .rqg by .<iext>"
  echo " -h, --help shows this help and exits"
  echo " -i, --input <path> path for input file(s); can be a pattern"
  echo " -j, --jar <path> path for sparql-generate jar file; overrides -V option"
  echo " -o, --outputextension <oext> extension of output files (default: ttl)"
  echo " -p, --parameters <parameters> java parameters (default: -Xmx15000M)"
  echo " -q, --query <path> path for query file; can be a pattern (default: query.rqg)"
  echo " -s, --split <l> split input file(s) in files of l lines"
  echo " -v, --verbose verbose output"
  echo " -V, --version [1|2] sparql-generate version to use. 1 for 1.1 and 2 for 2.0 (default: 1)"
  echo
  echo "Important note: if a pattern is used as parameter for query or input files, it must be provided inside double quotes (\")"
  echo
}
# Assembles the java command line that runs sparql-generate and prints it on
# stdout as a single space-separated line (callers capture it and eval it).
# Globals read:
#   javaparameters - JVM flags, split on whitespace
#   givenjar       - jar path given by the user; wins over jarpath when non-empty
#   jarpath        - jar path selected by default or by the version option
# Arguments: appended unchanged after the jar path.
function buildcommand {
  # Fixed IFS: it drives both the splitting of the JVM flags and the join below.
  local IFS=$' \t\n'
  local jar
  local -a jvmflags=() parts=()
  # String test on purpose: the former `-ne ""` compared arithmetically, so a
  # jar path was either a syntax error or evaluated to 0 and was ignored.
  if [[ -n "${givenjar:-}" ]]; then
    jar=$givenjar
  else
    jar=${jarpath:-}
  fi
  # read -a splits the flags into words without subjecting them to globbing.
  read -r -a jvmflags <<< "${javaparameters:-}"
  parts=(java "${jvmflags[@]}" -jar "$jar" "$@")
  printf '%s\n' "${parts[*]}"
}
# Default settings. Each value below is a plain global of the script.

# Jar files of the two sparql-generate releases; jarpath starts at the first.
sparqlgenerate1='/usr/local/lib/sparql-generate-1.1.jar'
sparqlgenerate2='/usr/local/lib/sparql-generate-2.0-SNAPSHOT.jar'
jarpath="$sparqlgenerate1"
# Empty means "no jar given explicitly".
givenjar=''

# JVM flags and the source name bound to the input file in each run.
javaparameters='-Xmx15000M'
source='urn:source'

# Query file (or pattern), output extension and where stderr of the jar goes.
fquery='query.rqg'
outputextension='ttl'
redirection='/dev/null'

# Declared without a value: they stay unset until something assigns them.
declare finput fcongif inputextension
# Integer attribute: any value assigned to split is evaluated arithmetically.
declare -i split
# Called without any argument: print the usage text and leave with status 0.
if (( $# < 1 )); then
  showhelp
  exit 0
fi
# Normalise the command line through getopt when the enhanced implementation
# is present; the probe `getopt --test` answers with exit status 4 in that case.
getopt --test > /dev/null
case $? in
  4)
    # enhanced getopt works: let it validate and reorder the arguments
    OPTIONS=c:e:hi:j:o:p:q:s:vV:
    LONGOPTIONS=config:,inputextension:,help,input:,jar:,outputextension:,parameters:,query:,split:,verbose,version:
    if ! COMMAND=$(getopt -o "$OPTIONS" -l "$LONGOPTIONS" -n "$0" -- "$@"); then
      # getopt has already reported what was wrong with the arguments
      exit 2
    fi
    # Replace the positional parameters with getopt's quoted, reordered list.
    eval set -- "$COMMAND"
    ;;
  *)
    # Any other status: keep the arguments exactly as they were typed.
    echo "Enhanced getopt not supported. Brace yourself, this is not tested, but it should work as long as each argument is separated."
    ;;
esac
# Prints path $1 with a leading "~/" replaced by "$HOME/".
# Needed because quoted patterns reach the script with the tilde unexpanded.
function expandhome {
  if [[ $1 == "~/"* ]]; then
    printf '%s\n' "$HOME/${1:2}"
  else
    printf '%s\n' "$1"
  fi
}

# Applies the command-line options in "$@" to the script's global settings
# (inputextension, finput, givenjar, outputextension, javaparameters, fquery,
# split, redirection, jarpath).
# Stops at "--" or when the arguments run out, so it also terminates when the
# arguments were not normalised by getopt and carry no "--" terminator.
# Exits 0 after printing the help for -h/--help; exits 2 on an option without
# its value, an invalid --split value, or an unrecognised argument.
function parseoptions {
  while [ $# -gt 0 ]; do
    # Options that take a value must have one; otherwise `shift 2` would fail
    # without shifting and the loop would never end.
    case "$1" in
      -e|--inputextension|-i|--input|-j|--jar|-o|--outputextension|-p|--parameters|-q|--query|-s|--split|-V|--version)
        if [ $# -lt 2 ]; then
          echo "$0: option '$1' requires an argument" >&2
          exit 2
        fi
        ;;
    esac
    case "$1" in
      # -c|--config)
      #   fcongif=$2
      #   shift 2
      #   ;;
      -e|--inputextension)
        inputextension=$2
        shift 2
        ;;
      -h|--help)
        showhelp
        exit 0
        ;;
      -i|--input)
        finput=$(expandhome "$2")
        shift 2
        ;;
      -j|--jar)
        givenjar=$(expandhome "$2")
        shift 2
        ;;
      -o|--outputextension)
        outputextension=$2
        shift 2
        ;;
      -p|--parameters)
        javaparameters=$2
        shift 2
        ;;
      -q|--query)
        fquery=$(expandhome "$2")
        shift 2
        ;;
      -s|--split)
        # The value ends up as the line count given to `split -l`.
        if [[ ! $2 =~ ^[1-9][0-9]*$ ]]; then
          echo "$0: --split expects a positive number of lines, got '$2'" >&2
          exit 2
        fi
        split=$2
        shift 2
        ;;
      -v|--verbose)
        redirection="/dev/stderr"
        shift
        ;;
      -V|--version)
        case "$2" in
          "1") jarpath=$sparqlgenerate1 ;;
          "2") jarpath=$sparqlgenerate2 ;;
          *) echo "non-valid option for version. Defaulting to $jarpath." >&2 ;;
        esac
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        echo "$0: unrecognised argument '$1'" >&2
        showhelp
        exit 2
        ;;
    esac
  done
}
parseoptions "$@"
# Runs one sparql-generate invocation: builds the command line with
# buildcommand "$@", prints it on stdout and executes it with stderr sent to
# $redirection.
# NOTE: the command is a flat string executed through eval, so paths holding
# whitespace or shell metacharacters are not supported.
function runquery {
  local jarcommand
  jarcommand=$(buildcommand "$@")
  printf '%s\n' "$jarcommand"
  eval "$jarcommand" 2> "$redirection"
}

# Main loop: one pass per query file. $fquery is expanded unquoted on purpose
# so that a pattern yields every matching query file.
for fq in $fquery; do
  if [ -n "$inputextension" ]; then
    # Input sits next to the query: same name with .rqg replaced by the
    # input extension; the output uses the output extension.
    fin=$(realpath "${fq%.rqg}.$inputextension")
    fout=$(realpath "${fq%.rqg}.$outputextension")
    if [ -n "$split" ]; then
      echo '' > "$fout.tmp"
      # Line 1 is kept as a header; the rest is cut into chunks of $split lines.
      tail -n +2 "$fin" | split -l "$split" - "${fin}_split_"
      # Iterate over the chunks just written: their prefix is $fin ($finput is
      # not set by this branch).
      for fsplit in "${fin}_split_"*; do
        [ -e "$fsplit" ] || continue # no chunk was produced
        head -n 1 "$fin" > "$fin.tmp"
        cat "$fsplit" >> "$fin.tmp"
        runquery -q "$fq" -o "$fout.tmp" -oa --source "\"$source\"=\"file://$fin.tmp\""
        rm "$fsplit"
      done
      # Move the @prefix lines out of the accumulated output, then rebuild the
      # result as: unique prefixes first, everything else after.
      # Double quotes are required so that $fout expands in sed's w file name.
      sed -i -e "/^@prefix/{w $fout.prefixes" -e 'd}' "$fout.tmp"
      sort -u "$fout.prefixes" > "$fout"
      cat "$fout.tmp" >> "$fout"
      rm -f "$fin.tmp" "$fout.tmp" "$fout.prefixes"
    else
      runquery -q "$fq" -o "$fout" --source "\"$source\"=\"file://$fin\""
    fi
  elif [ -n "$finput" ]; then
    # Explicit input: $finput is expanded unquoted so a pattern yields every
    # matching input file; each one is run against the current query.
    for fpath in $finput; do
      fin=$(realpath "$fpath")
      fout=${fpath%.*}.$outputextension
      if [ -n "$split" ]; then
        echo '' > "$fout.tmp"
        echo '' > "$fout"
        if [[ $fin == *.json ]]; then
          # For .json the first and the last line are both left out of the chunks.
          tail -n +2 "$fin" | head -n -1 | split -l "$split" - "${fin}_split_"
        else
          tail -n +2 "$fin" | split -l "$split" - "${fin}_split_"
        fi
        # The chunks carry the $fin prefix; $finput may be a pattern or a
        # relative path and would not match them.
        for fsplit in "${fin}_split_"*; do
          [ -e "$fsplit" ] || continue # no chunk was produced
          head -n 1 "$fin" > "$fin.tmp"
          # Drop a trailing comma on the chunk's last line.
          sed '$s/,$//' "$fsplit" >> "$fin.tmp"
          if [[ $fin == *.json ]]; then
            # Re-attach the original last line after the chunk.
            tail -n 1 "$fin" >> "$fin.tmp"
          fi
          # NOTE(review): -oa presumably makes the jar append to $fout.tmp; if
          # so, earlier chunks are copied into $fout again on every pass - confirm.
          runquery -q "$fq" -o "$fout.tmp" -oa --source "\"$source\"=\"file://$fin.tmp\""
          rm "$fsplit"
          cat "$fout.tmp" >> "$fout"
        done
        # Remove all prefixes from $fout, then put the unique ones back on top.
        sed -i -e "/^@prefix/{w $fout.prefixes" -e 'd}' "$fout"
        sort -u "$fout.prefixes" > "$fout.tmp"
        cat "$fout" >> "$fout.tmp"
        mv "$fout.tmp" "$fout"
        rm -f "$fin.tmp" "$fout.prefixes"
      else
        runquery -q "$fq" -o "$fout" --source "\"$source\"=\"file://$fin\""
      fi
    done
  else
    # No input file at all: run the query on its own.
    foutput=$(realpath "${fq%.rqg}.$outputextension")
    runquery -q "$fq" -o "$foutput"
  fi
done