-
Notifications
You must be signed in to change notification settings - Fork 4
/
interactive.sh
52 lines (48 loc) · 1.2 KB
/
interactive.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env bash
#
# Translate a BPE-segmented file with interactive.py and score the result
# with sacreBLEU.
#
# Usage:
#   bash interactive.sh BPEFILE LANGPAIR YEAR [--beam N] [--lenpen X] [ARGS...]
#
#   BPEFILE    BPE-segmented source text. Relative paths are resolved after
#              the script has changed directory to /code/bertnmt.
#   LANGPAIR   Five-character pair such as "en2de": characters 0-1 are the
#              source language, characters 3-4 the target language.
#   YEAR       Appended to "wmt" to select the sacreBLEU test set.
#   --beam N   Beam size handed to interactive.py (default 5).
#   --lenpen X Length penalty handed to interactive.py (default 1.0).
#   ARGS...    Everything else is forwarded verbatim to interactive.py.
#
# Files written: BPEFILE.debpe, BPEFILE.debpe.detok, BPEFILE.in next to the
# input, and output.log, output.tok, output.tok.detok in /code/bertnmt.

usage() {
  echo "bash interactive.sh bpefile en2de 14 dictpath --path cktpath --lenpen 1. --beam 5"
}

if [[ "${1:-}" == '-h' ]]; then
  usage
  exit
fi

# The three leading positional arguments are mandatory; fail early instead
# of running pip and the tokenizer with empty file names.
if [[ $# -lt 3 ]]; then
  usage >&2
  exit 2
fi

set -x
set -e

export PYTHONIOENCODING="UTF-8"
export TORCH_HOME=/code/bertnmt/checkpoints/bert

cd /code/bertnmt
pip install --editable . --user --quiet

MOSE=/code/mosesdecoder
sockeye=/code/sockeye

bpefile=$1
lng=$2
year=$3
shift 3

src=${lng:0:2}
tgt=${lng:3:2}

# Pull --beam/--lenpen out of the remaining arguments; anything else is
# collected untouched for interactive.py.
POSITIONAL=()
beam=5
lenpen=1.0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --beam)
      if [[ $# -lt 2 ]]; then
        echo "interactive.sh: --beam requires a value" >&2
        exit 2
      fi
      beam=$2
      shift 2
      ;;
    --lenpen)
      if [[ $# -lt 2 ]]; then
        echo "interactive.sh: --lenpen requires a value" >&2
        exit 2
      fi
      lenpen=$2
      shift 2
      ;;
    *)
      POSITIONAL+=("$1")
      shift
      ;;
  esac
done
set -- "${POSITIONAL[@]}"

# Undo BPE segmentation ("@@ " joiners and a trailing "@@"), then detokenize
# the source side with Moses.
sed -r 's/(@@ )|(@@ ?$)//g' "$bpefile" > "$bpefile.debpe"
"$MOSE/scripts/tokenizer/detokenizer.perl" -l "$src" \
  < "$bpefile.debpe" > "$bpefile.debpe.detok"

# Interleave the two views line by line: each BPE line is followed by its
# detokenized counterpart.
# NOTE(review): presumably interactive.py consumes these line pairs (BPE
# input plus raw text) - confirm against interactive.py.
paste -d "\n" "$bpefile" "$bpefile.debpe.detok" > "$bpefile.in"

# Redirect stdin instead of piping from cat so that a failing python run
# aborts the script under `set -e`.
python interactive.py "${POSITIONAL[@]}" -s "$src" -t "$tgt" \
  --buffer-size 1024 --batch-size 128 \
  --beam "$beam" --lenpen "$lenpen" --remove-bpe \
  < "$bpefile.in" > output.log

# Keep the lines starting with "H" and drop their first two tab-separated
# fields, leaving the text from field three onwards.
grep ^H output.log | cut -f3- > output.tok
perl "$MOSE/scripts/tokenizer/detokenizer.perl" -l "$tgt" \
  < output.tok > output.tok.detok

"$sockeye/sockeye_contrib/sacrebleu/sacrebleu.py" \
  -t "wmt${year}" -l "${src}-${tgt}" < output.tok.detok