-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate-avro-schema.py
39 lines (31 loc) · 1.17 KB
/
generate-avro-schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/env python
import subprocess
import simplejson as json
import sys
# The existing Hive table for which the Avro schema should be generated is
# taken from the single command-line argument, optionally qualified as
# "db.table".
if len(sys.argv) != 2:
    # Usage goes to stderr so it never ends up in a redirected schema file.
    sys.stderr.write("Usage: %s [input-db.]<input-table-name>\n" % sys.argv[0])
    sys.exit(1)
TBL_NAME = sys.argv[1]

# Run hive with an explicit argument vector instead of a shell command line:
# the table name comes straight from argv, so it must not be interpreted by
# a shell (quoting problems, command injection).
try:
    process = subprocess.Popen(
        ['hive', '-e', 'DESC %s' % TBL_NAME],
        stdin=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        # Text-mode pipes, so the output is a str that can be split on '\n'.
        universal_newlines=True,
    )
except OSError as err:
    sys.stderr.write("Unable to run hive: %s\n" % err)
    sys.exit(1)

# stderr is captured (hive logs there) and only shown when the command fails,
# rather than being thrown away unconditionally.
hive_stdout, hive_stderr = process.communicate()
if process.returncode != 0:
    sys.stderr.write(hive_stderr)
    sys.exit(1)
output = hive_stdout.strip()
def gen_avro_field(hive_schema_line):
    """Render one tab-separated schema line as an Avro field entry.

    The line is expected to hold the column name and the column type as its
    first two tab-separated columns; any further columns (such as a comment)
    are ignored, and a missing or blank comment column is accepted.
    Surrounding whitespace padding is removed from the name and the type.

    The type text is emitted verbatim; it is not translated into an Avro
    type name.

    Args:
        hive_schema_line: One line of schema text, e.g. ``"id\\tint\\t"``.

    Returns:
        The string ``'{ "name": "<name>", "type": "<type>" } '`` (including
        the trailing space).

    Raises:
        ValueError: If the line has no non-empty name and type columns.
    """
    # Split before stripping: stripping the whole line first would drop the
    # trailing tab of an empty comment column and change the column count.
    columns = [column.strip() for column in hive_schema_line.split('\t')]
    if len(columns) < 2 or not columns[0] or not columns[1]:
        raise ValueError("cannot parse schema line: %r" % (hive_schema_line,))
    name, hive_type = columns[0], columns[1]
    return '{ "name": "%s", "type": "%s" } ' % (name, hive_type)
# Build one Avro field entry per column line, skipping blank lines and
# '#'-prefixed header lines. For a partitioned table the DESC output lists
# the partition columns a second time (after a "# Partition Information"
# header); an Avro record may not contain the same field twice, so repeated
# entries are dropped while the original column order is kept.
lst = []
seen_fields = set()
for line in output.split('\n'):
    if not line.strip() or line[0] == '#':
        continue
    field = gen_avro_field(line)
    if field in seen_fields:
        continue
    seen_fields.add(field)
    lst.append(field)

# Wrap the field entries in the fixed record envelope.
schema_literal = """{
"namespace": "fileformat.benchmark",
"name": "BenchmarkRecord",
"type": "record",
"fields": [
%s
]
}""" % ( ',\n\t\t'.join( lst ) )
# Single-argument call form prints the same text on Python 2.
print(schema_literal)