-
Notifications
You must be signed in to change notification settings - Fork 0
/
table2wide.py
executable file
·51 lines (45 loc) · 1.59 KB
/
table2wide.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/python3 -W all
"""
table2wide.py: convert table to wide format
usage: table2wide.py < file
note: table-specific because of in-code definition of column roles
20180618 erikt(at)xs4all.nl
"""
import csv
import sys
# name of the column compared between consecutive rows to decide if they are merged
ID = "id"
# empty dictionary: placeholder value meaning "no row"
EMPTYDICT = {}
# columns copied unchanged, once per output row
KEEP = [ID,"treatment","counselor","GeslachtA","Leeftijd_t0","agegroup2_t0","Cursusafgerond"]
# field delimiter of both the input and the output table
SEPARATOR = ","
# name of the column whose value is appended to the names of the other columns
TIME = "timeframe"
# time values for which output columns are created
TIMES = ["T0","T1"]
def makeNewFieldNames(fieldNamesIn):
    """
    Build the column names of the wide table.

    The KEEP columns come first, unchanged. Every other input column,
    except the TIME column, is replaced by one column per entry of TIMES,
    named <input column name><time value>.

    fieldNamesIn: iterable with the column names of the input table
    returns: new list with the column names of the output table
    """
    wideNames = [*KEEP]
    for name in fieldNamesIn:
        # KEEP columns are already present; the TIME column has no output column
        if name in KEEP or name == TIME:
            continue
        wideNames.extend(name + stamp for stamp in TIMES)
    return wideNames
def fillWithNA(fieldNames):
    """
    Create an output row in which every column holds the string "NA".

    fieldNames: iterable with the column names of the row
    returns: new dict mapping each column name to "NA"
    """
    return dict.fromkeys(fieldNames, "NA")
def main(argv):
    """
    Convert the long-format table on stdin to a wide-format table on stdout.

    Consecutive input rows with the same id are merged, at most two at a
    time, into one output row. The KEEP columns are copied from the first
    row of the pair. Every other column, except the time column, is written
    to the output column named <column name><time value of the row>. Output
    cells for which no input value was seen hold "NA".

    A row without a partner (including the very last row of the input) is
    written on its own. Empty input produces no output at all.

    argv: command line arguments (not used)
    returns: None, so sys.exit(main(...)) exits with status 0
    raises: KeyError if a KEEP column or the time column is missing from
            the input header; ValueError (from csv.DictWriter) if a row
            has a time value that is not listed in TIMES
    """
    csvreader = csv.DictReader(sys.stdin, delimiter=SEPARATOR)
    # fieldnames is None when stdin holds no header line: nothing to convert
    if csvreader.fieldnames is None:
        return
    fieldNames = makeNewFieldNames(csvreader.fieldnames)
    csvwriter = csv.DictWriter(sys.stdout, delimiter=SEPARATOR, fieldnames=fieldNames)
    csvwriter.writeheader()
    # columns that are spread over one output column per time value;
    # taken from the header, so cells beyond the header width are ignored
    valueFields = [f for f in csvreader.fieldnames if f not in KEEP and f != TIME]

    def writeWide(firstRow, secondRow=None):
        # write one output row built from one or two input rows with the same id
        outRow = fillWithNA(fieldNames)
        for field in KEEP:
            outRow[field] = firstRow[field]
        for inRow in (firstRow, secondRow):
            if inRow is None:
                continue
            for field in valueFields:
                outRow[field + inRow[TIME]] = inRow[field]
        csvwriter.writerow(outRow)

    # row that is still waiting for a possible partner with the same id
    pendingRow = None
    for row in csvreader:
        if pendingRow is None:
            pendingRow = row
        elif row[ID] == pendingRow[ID]:
            writeWide(pendingRow, row)
            pendingRow = None
        else:
            # no partner found: write the waiting row alone, keep the new one
            writeWide(pendingRow)
            pendingRow = row
    # the last row of the input may still be waiting; do not lose it
    if pendingRow is not None:
        writeWide(pendingRow)
# run the conversion only when executed as a script; the value returned
# by main() becomes the exit status of the process
if __name__ == "__main__":
    sys.exit(main(sys.argv))