-
Notifications
You must be signed in to change notification settings - Fork 3
/
wav2tiff.py
executable file
·142 lines (110 loc) · 4.07 KB
/
wav2tiff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/python
import numpy as np
from scipy.ndimage.interpolation import zoom
from scikits.audiolab import wavread
from tifffile import imsave
import argparse
# Command-line options.  Each key becomes a ``--<key>`` argument; 'value'
# holds the default (and is overwritten with the parsed value at start-up),
# 'help' is the text shown by ``--help``.
opts = dict(
    wav=dict(value=None, help='wav file to process'),
    tiff=dict(value='wav.tiff', help='tiff file to save'),
    aspect_ratio=dict(value=7.0 / 5.0, help='final image aspect ratio'),
)
def find_edges(data, sample_rate, trim=None):
    """Find the falling edges (positive-to-negative crossings) in ``data``.

    The signal is first smoothed with a moving average spanning 2 ms worth
    of samples (``sample_rate * 0.002``, truncated to an integer and never
    less than one sample).  An edge is recorded wherever the smoothed value
    is negative and the previous smoothed value was positive.

    Args:
        data: 1-D sequence of samples.
        sample_rate: samples per second; only used to size the smoothing
            window.
        trim: optional expected spacing (in samples) between consecutive
            edges.  When given (and non-zero), only edges whose distance
            from the previously detected edge is within 1% of ``trim`` are
            returned.  The first edge is measured from sample 0.

    Returns:
        A list of integer sample indices, each offset by the window length
        from the position where the crossing was seen in the smoothed data.
    """
    samples = np.asarray(data)
    if samples.size == 0:
        # np.convolve refuses empty input; there are no edges to report.
        return []

    # The window must be an integer: it is used as an array length and as a
    # slice bound, neither of which accepts a float.
    window = max(1, int(sample_rate * 0.002))

    # Moving average.  Dropping the first (window - 1) outputs of the 'full'
    # convolution leaves one smoothed value per input sample.
    kernel = np.ones(window) / window
    smoothed = np.convolve(samples, kernel)[window - 1:]

    # Positions i >= 1 with smoothed[i] < 0 and smoothed[i - 1] > 0.  Index 0
    # can never be an edge because there is no positive predecessor.
    falling = np.flatnonzero((smoothed[1:] < 0) & (smoothed[:-1] > 0)) + 1
    edges = falling + window

    if not trim:
        return edges.tolist()

    trim = float(trim)
    # Distance of every edge from the one before it (the first from 0).
    intervals = np.diff(np.concatenate(([0], falling)))
    keep = np.abs(intervals - trim) / trim < 0.01
    return edges[keep].tolist()
def is_sync(data):
    """Return True if ``data`` looks like sync data.

    ``data`` is a sequence of edge positions.  The first entry is ignored;
    the remaining positions count as sync when the standard deviation of
    their successive differences is below 1, i.e. the edges are almost
    perfectly evenly spaced.

    Fewer than three positions leave no spacing to measure, so the answer
    is False (rather than a NaN comparison and a NumPy runtime warning).
    """
    edges = np.asarray(data)
    if edges.size < 3:
        return False
    spacing = np.diff(edges[1:])
    return bool(np.std(spacing) < 1)
if __name__ == '__main__':
    # Build out a dynamic argument list based on opts.  Boolean defaults
    # become flags that flip the default; everything else stores a value.
    PARSER = argparse.ArgumentParser(description=__doc__)
    for opt in sorted(opts):
        default = opts[opt]['value']
        if default is True:
            action = 'store_false'
        elif default is False:
            action = 'store_true'
        else:
            action = 'store'
        PARSER.add_argument(
            '--%s' % opt,
            default=default,
            action=action,
            help='%s (default: %s)' % (opts[opt]['help'], str(default)))
    ARGS = PARSER.parse_args()

    # Write the parsed values back so the rest of the script reads opts only.
    for arg, newarg in vars(ARGS).items():
        opts[arg]['value'] = newarg

    if not opts['wav']['value']:
        raise Exception("--wav must be specified.")

    # Load it up
    wav_data, sample_rate, encoding = wavread(opts['wav']['value'])

    # Exactly two channels are required: one carries sync, one the image.
    # NOTE(review): assumes wavread returns a (samples, channels) ndarray for
    # multi-channel files and a 1-D array for mono -- confirm.
    if getattr(wav_data, 'ndim', None) != 2 or wav_data.shape[1] != 2:
        raise Exception("WAV doesn't appear to have two channels.")

    samples = len(wav_data)
    if samples < sample_rate * 2:
        raise Exception("WAV too small to process.")

    # grab two seconds of samples from the middle
    span = int(sample_rate * 2)
    mid = int(samples // 2 - sample_rate)

    print(" length: %d seconds" % (samples // sample_rate))
    print(" encoding: %s" % encoding)
    print(" samples: %d" % samples)
    print(" sample rate: %d" % sample_rate)

    left = find_edges(wav_data[:, 0][mid:mid + span], sample_rate=sample_rate)
    right = find_edges(wav_data[:, 1][mid:mid + span], sample_rate=sample_rate)

    # Exactly one of the two channels must look like sync.
    left_is_sync = bool(is_sync(left))
    right_is_sync = bool(is_sync(right))
    if left_is_sync == right_is_sync:
        raise Exception('Could not find sync channel.')

    if left_is_sync:
        print(" H sync on: left channel")
        sync_data = wav_data[:, 0]
        img_data = wav_data[:, 1]
        mean_interval = int(np.mean(np.diff(left)))
    else:
        print(" H sync on: right channel")
        sync_data = wav_data[:, 1]
        img_data = wav_data[:, 0]
        mean_interval = int(np.mean(np.diff(right)))
    print("mean line size: %d" % mean_interval)

    print("\nProcessing sync data...")
    # Find all of the sync edges whose spacing matches the mean line size
    sync = find_edges(sync_data, trim=mean_interval, sample_rate=sample_rate)
    height = len(sync)
    print(" image height: %d" % height)

    # scale to aspect ratio: ``width`` is the horizontal zoom factor that
    # turns mean_interval samples per line into height * aspect_ratio pixels
    width = (height * float(opts['aspect_ratio']['value'])) / mean_interval
    print(" image width: %d" % int(width * mean_interval))

    print("\nProcessing video data...")
    # Normalize the brightness data by mapping [-1, 1] onto [0, 1].
    # NOTE(review): assumes wavread yields floats in [-1, 1] -- confirm.
    img_data = (img_data + 1) / 2.0

    # Keep only the lines that fit completely inside the recording, then
    # copy each one straight into a preallocated float32 image (one row per
    # sync edge) instead of going through nested Python lists.
    starts = np.asarray(sync, dtype=np.intp)
    starts = starts[starts + mean_interval <= len(img_data)]
    if len(starts) == 0:
        raise Exception('No complete scan lines found.')

    img = np.empty((len(starts), mean_interval), dtype=np.float32)
    for row, start in enumerate(starts):
        img[row] = img_data[start:start + mean_interval]

    print(" Saving %s" % opts['tiff']['value'])
    imsave(opts['tiff']['value'], zoom(input=img, zoom=[1.0, width]))