-
Notifications
You must be signed in to change notification settings - Fork 6
/
functions.py
152 lines (134 loc) · 4.9 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from collections import Counter
import numpy as np
def zigzag(matrix: np.ndarray) -> np.ndarray:
"""
computes the zigzag of a quantized block
:param numpy.ndarray matrix: quantized matrix
:returns: zigzag vectors in an array
"""
# initializing the variables
h = 0
v = 0
v_min = 0
h_min = 0
v_max = matrix.shape[0]
h_max = matrix.shape[1]
i = 0
output = np.zeros((v_max * h_max))
while (v < v_max) and (h < h_max):
if ((h + v) % 2) == 0: # going up
if v == v_min:
output[i] = matrix[v, h] # first line
if h == h_max:
v = v + 1
else:
h = h + 1
i = i + 1
elif (h == h_max - 1) and (v < v_max): # last column
output[i] = matrix[v, h]
v = v + 1
i = i + 1
elif (v > v_min) and (h < h_max - 1): # all other cases
output[i] = matrix[v, h]
v = v - 1
h = h + 1
i = i + 1
else: # going down
if (v == v_max - 1) and (h <= h_max - 1): # last line
output[i] = matrix[v, h]
h = h + 1
i = i + 1
elif h == h_min: # first column
output[i] = matrix[v, h]
if v == v_max - 1:
h = h + 1
else:
v = v + 1
i = i + 1
elif (v < v_max - 1) and (h > h_min): # all other cases
output[i] = matrix[v, h]
v = v + 1
h = h - 1
i = i + 1
if (v == v_max - 1) and (h == h_max - 1): # bottom right element
output[i] = matrix[v, h]
break
return output
def trim(array: np.ndarray) -> np.ndarray:
"""
in case the trim_zeros function returns an empty array, add a zero to the array to use as the DC component
:param numpy.ndarray array: array to be trimmed
:return numpy.ndarray:
"""
trimmed = np.trim_zeros(array, 'b')
if len(trimmed) == 0:
trimmed = np.zeros(1)
return trimmed
def run_length_encoding(array: np.ndarray) -> list:
"""
finds the intermediary stream representing the zigzags
format for DC components is <size><amplitude>
format for AC components is <run_length, size> <Amplitude of non-zero>
:param numpy.ndarray array: zigzag vectors in array
:returns: run length encoded values as an array of tuples
"""
encoded = list()
run_length = 0
eob = ("EOB",)
for i in range(len(array)):
for j in range(len(array[i])):
trimmed = trim(array[i])
if j == len(trimmed):
encoded.append(eob) # EOB
break
if i == 0 and j == 0: # for the first DC component
encoded.append((int(trimmed[j]).bit_length(), trimmed[j]))
elif j == 0: # to compute the difference between DC components
diff = int(array[i][j] - array[i - 1][j])
if diff != 0:
encoded.append((diff.bit_length(), diff))
else:
encoded.append((1, diff))
run_length = 0
elif trimmed[j] == 0: # increment run_length by one in case of a zero
run_length += 1
else: # intermediary steam representation of the AC components
encoded.append((run_length, int(trimmed[j]).bit_length(), trimmed[j]))
run_length = 0
# send EOB
if not (encoded[len(encoded) - 1] == eob):
encoded.append(eob)
return encoded
def get_freq_dict(array: list) -> dict:
"""
returns a dict where the keys are the values of the array, and the values are their frequencies
:param numpy.ndarray array: intermediary stream as array
:return: frequency table
"""
#
data = Counter(array)
result = {k: d / len(array) for k, d in data.items()}
return result
def find_huffman(p: dict) -> dict:
"""
returns a Huffman code for an ensemble with distribution p
:param dict p: frequency table
:returns: huffman code for each symbol
"""
# Base case of only two symbols, assign 0 or 1 arbitrarily; frequency does not matter
if len(p) == 2:
return dict(zip(p.keys(), ['0', '1']))
# Create a new distribution by merging lowest probable pair
p_prime = p.copy()
a1, a2 = lowest_prob_pair(p)
p1, p2 = p_prime.pop(a1), p_prime.pop(a2)
p_prime[a1 + a2] = p1 + p2
# Recurse and construct code on new distribution
c = find_huffman(p_prime)
ca1a2 = c.pop(a1 + a2)
c[a1], c[a2] = ca1a2 + '0', ca1a2 + '1'
return c
def lowest_prob_pair(p):
# Return pair of symbols from distribution p with lowest probabilities
sorted_p = sorted(p.items(), key=lambda x: x[1])
return sorted_p[0][0], sorted_p[1][0]