-
Notifications
You must be signed in to change notification settings - Fork 0
/
rfc_color.py
executable file
·380 lines (321 loc) · 13.5 KB
/
rfc_color.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
#!/usr/bin/env python3
from __future__ import print_function
import requests
import sys, os, re, time
from signal import signal, SIGPIPE, SIG_DFL
# Version string of this script.
VERSION = "0.2.1"
# Reset SIGPIPE to the OS default action (CPython ignores it at startup).
# NOTE(review): presumably so that piping the output into a pager or `head`
# ends the process quietly instead of raising BrokenPipeError -- confirm.
signal(SIGPIPE,SIG_DFL)
class Cl:
    '''ANSI/VT100 colors.

    The upper-case attributes are raw escape sequences; the helper functions
    wrap a string in a sequence and terminate it with RESET.  All helpers are
    static, so they work both as ``Cl.fg(...)`` and on an instance.
    '''
    # Text attributes
    RESET = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    INVERSE = '\033[7m'

    # Light (bright) foreground colors
    LRED = '\033[91m'
    LGREEN = '\033[92m'
    LYELLOW = '\033[93m'
    LBLUE = '\033[94m'
    LMAGENTA = '\033[95m'
    LCYAN = '\033[96m'

    # Background colors
    BRED = '\033[41m'
    BGREEN = '\033[42m'
    BYELLOW = '\033[43m'
    BBLUE = '\033[44m'
    BMAGENTA = '\033[45m'
    BCYAN = '\033[46m'

    # Dark (normal intensity) foreground colors
    DRED = '\033[31m'
    DGREEN = '\033[32m'
    DYELLOW = '\033[33m'
    DBLUE = '\033[34m'
    DMAGENTA = '\033[35m'
    DCYAN = '\033[36m'

    # 256 colors
    # See example: https://stackoverflow.com/questions/287871/print-in-terminal-with-colors/50025330#50025330
    @staticmethod
    def fg_col(color):
        '''Return the escape sequence that selects 256-color foreground `color`.'''
        return "\33[38;5;" + str(color) + "m"

    @staticmethod
    def bg_col(color):
        '''Return the escape sequence that selects 256-color background `color`.'''
        return "\33[48;5;" + str(color) + "m"

    @staticmethod
    def fg(text, color):
        '''Return `text` wrapped in 256-color foreground `color`, then RESET.'''
        return Cl.fg_col(color) + text + Cl.RESET

    @staticmethod
    def bg(text, color):
        '''Return `text` wrapped in 256-color background `color`, then RESET.'''
        return Cl.bg_col(color) + text + Cl.RESET

    @staticmethod
    def bold(s):
        '''Return `s` in bold, then RESET.'''
        return Cl.BOLD + s + Cl.RESET

    @staticmethod
    def red(s):
        '''Return `s` in light red, then RESET.'''
        return Cl.LRED + s + Cl.RESET

    @staticmethod
    def gray(s):
        '''Return `s` in gray (256-color index 245), then RESET.'''
        return Cl.fg(s, 245)
def get_rfc_text(num):
    '''Retrieve the plain text of an RFC, using a local cache when possible.

    The cache file is ~/.local/share/rfc/rfc<num>.txt.  If it exists and is
    non-empty its content is returned.  Otherwise the text is downloaded from
    https://www.ietf.org/rfc/rfc<num>.txt, stored in the cache (best effort)
    and returned.  Progress messages are printed to stdout.

    num -- RFC number (anything that str.format can render).

    Returns the response body as text, even when the server answered with an
    HTTP error status; such a response is never cached.
    Raises whatever requests.get raises on connection failure or timeout.
    '''
    fetch_timeout = 30  # seconds; without a timeout a stalled server hangs us forever

    rfc_path = os.path.join(os.path.expanduser('~'), '.local/share/rfc/')
    rfc_fn = os.path.join(rfc_path, 'rfc{}.txt'.format(num))

    # Check on this machine
    if os.path.exists(rfc_fn):
        try:
            with open(rfc_fn, 'r', encoding='utf-8') as cached:
                txt = cached.read()
        except (OSError, UnicodeError) as e:
            # Unreadable cache is not fatal: fall through and download again
            print("Couldn't read {}:".format(rfc_fn), e)
        else:
            print(Cl.gray('Found at: ' + rfc_fn))
            if txt:
                return txt
            print("EMPTY")

    # Fetch from the Internet
    t = time.time()
    url = 'https://www.ietf.org/rfc/rfc{}.txt'.format(num)
    r = requests.get(url, timeout=fetch_timeout)
    t = time.time() - t
    print(Cl.gray("URL: " + url))
    print(Cl.gray("Grabbed in {:.1f} s".format(t)))
    text = r.text

    # Do not poison the cache with an error page (e.g. 404 for a wrong number)
    if not r.ok:
        print(Cl.gray("HTTP status {}: response not cached".format(r.status_code)))
        return text

    # Save locally (best effort: a failure here must not lose the fetched text)
    try:
        os.makedirs(rfc_path, exist_ok=True)
        with open(rfc_fn, 'w', encoding='utf-8') as cached:
            cached.write(text)
    except OSError as e:
        print("Couldn't save {}:".format(rfc_fn), e)
    else:
        print("Saved locally")
    return text
class RFCParser:
    '''
    A class for doing basic plain text RFC parsing and coloring.

    analyze() collects structural information about the text (hat, title,
    table of contents, repeating page header/footer lines, indentation
    statistics); color() uses it to return the text with ANSI color
    sequences inserted.
    '''
    # Precompiled regular expressions
    re_page_num = re.compile(r'\[\s*Page\s+(\d+|[ivxlc]+)\s*\]', re.I)
    re_toc = re.compile(r'^\s*Table\s+of\s+contents\s*$', re.I)
    re_hat_rfc = re.compile(r'((?:RFC|Request for Comments):\s*)(\d+)', re.I)
    # Group 2 spans the WHOLE comma-separated list.  A capture group placed
    # inside the repetition keeps only the last number, which made the other
    # numbers vanish from the colored output.
    re_hat_obs = re.compile(r'((?:Obsoletes|Replaces):\s*)((?:\d+(?:,\s*)?)+)', re.I)
    re_hat_upd = re.compile(r'((?:Updates):\s*)((?:\d+(?:,\s*)?)+)', re.I)
    re_hat_cat = re.compile(r'((?:Category):\s*)(.+?)(\s{3}|$|\x1b\[)', re.I)
    re_chapter = re.compile(r'^(\s*)((?:Appendix )?(?:\d+\.|[A-Z]\.)*(?:\d+\.?|[A-Z]\.?)?)?(\s+)(\w.*?\w)\s*$')
    re_toc_chapter = re.compile(r'^(\s*)((?:Appendix )?(?:\d+\.|[A-Z]\.)*(?:\d+\.?|[A-Z]\.?)?)?(\s+)(\w.*?\w)((?:\s*\.){4,}\s*)(\d+)\s*$')
    re_rfc = re.compile(r'(RFC)(\s{0,1})(\d+)')

    # RFC 2119
    # TODO: make this tolerant to line breaks
    # The look-behind rejects a keyword that directly follows a word character
    # or ';'; the look-ahead rejects one directly followed by ESC.
    re_must_not = re.compile(r'(?<!\w|;)(MUST NOT|SHALL NOT)(?!\033)')
    re_must = re.compile(r'(?<!\w|;)(MUST|SHALL|REQUIRED)(?!\033)')
    re_should_not = re.compile(r'(?<!\w|;)(SHOULD NOT|NOT RECOMMENDED)(?!\033)')
    re_should = re.compile(r'(?<!\w|;)(SHOULD|RECOMMENDED)(?!\033)')
    re_may = re.compile(r'(?<!\w|;)(MAY|OPTIONAL)(?!\033)')

    # Coloring (256-color palette indexes)
    HAT_COLOR = 42
    OBSOLETE_COLOR = 202
    UPDATED_COLOR = 11
    THIS_COLOR = 14
    CATEGORY_COLOR = 200
    RFC_COLOR = 177
    OTHER_RFC_COLOR = 45
    PAGE_COLOR = 36         # page number lines and repeating header lines
    TOPIC_COLOR = 14        # lines at the smallest indentation
    TOC_NUM_COLOR = 253
    TOC_DOTS_COLOR = 248
    TOC_PAGE_COLOR = 254

    # Background colors
    # TODO: we are not tracking line breaks, so these keywords have only three colors
    TITLE_BG = 88
    MUST_NOT_BG = 88
    MUST_BG = 88  # 89
    SHOULD_NOT_BG = 94  # 58
    SHOULD_BG = 94  # 60
    MAY_BG = 22

    def __init__(self, text=None):
        '''Create a parser; when `text` is non-empty it is analyzed at once.'''
        # Line numbers
        self.hat = []
        self.title = []         # title (line numbers)
        self.toc_lines = []
        self.obsoleted = []     # list of RFC numbers (as strings) that are obsoleted by this document
        self.updated = []       # list of RFC numbers (as strings) that are updated by this document
        self.rfc = None         # number of RFC as parsed
        self.bottom = None      # line with page number
        self.top_lines = None
        self.toc_start = None   # line number at which TOC starts
        self.toc_end = None     # line number at which TOC ends
        self.width = None       # by rightmost character position
        self.indents = {}       # indent -> number of lines having it
        self.main_indent = None  # most frequent indent (set by analyze)
        self.min_indent = None   # smallest indent (set by analyze)
        self.chapters = []      # triples (num,title,page)
        if text:
            self.analyze(text)

    def rfc_num_color(self, match):
        '''Replacement callback for re_rfc: color "RFC" and its number.

        The number is colored according to whether it is listed in
        self.obsoleted, in self.updated, or in neither.
        '''
        num = match.group(3)
        if num in self.obsoleted:
            num_color = self.OBSOLETE_COLOR
        elif num in self.updated:
            num_color = self.UPDATED_COLOR
        else:
            num_color = self.OTHER_RFC_COLOR
        return (Cl.fg(match.group(1), self.RFC_COLOR) +
                match.group(2) +
                Cl.fg(num, num_color))

    @staticmethod
    def what_indent(line):
        '''Return the number of leading whitespace characters of `line`.'''
        return len(line) - len(line.lstrip())

    def is_chapter(self, line, last_page):
        '''Tell whether `line` is the heading of a chapter listed in the TOC.

        The line must look like "[number] title" and its title must equal the
        title of a known chapter which either has no number, has the same
        number, or is listed on the page following `last_page`.

        line      -- text line (already right-stripped by the callers here)
        last_page -- last page number seen, as a string, or None
        '''
        sch = self.re_chapter.search(line)
        if not sch:
            return False
        num, title = sch.group(2), sch.group(4)
        if num:
            num = num.rstrip('.')
        # Page on which the heading is expected; non-numeric page labels
        # (e.g. roman numerals) and None are compared unchanged.
        if last_page and last_page.isdecimal():
            expected_page = str(int(last_page) + 1)
        else:
            expected_page = last_page
        return any(t == title and (not n or n == num or expected_page == p)
                   for n, t, p in self.chapters)

    def analyze(self, text):
        '''Collect structural information about `text`.

        Fills self.hat, self.title, self.width, self.top_lines,
        self.indents, self.toc_lines, self.chapters, self.main_indent and
        self.min_indent.  Results of a previous call are discarded.
        '''
        # Start from a clean state so that repeated calls do not accumulate
        self.hat = []
        self.title = []
        self.toc_lines = []
        self.chapters = []
        self.width = None
        self.top_lines = {}  # line->[count,{}], where {} are next lines in the same form
        lines = [raw.rstrip() for raw in text.split('\n')]

        # Pass 1: find hat, TOC, width, repeating lines
        top_line_pointer = None
        hat_ended = False
        title_ended = False
        toc_found = False
        for nl, line in enumerate(lines):
            if not line:
                # The first paragraph is the hat, the second one is the title
                if self.hat:
                    hat_ended = True
                if self.title:
                    title_ended = True
                continue
            if not hat_ended:
                self.hat.append(nl)
            elif not title_ended:
                self.title.append(nl)
            # Analyze width
            if self.width is None or len(line) > self.width:
                self.width = len(line)
            # Analyze repeating lines after page number
            if self.re_page_num.search(line):
                top_line_pointer = self.top_lines
            elif top_line_pointer is not None:
                # Increase counter and proceed to next node
                node = top_line_pointer.setdefault(line, [0, {}])
                node[0] += 1
                top_line_pointer = node[1]
            # TOC
            # TODO: allow TOC lines with no dots as in RFC 3261 (10.2.1.1)
            # TODO: allow multiline headers as in RFC 2617
            if not toc_found and self.re_toc.search(line):
                toc_found = True

        # Pass 2: analyze indentations, TOC chapters
        self.indents = {}
        hat_lines = set(self.hat)
        toc = None  # None: TOC not reached yet, True: inside TOC, False: TOC is over
        top_line_pointer = None
        last_page = None
        for nl, line in enumerate(lines):
            if not line or nl in hat_lines:
                continue
            # Repeating lines after page number
            sch = self.re_page_num.search(line)
            if sch:
                last_page = sch.group(1)
                top_line_pointer = self.top_lines
                continue
            if top_line_pointer and line in top_line_pointer and top_line_pointer[line][0] > 1:
                top_line_pointer = top_line_pointer[line][1]
                continue
            # Current indent
            indent = self.what_indent(line)
            self.indents[indent] = self.indents.get(indent, 0) + 1
            # TOC
            if not toc_found:
                continue
            if toc is None and self.re_toc.search(line):
                toc = True
            elif toc is True:
                sch = self.re_toc_chapter.search(line)
                if sch:
                    self.toc_lines.append(nl)
                    num, title, page = sch.group(2), sch.group(4), sch.group(6)
                    if num:
                        num = num.rstrip('.')
                    self.chapters.append((num or None, title, page))
                elif self.is_chapter(line, last_page):
                    # First real chapter heading: the TOC is over
                    toc = False

        if self.indents:
            # Most frequent indent; ties are resolved in favor of the smaller one
            self.main_indent = min(self.indents.items(), key=lambda x: (-x[1], x[0]))[0]
            self.min_indent = min(self.indents)
        else:
            # Nothing but empty/hat lines: no indentation statistics available
            self.main_indent = None
            self.min_indent = None

    @staticmethod
    def _split_numbers(numbers):
        '''Split a comma-separated list of numbers into non-empty strings.'''
        return [x.strip() for x in numbers.split(',') if x.strip()]

    def _color_hat_line(self, line):
        '''Return a colored hat line (with trailing newline).

        Also records the numbers found after "Obsoletes:"/"Replaces:" and
        "Updates:" in self.obsoleted and self.updated.
        '''
        hat_on = Cl.fg_col(self.HAT_COLOR)  # re-enable hat color after a highlighted field
        line = Cl.fg(line, self.HAT_COLOR) + '\n'
        # Obsolete
        match = self.re_hat_obs.search(line)
        if match:
            self.obsoleted = self._split_numbers(match.group(2))
            line = self.re_hat_obs.sub("\\1" + Cl.RESET + Cl.fg("\\2", self.OBSOLETE_COLOR) + hat_on, line)
        # Updated
        match = self.re_hat_upd.search(line)
        if match:
            self.updated = self._split_numbers(match.group(2))
            line = self.re_hat_upd.sub("\\1" + Cl.RESET + Cl.fg("\\2", self.UPDATED_COLOR) + hat_on, line)
        # This RFC's number
        line = self.re_hat_rfc.sub("\\1" + Cl.RESET + Cl.fg("\\2", self.THIS_COLOR) + hat_on, line)
        # Category
        # TODO: for RFC 2549 make it more elegant
        line = self.re_hat_cat.sub("\\1" + Cl.RESET + Cl.fg("\\2", self.CATEGORY_COLOR) + hat_on + "\\3", line)
        return line

    def _color_toc_line(self, line):
        '''Return a TOC entry with number, dots and page colored (no newline).'''
        template = ("\\1" + Cl.fg("\\2", self.TOC_NUM_COLOR) +
                    "\\3\\4" +
                    Cl.fg("\\5", self.TOC_DOTS_COLOR) +
                    Cl.fg("\\6", self.TOC_PAGE_COLOR))
        return self.re_toc_chapter.sub(template, line)

    def _color_body_line(self, line):
        '''Return an ordinary line with RFC references and RFC 2119 keywords colored.'''
        # TODO: allow multiline RFC numbers
        line = self.re_rfc.sub(self.rfc_num_color, line)
        # Order matters: the negated forms must be handled before the plain ones
        keywords = (
            (self.re_must_not, self.MUST_NOT_BG),
            (self.re_must, self.MUST_BG),
            (self.re_should_not, self.SHOULD_NOT_BG),
            (self.re_should, self.SHOULD_BG),
            (self.re_may, self.MAY_BG),
        )
        for regex, background in keywords:
            line = regex.sub(Cl.bg("\\1", background), line)
        return line

    def color(self, text):
        '''Return `text` with ANSI color sequences inserted.

        The text is analyzed first unless an analysis is already available.
        Every input line is right-stripped and terminated with a newline in
        the result.
        '''
        if self.width is None:  # analyze text if not already done so
            self.analyze(text)

        hat_lines = set(self.hat)
        title_lines = set(self.title)
        toc_lines = set(self.toc_lines)
        out = []
        top_line_pointer = None
        last_page = None
        for nl, line in enumerate(text.split('\n')):
            line = line.rstrip()
            # Empty line
            if not line:
                out.append('\n')
                continue
            # Hat
            if nl in hat_lines:
                out.append(self._color_hat_line(line))
                continue
            # Title
            if nl in title_lines:
                actual_text = line.lstrip()
                spaces = line[:-len(actual_text)]
                out.append(spaces + Cl.bg(actual_text, self.TITLE_BG) + '\n')
                continue
            # Page number
            page = self.re_page_num.search(line)
            if page:
                last_page = page.group(1)
                out.append(Cl.fg(line, self.PAGE_COLOR) + '\n')
                top_line_pointer = self.top_lines
                continue
            # Repeating lines after page number
            if top_line_pointer and line in top_line_pointer and top_line_pointer[line][0] > 1:
                out.append(Cl.fg(line, self.PAGE_COLOR) + '\n')
                top_line_pointer = top_line_pointer[line][1]
                continue
            # TOC chapters
            if nl in toc_lines:
                out.append(self._color_toc_line(line) + '\n')
                continue
            # Chapter?
            if self.is_chapter(line, last_page):
                out.append(Cl.BOLD + line + Cl.RESET + '\n')
                continue
            # Topic?
            if self.main_indent and self.what_indent(line) == self.min_indent:
                out.append(Cl.fg(line, self.TOPIC_COLOR) + '\n')
                continue
            # Default: as is, with inline highlights
            out.append(self._color_body_line(line) + '\n')
        return ''.join(out)
def color_rfc(text):
    '''Colorize plain RFC `text` with a fresh RFCParser and return the result.'''
    return RFCParser().color(text)
if __name__=="__main__":
    # Command line entry point: print the colored text of the RFC whose
    # number is given as the first argument.
    if len(sys.argv) < 2:
        # Explain the expected argument instead of failing with IndexError
        print("Usage: {} RFC_NUMBER".format(os.path.basename(sys.argv[0])), file=sys.stderr)
        sys.exit(2)
    text = get_rfc_text(sys.argv[1])
    print(color_rfc(text), flush=True)