-
Notifications
You must be signed in to change notification settings - Fork 46
/
check_files.py
77 lines (63 loc) · 3.17 KB
/
check_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import re
import pandas as pd
# Check that ul/ol lists are preceded by a blank line
def check_md_files_for_list_spacing(directory):
    """Report Markdown lists that are not separated from the text above them.

    Walks ``directory`` recursively. For every ``.md`` file it prints one
    message naming the file and the line numbers (1-based, counted in the
    file itself) of list items that start a list directly below a non-blank
    line.

    A leading ``--- ... ---`` header is excluded from the check. Files
    without such a header are checked in full.

    Args:
        directory: Root directory to scan.

    Returns:
        None. Findings are printed to standard output.
    """
    yaml_pattern = re.compile(r'---(.*?)---(.*)', re.DOTALL)
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith('.md'):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8') as md_file:
                md_file_content = md_file.read()

            # Strip the YAML header when there is one, remembering how many
            # lines it occupied so reported numbers refer to the real file.
            header = yaml_pattern.match(md_file_content)
            if header:
                content = header.group(2)
                line_offset = md_file_content.count('\n', 0, header.start(2))
            else:
                content = md_file_content
                line_offset = 0

            line_errors = [
                line_offset + number
                for number in _list_lines_missing_break(content)
            ]
            if line_errors:
                print(f'List without preceding break found in {file_path} at lines {line_errors}')


# A list item: optional indent, a marker ("1.", "-", "*" or "+"), whitespace.
_LIST_ITEM = re.compile(r'[ \t]*(?:1\.|[-*+])[ \t]+')
# A code fence line. A backtick fence may not contain further backticks,
# which keeps inline code such as ```x``` from being taken for a fence.
_CODE_FENCE = re.compile(r'[ \t]*(?:`{3,}[^`]*|~{3,}.*)$')


def _list_lines_missing_break(content):
    """Return 1-based line numbers in ``content`` where a list starts right below text."""
    flagged = []
    previous_blank = True  # nothing precedes the first line
    in_list = False
    in_fence = False
    for number, line in enumerate(content.split('\n'), start=1):
        is_item = False
        if _CODE_FENCE.match(line):
            in_fence = not in_fence
        elif in_fence:
            # List markers inside a code block are code, not lists.
            continue
        elif not line.strip():
            previous_blank = True
            continue
        else:
            is_item = bool(_LIST_ITEM.match(line))

        if is_item:
            # Only the first item of a list needs a blank line above it.
            if not previous_blank and not in_list:
                flagged.append(number)
            in_list = True
        elif previous_blank and not line[0].isspace():
            # Unindented text after a blank line ends the current list;
            # indented text is treated as a continuation of the item.
            in_list = False
        previous_blank = False
    return flagged
# Check fenced code blocks (HTML comments are stripped first)
def check_codeblocks(directory):
    """Report fenced code blocks missing a language or a codeblock shortcode.

    Walks ``directory`` recursively. In every ``.md`` file, after removing
    HTML comments, each fenced code block that starts at the beginning of a
    line is checked for two things:

    * a language name directly after the opening fence, and
    * being enclosed in a ``{{% codeblock %}} ... {{% /codeblock %}}`` pair.

    Each offending block adds one row with the file path ('Bestandspad') and
    the error type ('Fouttype'): 'taal' (no language), 'structuur' (not
    enclosed in the shortcode) or 'beide' (both). The rows are printed as a
    table; when there are none a short message is printed instead.

    Args:
        directory: Root directory to scan.

    Returns:
        None. Results are printed to standard output.
    """
    codeblock_pattern = re.compile(r'^(```\w*[\s\S]+?```)', re.MULTILINE)
    comment_pattern = re.compile(r'<!--.*?-->', re.DOTALL)
    language_pattern = re.compile(r'```(\w+)')
    # Lazy match so each opening shortcode pairs with the nearest closing one.
    shortcode_pattern = re.compile(r'{{%\s*codeblock\s*%}}[\s\S]*?{{%\s*/codeblock\s*%}}')
    errors = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith('.md'):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8') as md_file:
                content = md_file.read()
            # Commented-out blocks are not rendered, so they are not checked.
            content = comment_pattern.sub('', content)
            # Shortcode regions are found once per file; a block counts as
            # tagged only when it lies inside one of them.
            tagged_spans = [m.span() for m in shortcode_pattern.finditer(content)]
            for block in codeblock_pattern.finditer(content):
                has_language = language_pattern.match(block.group(0))
                is_tagged = any(
                    begin <= block.start() and block.end() <= end
                    for begin, end in tagged_spans
                )
                error_type = None
                if not is_tagged and not has_language:
                    error_type = 'beide'
                elif not is_tagged:
                    error_type = 'structuur'
                elif not has_language:
                    error_type = 'taal'
                if error_type:
                    errors.append({'Bestandspad': file_path, 'Fouttype': error_type})
    # Present the collected errors as a table.
    if errors:
        df_errors = pd.DataFrame(errors)
        print(df_errors.to_string(index=False))
    else:
        print("Geen fouten gevonden.")
# Run the checks only when this file is executed as a script, so that
# importing it does not start a scan of the working directory.
if __name__ == "__main__":
    # The list-spacing check is currently disabled; to run it, call
    # check_md_files_for_list_spacing('content') here.
    # Check code blocks:
    check_codeblocks('content')