Skip to content

Commit

Permalink
Update the markdown to html converter
Browse files Browse the repository at this point in the history
  • Loading branch information
GordonZhang2024 committed May 31, 2024
1 parent d1a982b commit 2e73225
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 216 deletions.
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
pillow
ttkbootstrap
markdown2==2.4.13
217 changes: 3 additions & 214 deletions src/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""

import re
import markdown2

"""
tkMarker
Expand All @@ -33,218 +34,6 @@
SOFTWARE.
"""

def convert(text: str, file_path: str, preview=False):
    """
    Function convert()
    ==================
    Convert Markdown to HTML.

    The conversion is delegated to ``markdown2.markdown``. ``file_path``
    and ``preview`` are accepted but are not used by this implementation.
    """
    html = markdown2.markdown(text)
    return html

def convert(markdown: str, preview=False, file_path='./') -> str:
    """
    Function convert()
    ==================
    Convert Markdown to HTML.

    ``file_path`` is stored in the module-level global ``path``; the text
    is then split into lines and handed to convert_str() together with
    the ``preview`` flag.
    """
    global path
    path = file_path
    return convert_str(markdown.splitlines(), preview=preview)


def convert_str(markdown: list, preview=False, file_path='./') -> str:
    """
    Function convert_str()
    ======================
    Convert Markdown(split by lines) to a complete HTML page.

    Every line is converted with convert_single_line(), the results are
    concatenated and wrapped in the page skeleton, and finally
    convert_code() is applied to the whole page.  When ``preview`` is
    true an auto-reload script is added.  ``file_path`` is not used here.
    """

    # Convert line by line and glue the fragments together.
    body = ''.join(convert_single_line(row) for row in markdown)

    # reload automatically (only defined for the preview)
    reload_script = (
        '''\
<script>
function AutoRefresh(time) {
setTimeout("location.reload(true);", time);
}
</script>
'''
        if preview
        else ''
    )

    page_head = '''\
<html>
<title>
Preview
</title>
<!This is the preview.>
<style>
html {
font-family: 'Sans Mono';
}
</style>
'''
    # NOTE(review): the onload attribute is emitted even when ``preview``
    # is false, in which case this page does not define AutoRefresh.
    body_open = '''\
<body onload = "JavaScript:AutoRefresh(5000);">
'''
    page_tail = '''\
</body>
</html>
'''

    page = page_head + reload_script + body_open + body + page_tail

    # Convert quoted code
    return convert_code(page)


def convert_gfm(line: str) -> str:
    """
    Function convert_gfm()
    ======================
    GitHub flavored Markdown support for a single line.

    * Alert markers such as ``[!NOTE]`` are replaced by their bare label
      (``NOTE``).
    * Task-list markers ``[ ]`` and ``[x]`` / ``[X]`` are replaced by an
      unchecked / checked ``<input type="checkbox">`` element.

    Every occurrence in the line is replaced, wherever it appears.
    """
    replacements = (
        ('[!NOTE]', 'NOTE'),
        ('[!TIP]', 'TIP'),
        ('[!IMPORTANT]', 'IMPORTANT'),
        ('[!WARNING]', 'WARNING'),
        ('[!CAUTION]', 'CAUTION'),
        ('[ ]', '<input type="checkbox">'),
        ('[x]', '<input type="checkbox" checked>'),
        # GFM accepts an upper-case "X" for a checked item as well.
        ('[X]', '<input type="checkbox" checked>'),
    )

    for marker, html in replacements:
        line = line.replace(marker, html)

    return line


def replace_script_tag(line: str) -> str:
    """
    Function replace_script_tag()
    =============================
    Remove ``<script ...>`` and ``</script>`` tags from a line.

    Matching is case-insensitive and also covers opening tags that carry
    attributes.  The removal is repeated until the line stops changing,
    so that a tag re-assembled from the pieces around a removed tag
    (``<scr<script>ipt>``) is removed as well.

    Only the tags are dropped; the text between them is kept.  This is a
    best-effort filter working on one line at a time, not a full HTML
    sanitizer.
    """
    # "<script" or "</script", followed by a character that ends the tag
    # name, then everything up to the closing ">".
    pattern = r'</?script(?=[\s/>])[^>]*>'

    previous = None
    while previous != line:
        previous = line
        line = re.sub(pattern, '', line, flags=re.IGNORECASE)

    return line


def convert_list(line: str) -> str:
    """
    Function convert_list()
    =======================
    Convert a ``- item`` line to a one-item HTML list.

    A line that starts with ``-``, one whitespace character and at least
    one more character becomes ``<ul><li>...</li></ul>``; the whitespace
    after the marker is kept.  Any other line is returned unchanged.
    """
    if re.match(r'-\s[\w\W]+', line):
        # Replace the leading marker only, so hyphens inside the item
        # text (e.g. "well-known") are left alone.
        line = line.replace('-', '<ul><li>', 1) + '</li></ul>'

    return line


def convert_code(text: str) -> str:
    """
    Function convert_code()
    =======================
    Convert inline code quoted with backticks.

    A backtick-quoted span whose content consists only of word
    characters, whitespace, ``<``, ``/`` and ``>`` is replaced by
    ``<q><code>content</code></q>``.  Other text is returned unchanged.

    The substitution is done in a single pass over the matches, so the
    text between two neighbouring spans is never mistaken for a span.
    """
    return re.sub(r'`([\w\s</>]+?)`', r'<q><code>\1</code></q>', text)


def convert_single_line(line: str) -> str:
    """
    Function convert_single_line()
    ==============================
    Convert single-line Markdown to HTML.

    Headings, bold, italic, blockquotes, strikethrough, links, images
    and ``---`` rules are converted in a loop that repeats until a pass
    finds nothing more to convert.  Afterwards convert_list(),
    convert_gfm() and replace_script_tag() are applied, and ``<br/>`` is
    appended unless the line became a heading, an image or a rule.

    The module-level global ``need_br_tag`` is overwritten on each call.
    """

    global need_br_tag
    need_br_tag = True  # if the line need a '<br>' tag at the end.
    have_style = True  # if there is a style.

    while have_style:  # loop(because there will possibly be nested styles)
        have_style = False

        # find a style and convert it to html
        head = re.match(r'#+\s', line)
        if head:
            head = str(head.group(0))
            lenth = len(head) - 1
            if lenth <= 6:
                # Replace the leading marker only; the same text may
                # appear again later in the heading (e.g. "# C# notes").
                line = line.replace(head, f'<h{lenth}>', 1)
                line = line + f'</h{lenth}><hr/>'
                need_br_tag = False
                have_style = True

        bold = re.findall(r'[\*_]{2}[\w\W]+?[\*_]{2}', line)
        if bold:
            for i in bold:
                strong = '<strong>' + i[2:-2] + '</strong>'
                line = line.replace(i, strong)
            have_style = True

        italic = re.findall(r'[\*_][\w\s</>]+?[\*_]', line)
        if italic:
            for i in italic:
                em = '<em>' + i[1:-1] + '</em>'
                line = line.replace(i, em)
            have_style = True

        # NOTE(review): this pattern also matches leading whitespace
        # without any '>' -- confirm whether that is intended.
        quote = re.match(r'[>\s]+\s', line)
        if quote:
            quote = str(quote.group(0))
            lenth = len(quote) - 1
            # Replace the leading marker only.
            line = line.replace(quote, lenth * '<blockquote>', 1)
            line = line + lenth * '</blockquote>'
            have_style = True

        strikethrough = re.findall(r'~{2}[\w\s]+?~{2}', line)
        if strikethrough:
            for i in strikethrough:
                s = '<s>' + i[2:-2] + '</s>'
                line = line.replace(i, s)
            have_style = True

        # The text and target are taken from capture groups.  The
        # lookbehind skips "![...](...)" so images are left for the image
        # branch below.
        link = re.search(r'(?<!!)\[([\w\W\s]+?)\]\(([\w\s]+?)\)', line)
        if link:
            text = link.group(1)
            href = link.group(2)
            html_link = f'<a href={href}>{text}</a>'
            line = line.replace(link.group(0), html_link)
            have_style = True

        img = re.search(r'!\[[\w\W\s]+?\]\([\w\W]+?\)', line)
        if img:
            img = str(img.group(0))
            description = re.search(r'!\[[\w\s]+?\]', img)
            if description:
                description = description.group(0)
            else:
                description = ''

            src = img.replace(description, '').replace('(', '').replace(')', '')

            description = description.replace('![', '').replace(']', '')
            image = f'<img src={src} alt={description}/>'
            line = line.replace(img, image)
            need_br_tag = False
            have_style = True

        hr = line == '---'
        if hr:
            line = '<hr/>'
            need_br_tag = False
            have_style = True

    line = convert_list(line)

    # GitHub flavored Markdown support
    line = convert_gfm(line)

    line = replace_script_tag(line)

    if need_br_tag:
        line = line + '<br/>'

    return line

0 comments on commit 2e73225

Please sign in to comment.