Skip to content

Commit

Permalink
Improve quote handling for highlights with notes
Browse files Browse the repository at this point in the history
  • Loading branch information
Basti Tee committed Aug 21, 2023
1 parent 798fc82 commit 670de45
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## 0.2.0

- Improve quote handling of highlights with notes
- Extracted notes writer to separate module
- Improved type-safety
- Improve VSCode development environment
Expand Down
2 changes: 1 addition & 1 deletion tests/test_tolino_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_lang_de_noted_note_2(self) -> None: # noqa: D102
assert note.content.endswith('Alas."')
assert note.user_notes
assert note.user_notes.startswith('Let\'s make a long multi')
assert note.user_notes.endswith('quotes like "this')
assert note.user_notes.endswith('quotes like "this"')

def test_lang_de_noted_note_3(self) -> None: # noqa: D102
note = TolinoNote.from_unparsed_content(
Expand Down
29 changes: 17 additions & 12 deletions tolino_notes/tolino_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,19 +69,23 @@ def __get_language(hint: str) -> Optional[Tuple[dict, str]]:
return None

@staticmethod
def __clean_string(string: str) -> str:
def __clean_string(string: str, strip_trail_lead_quotes: bool = True) -> str:
string = string.strip()
if strip_trail_lead_quotes:
for patt_repl in [
(r'"$', ''), # Trailing quotes
(r'^"', ''), # Leading quotes
]:
string = re.sub(patt_repl[0], patt_repl[1], string)
for patt_repl in [
(r'\s*"\s*$', ''), # Trailing quotes
(r'^\s*"\s*', ''), # Leading quotes
(r'[\u2018\u2019\u00b4`]', '\''), # Special ticks ’‘´`
(r'[“”«»]+', '"'), # Unwanted quote types
(r'\'{2}', '"'), # Double-quotes made of single-quotes ''
(r'\s', ' '), # Whitespace characters
(r'…', '...'), # Ellipsis character
]:
string = re.sub(patt_repl[0], patt_repl[1], string)
return string
return string.strip()

@staticmethod
def from_unparsed_content(unparsed_content: str) -> Optional['TolinoNote']:
Expand Down Expand Up @@ -141,14 +145,13 @@ def from_unparsed_content(unparsed_content: str) -> Optional['TolinoNote']:
)
elif re.match(lang_dict['highlight_prefix'] + r'.*', prefix):
# For highlights the entire content is what the user highlighted
content = TolinoNote.__clean_string(
' '.join(
[
re.sub(r'\s', ' ', li.strip()).strip()
for li in full_text_split[1:]
]
)
content = ' '.join(
[
re.sub(r'\s', ' ', li.strip()).strip()
for li in full_text_split[1:]
]
)
content = TolinoNote.__clean_string(content)
return TolinoNote(
NoteType.HIGHLIGHT,
lang_id[1],
Expand All @@ -165,7 +168,9 @@ def from_unparsed_content(unparsed_content: str) -> Optional['TolinoNote']:
# Best guess: The beginning of the book highlight is the last quote
# preceded by a line break. ¯\_(ツ)_/¯
user_notes = r'\n"'.join(fts.split('\n"')[:-1])
user_notes = TolinoNote.__clean_string(re.sub(r'\s', ' ', user_notes))
user_notes = TolinoNote.__clean_string(
re.sub(r'\s', ' ', user_notes), False
)
# Before that is what the user wrote
highlight = r'\n"'.join(fts.split('\n"')[-1:])
highlight = TolinoNote.__clean_string(re.sub(r'\s', ' ', highlight))
Expand Down

0 comments on commit 670de45

Please sign in to comment.