From 2a42293b2d5070be037c592a677d860613eb6e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gre=CC=81goire=20Compagnon?= Date: Fri, 14 Jun 2024 02:22:49 +0200 Subject: [PATCH] fix(export): fix excessive newline characters in Markdown content Remove excessive newline characters from Markdown content to ensure there are no unnecessary blank lines in the output. --- export_manager.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/export_manager.py b/export_manager.py index 9e8d3fe..74fe1de 100644 --- a/export_manager.py +++ b/export_manager.py @@ -1,6 +1,7 @@ import json from database_manager import DatabaseManager import logging # Add log messages +import re class ExportManager: @@ -32,10 +33,29 @@ def _adjust_headers(self, content, level_increment=1): if line.startswith("#"): hashes = len(line.split(" ")[0]) new_hashes = min(hashes + level_increment, 6) # Limit to ###### - line = "#" * new_hashes + line[hashes:] + line = "\n" + "#" * new_hashes + line[hashes:] + "\n" new_content += line + "\n" return new_content + def _cleanup_markdown(self, content): + """ + Remove excessive newline characters from Markdown content. + + This method replaces sequences of three or more consecutive newline characters + with exactly two newline characters, ensuring that there are no unnecessary + blank lines in the output. + + Args: + content (str): The Markdown content to be cleaned up. + + Returns: + str: The cleaned-up Markdown content with reduced newline characters. + """ + while "\n\n\n" in content: + content = content.replace("\n\n\n", "\n\n") + return content + + def _concatenate_markdown(self, pages): """ Concatenate a list of Markdown files into one, with header adjustments. @@ -46,7 +66,7 @@ def _concatenate_markdown(self, pages): Returns: str: The concatenated Markdown content. """ - final_content = f"# {self.title}\n\n" + final_content = f"# {self.title}\n" for url, content, metadata in pages: if content is None: continue # Skip empty pages @@ -56,7 +76,7 @@ def _concatenate_markdown(self, pages): } # Prepare metadata as an HTML comment - metadata_content = f"