Skip to content

Commit

Permalink
Merge pull request #27 from openzim/jl/feat/fts
Browse files Browse the repository at this point in the history
Added FTS support.
  • Loading branch information
benoit74 authored Sep 24, 2024
2 parents abf16af + 2df177e commit be6bccb
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 9 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ LABEL org.opencontainers.image.source https://github.com/openzim/devdocs
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libmagic1 \
libcairo2 \
&& rm -rf /var/lib/apt/lists/* \
&& python -m pip install --no-cache-dir -U \
pip
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ readme = "README.md"
dependencies = [
"requests==2.32.3",
"pydantic==2.8.2",
"zimscraperlib==3.4.0",
"zimscraperlib==4.0.0",
"Jinja2==3.1.3",
"beautifulsoup4==4.12.3",
]
dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]

Expand Down
26 changes: 18 additions & 8 deletions src/devdocs2zim/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections import defaultdict
from pathlib import Path

from bs4 import BeautifulSoup
from jinja2 import Environment, FileSystemLoader, select_autoescape
from pydantic import BaseModel
from zimscraperlib.constants import ( # pyright: ignore[reportMissingTypeStubs]
Expand All @@ -16,7 +17,11 @@
Creator,
StaticItem,
)
from zimscraperlib.zim.indexing import ( # pyright: ignore[reportMissingTypeStubs]
IndexData,
)

# pyright: ignore[reportMissingTypeStubs]
from devdocs2zim.client import (
DevdocsClient,
DevdocsIndex,
Expand Down Expand Up @@ -339,6 +344,7 @@ def load_common_files(self) -> list[StaticItem]:
content=app_css,
is_front=False,
mimetype="text/css",
auto_index=False,
)
)

Expand All @@ -353,6 +359,7 @@ def load_common_files(self) -> list[StaticItem]:
),
is_front=True,
mimetype="text/plain",
auto_index=False,
)
)

Expand Down Expand Up @@ -419,10 +426,6 @@ def generate_zim(
Illustration_48x48_at_1=self.logo_path.read_bytes(),
)

# Disable indexing because it won't be available in the JS frontend
# and causes significant performance issues with rendered sidebars.
creator.config_indexing(False)

# Start creator early to detect problems early.
with creator as started_creator:
logger.info(" Fetching the index...")
Expand Down Expand Up @@ -496,14 +499,16 @@ def add_zim_contents(
num_slashes = path.count("/")
rel_prefix = "../" * num_slashes

content = MISSING_PAGE
if path in db:
content = db.get(path)
else:
content = db.get(path, MISSING_PAGE)
if path not in db:
logger.warning(
f" DevDocs is missing content for {title!r} at {path!r}."
)

plain_content = " ".join(
BeautifulSoup(content, features="lxml").find_all(string=True)
)

# NOTE: Profiling indicates Jinja templating takes about twice
# as much CPU time as adding items without compression. This appears to
# be because of the navigation bar.
Expand All @@ -527,6 +532,11 @@ def add_zim_contents(
# navigation bar.
should_compress=True,
mimetype="text/html",
# Only index page content rather than navigation data.
index_data=IndexData(
title=title,
content=plain_content,
),
)

# Tracking metadata
Expand Down

0 comments on commit be6bccb

Please sign in to comment.