Skip to content

Commit

Permalink
fix(sandside): #191 reduce the size for description
Browse files (browse the repository at this point in the history)
and check the sanitizer

close #191
  • Loading branch information
Marthym committed Dec 2, 2023
1 parent f6178ac commit bb8f8d2
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 5 deletions.
1 change: 1 addition & 0 deletions .run/BaywatchApplication.run.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<env name="BAYWATCH_INDEXER_ENABLE" value="true" />
<env name="BAYWATCH_SCRAPER_ENABLE" value="true" />
<env name="SPRING_THREADS_VIRTUAL_ENABLED" value="true" />
<env name="CONSOLE_LOG_PATTERN" value="%clr(%d{${LOG_DATEFORMAT_PATTERN:-yyyy-MM-dd'T'HH:mm:ss.SSSXXX}}){faint} %clr(${LOG_LEVEL_PATTERN:-%5p}) %clr(---){faint} %clr(%-40.40logger{39}){cyan} %clr(:){faint} %m%n${LOG_EXCEPTION_CONVERSION_WORD:-%wEx}" />
</envs>
<module name="assembly" />
<option name="SPRING_BOOT_MAIN_CLASS" value="fr.ght1pc9kc.baywatch.BaywatchApplication" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
@Slf4j
public class SanitizerFilter implements NewsFilter, FeedsFilter {
private static final int PLAIN_TEXT_MAX_LENGTH = 250;
private static final int HTML_MAX_LENGTH = 3_000;
private static final int HTML_MAX_LENGTH = 1_000;

private static final Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy> TITLE_POLICY =
new HtmlPolicyBuilder().toFactory();
Expand Down Expand Up @@ -50,7 +50,8 @@ private static String sanitizeHtml(final String html) {

String htmlEllipsed = html.substring(0, Math.min(HTML_MAX_LENGTH, html.length()));
StringBuilder htmlBuilder = new StringBuilder();
HtmlStreamRenderer htmlRenderer = HtmlStreamRenderer.create(htmlBuilder, invalid -> log.trace("Invalid tag detected in description {}", invalid));
HtmlStreamRenderer htmlRenderer = HtmlStreamRenderer.create(htmlBuilder, invalid ->
log.atTrace().addArgument(invalid).setMessage("Invalid tag detected in description {}").log());
HtmlSanitizer.sanitize(HtmlUtils.htmlUnescape(htmlEllipsed), DESCRIPTION_POLICY.apply(htmlRenderer));
String saneHtml = htmlBuilder.toString();

Expand All @@ -64,7 +65,8 @@ private static String sanitizePlainText(final String text) {

String txtEllipsed = text.substring(0, Math.min(PLAIN_TEXT_MAX_LENGTH, text.length()));
StringBuilder txtBuilder = new StringBuilder();
HtmlStreamRenderer txtRenderer = HtmlStreamRenderer.create(txtBuilder, invalid -> log.trace("Invalid tag detected in title {}", invalid));
HtmlStreamRenderer txtRenderer = HtmlStreamRenderer.create(txtBuilder, invalid ->
log.atTrace().setMessage("Invalid tag detected in title {}").addArgument(invalid).log());
HtmlSanitizer.sanitize(HtmlUtils.htmlUnescape(txtEllipsed), TITLE_POLICY.apply(txtRenderer));
String saneText = txtBuilder.toString();

Expand Down
1 change: 0 additions & 1 deletion sandside/src/main/resources/application.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ management:
application: ${spring.application.name}

logging:
# pattern.level: "%5p [${spring.application.name:},%X{traceId:-},%X{spanId:-}]"
# Avoid endless stack traces caused by Reactor
exception-conversion-word: "%wEx{full, reactor.core.publisher}"
level:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void should_sanitize_news_description() {
"fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in " +
"culpa qui officia deserunt mollit anim id est laborum. " +
"Illegal H1 usageIllegal A usage<b>Lorem ipsum dolor")
.endsWith("quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irur</b>"))
.endsWith("sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, co</b>"))
.verifyComplete();
}
}

0 comments on commit bb8f8d2

Please sign in to comment.