diff --git a/README.md b/README.md
index 0dca42ad..ebf0815c 100644
--- a/README.md
+++ b/README.md
@@ -234,6 +234,7 @@ These checks are available in the package. You can add or remove checks in the c
✅ The page contains no broken links.
✅ The page contains no broken images.
✅ Length of the content is at least 2100 characters.
+✅ No more than 20% of the sentences are too long (more than 20 words).
### Meta
diff --git a/resources/lang/en.json b/resources/lang/en.json
index b4255720..cf7c144b 100644
--- a/resources/lang/en.json
+++ b/resources/lang/en.json
@@ -29,5 +29,6 @@
"failed.performance.javascript_size": "The page contains Javascript files that are too large (max :expectedValue). These files were found: :actualValue.",
"failed.performance.response": "The page returned a response code other than :expectedValue. The actual response code was :actualValue.",
"failed.performance.ttfb": "The page took too long to load (max :expectedValuems). The actual time was :actualValuems.",
- "failed.performance.ttfb.missing_url": "We could not get the TTFB for this page."
+ "failed.performance.ttfb.missing_url": "We could not get the TTFB for this page.",
+ "failed.content.too_long_sentence": "The page contains :actualValue sentences that are too long."
}
\ No newline at end of file
diff --git a/resources/lang/nl.json b/resources/lang/nl.json
index 8cc4583c..8b1fcd39 100644
--- a/resources/lang/nl.json
+++ b/resources/lang/nl.json
@@ -27,5 +27,6 @@
"failed.performance.javascript_size": "The page contains Javascript files that are too large (max :expectedValue). These files were found: :actualValue.",
"failed.performance.response": "The page returned a response code other than :expectedValue. The actual response code was :actualValue.",
"failed.performance.ttfb": "The page took too long to load (max :expectedValuems). The actual time was :actualValuems.",
- "failed.performance.ttfb.missing_url": "We could not get the TTFB for this page."
+ "failed.performance.ttfb.missing_url": "We could not get the TTFB for this page.",
+ "failed.content.too_long_sentence": "The page contains :actualValue sentences that are too long."
}
\ No newline at end of file
diff --git a/src/Checks/Content/TooLongSentenceCheck.php b/src/Checks/Content/TooLongSentenceCheck.php
new file mode 100644
index 00000000..82c1e3c0
--- /dev/null
+++ b/src/Checks/Content/TooLongSentenceCheck.php
@@ -0,0 +1,109 @@
+validateContent($crawler)) {
+ return false;
+ }
+
+ return true;
+ }
+
+    /**
+     * Check whether the share of overly long sentences stays within the limit.
+     *
+     * The text fragments extracted from the crawler are split on dots, and the
+     * sentences with too many words are collected and stored in
+     * `$this->actualValue`. When more than 20% of all sentences are too long,
+     * `$this->failureReason` is set and the check fails.
+     *
+     * @param Crawler $crawler Crawler holding the page that is inspected.
+     *
+     * @return bool True when the page passes, false when too many sentences are too long.
+     */
+    public function validateContent(Crawler $crawler): bool
+    {
+        $sentences = $this->separateSentencesByDot(
+            $this->getSentencesFromCrawler($crawler)
+        );
+
+        // Compute the list once and reuse it for both the stored value and the ratio.
+        $sentencesWithTooManyWords = $this->calculateSentencesWithTooManyWords($sentences);
+
+        $this->actualValue = $sentencesWithTooManyWords;
+
+        $tooLongCount = count($sentencesWithTooManyWords);
+
+        // Returning early here also guarantees count($sentences) > 0 below,
+        // because every too long sentence is an element of $sentences.
+        if ($tooLongCount === 0) {
+            return true;
+        }
+
+        // If more than 20% of the total sentences are too long, fail
+        if ($tooLongCount / count($sentences) > 0.2) {
+            $this->failureReason = __('failed.content.too_long_sentence', [
+                'actualValue' => $tooLongCount,
+            ]);
+
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Split every text fragment on "." and return the non-blank pieces.
+     *
+     * Pieces that are empty or consist only of whitespace are dropped so they
+     * are not counted as sentences. The remaining pieces are returned exactly
+     * as they appear in the input (not trimmed), as a sequentially indexed list.
+     *
+     * @param array $sentences Text fragments, one string per element.
+     *
+     * @return array List of non-blank fragments.
+     */
+    private function separateSentencesByDot(array $sentences): array
+    {
+        $fragments = [];
+
+        foreach ($sentences as $sentence) {
+            foreach (explode('.', $sentence) as $fragment) {
+                // Compare against '' instead of using empty(): empty() would
+                // discard the fragment "0" and keep fragments such as " ".
+                if (trim($fragment) !== '') {
+                    $fragments[] = $fragment;
+                }
+            }
+        }
+
+        return $fragments;
+    }
+
+    /**
+     * Collect the text of the text nodes found below the children of <body>.
+     *
+     * Only text nodes that are not blank after whitespace normalisation are
+     * selected by the XPath expression.
+     *
+     * @param Crawler $crawler Crawler holding the page that is inspected.
+     *
+     * @return array One string per matched text node.
+     */
+    private function getSentencesFromCrawler(Crawler $crawler): array
+    {
+        $textNodes = $crawler
+            ->filterXPath('//body')
+            ->children()
+            ->filterXPath('//*/text()[normalize-space()]');
+
+        return $textNodes->each(static function (Crawler $textNode) {
+            return $textNode->text();
+        });
+    }
+
+    /**
+     * Return the sentences that contain more than 20 words.
+     *
+     * Words are counted with a Unicode-aware pattern: a letter followed by any
+     * run of letters, combining marks, apostrophes or hyphens. For ASCII text
+     * this matches what str_word_count() reports, but words with accented
+     * characters (e.g. "coördinatie") are no longer split into several words.
+     *
+     * @param array $sentences Sentences to inspect, one string per element.
+     *
+     * @return array List of the sentences that are too long, in input order.
+     */
+    private function calculateSentencesWithTooManyWords(array $sentences): array
+    {
+        $tooLongSentences = [];
+
+        foreach ($sentences as $sentence) {
+            $wordCount = preg_match_all('/\p{L}[\p{L}\p{M}\'-]*/u', $sentence);
+
+            // preg_match_all() returns false when the subject is not valid
+            // UTF-8; fall back to the byte-based count in that case.
+            if ($wordCount === false) {
+                $wordCount = str_word_count($sentence);
+            }
+
+            if ($wordCount > 20) {
+                $tooLongSentences[] = $sentence;
+            }
+        }
+
+        return $tooLongSentences;
+    }
+}
diff --git a/tests/Checks/Content/TooLongSentenceCheckTest.php b/tests/Checks/Content/TooLongSentenceCheckTest.php
new file mode 100644
index 00000000..8290a8ef
--- /dev/null
+++ b/tests/Checks/Content/TooLongSentenceCheckTest.php
@@ -0,0 +1,74 @@
+ Http::response(
+ '
+
'.$body.'
+ ', + 200), + ]); + + $crawler->addHtmlContent(Http::get('vormkracht10.nl')->body()); + + $this->assertFalse($check->check(Http::get('vormkracht10.nl'), $crawler)); +}); + +it('can perform the too long sentence check on page with no too long sentence', function () { + $check = new TooLongSentenceCheck(); + $crawler = new Crawler(); + + $body = 'One two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen'; + + Http::fake([ + 'vormkracht10.nl' => Http::response( + ' + +'.$body.'
+ ', + 200), + ]); + + $crawler->addHtmlContent(Http::get('vormkracht10.nl')->body()); + + $check->check(Http::get('vormkracht10.nl'), $crawler); + + $this->assertTrue($check->check(Http::get('vormkracht10.nl'), $crawler)); +}); + +it('can perform the too long sentence check on page with no body', function () { + $check = new TooLongSentenceCheck(); + $crawler = new Crawler(); + + Http::fake([ + 'vormkracht10.nl' => Http::response( + ' + +