Skip to content

Commit

Permalink
Merge branch 'too-long-sentences-check' of github.com:vormkracht10/la…
Browse files Browse the repository at this point in the history
…ravel-seo-scanner into flesch-reading-ease-score
  • Loading branch information
Baspa committed Aug 11, 2023
2 parents 979499c + f0bf592 commit 2d95789
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ These checks are available in the package. You can add or remove checks in the c
✅ The page contains no broken links. <br>
✅ The page contains no broken images. <br>
✅ Length of the content is at least 2100 characters. <br>
✅ No more than 20% of the sentences are too long (more than 20 words). <br>

### Meta

Expand Down
3 changes: 2 additions & 1 deletion resources/lang/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@
"failed.performance.javascript_size": "The page contains Javascript files that are too large (max :expectedValue). These files were found: :actualValue.",
"failed.performance.response": "The page returned a response code other than :expectedValue. The actual response code was :actualValue.",
"failed.performance.ttfb": "The page took too long to load (max :expectedValuems). The actual time was :actualValuems.",
"failed.performance.ttfb.missing_url": "We could not get the TTFB for this page."
"failed.performance.ttfb.missing_url": "We could not get the TTFB for this page.",
"failed.content.too_long_sentence": "The page contains :actualValue sentences that are too long."
}
3 changes: 2 additions & 1 deletion resources/lang/nl.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@
"failed.performance.javascript_size": "The page contains Javascript files that are too large (max :expectedValue). These files were found: :actualValue.",
"failed.performance.response": "The page returned a response code other than :expectedValue. The actual response code was :actualValue.",
"failed.performance.ttfb": "The page took too long to load (max :expectedValuems). The actual time was :actualValuems.",
"failed.performance.ttfb.missing_url": "We could not get the TTFB for this page."
"failed.performance.ttfb.missing_url": "We could not get the TTFB for this page.",
"failed.content.too_long_sentence": "The page contains :actualValue sentences that are too long."
}
109 changes: 109 additions & 0 deletions src/Checks/Content/TooLongSentenceCheck.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
<?php

namespace Vormkracht10\Seo\Checks\Content;

use Illuminate\Http\Client\Response;
use Symfony\Component\DomCrawler\Crawler;
use Vormkracht10\Seo\Interfaces\Check;
use Vormkracht10\Seo\Traits\PerformCheck;

/**
 * SEO check that fails when too large a share of the page's sentences
 * are too long.
 *
 * Text nodes below the body's child elements are collected, split on dots
 * into sentence fragments, and each fragment's word count is compared with
 * MAX_WORDS_PER_SENTENCE. The check fails when the share of too-long
 * fragments exceeds MAX_TOO_LONG_RATIO.
 */
class TooLongSentenceCheck implements Check
{
    use PerformCheck;

    /** A sentence with more words than this is considered too long. */
    private const MAX_WORDS_PER_SENTENCE = 20;

    /** Share of too-long sentences above which the check fails. */
    private const MAX_TOO_LONG_RATIO = 0.2;

    public string $title = 'Too long sentence check';

    public string $priority = 'medium';

    public int $timeToFix = 45;

    public int $scoreWeight = 5;

    public bool $continueAfterFailure = true;

    public ?string $failureReason;

    public mixed $actualValue = null;

    public mixed $expectedValue = null;

    /**
     * Run the check against the crawled page.
     *
     * The response is part of the Check signature but is not inspected here;
     * only the crawler's DOM is used.
     *
     * @return bool True when the page passes, false otherwise.
     */
    public function check(Response $response, Crawler $crawler): bool
    {
        return $this->validateContent($crawler);
    }

    /**
     * Decide whether the share of too-long sentences is acceptable.
     *
     * Stores the list of too-long sentences in $actualValue and, on failure,
     * a translated message in $failureReason.
     *
     * @return bool False when more than MAX_TOO_LONG_RATIO of the sentences
     *              are too long, true otherwise (including pages without text).
     */
    public function validateContent(Crawler $crawler): bool
    {
        $sentences = $this->separateSentencesByDot(
            $this->getSentencesFromCrawler($crawler)
        );

        // Computed once and reused for both the stored value and the ratio.
        $sentencesWithTooManyWords = $this->calculateSentencesWithTooManyWords($sentences);

        $this->actualValue = $sentencesWithTooManyWords;

        // Also covers the "no sentences at all" case, so the division below
        // never sees a zero denominator.
        if ($sentencesWithTooManyWords === []) {
            return true;
        }

        $tooLongRatio = count($sentencesWithTooManyWords) / count($sentences);

        if ($tooLongRatio > self::MAX_TOO_LONG_RATIO) {
            $this->failureReason = __('failed.content.too_long_sentence', [
                'actualValue' => count($this->actualValue),
            ]);

            return false;
        }

        return true;
    }

    /**
     * Split every text chunk on dots and return the non-blank fragments.
     *
     * Fragments are trimmed, and fragments that are empty after trimming are
     * dropped, so whitespace left over after a trailing dot is not counted as
     * a sentence when the ratio is calculated.
     *
     * @param  array<int, string>  $sentences  Raw text chunks.
     * @return array<int, string> Trimmed, non-empty sentence fragments.
     */
    private function separateSentencesByDot(array $sentences): array
    {
        $fragments = [];

        foreach ($sentences as $sentence) {
            foreach (explode('.', $sentence) as $fragment) {
                $fragment = trim($fragment);

                if ($fragment !== '') {
                    $fragments[] = $fragment;
                }
            }
        }

        return $fragments;
    }

    /**
     * Collect the text of every non-blank text node found below the
     * children of the body element.
     *
     * @return array<int, string> One entry per matched text node.
     */
    private function getSentencesFromCrawler(Crawler $crawler): array
    {
        $textNodes = $crawler
            ->filterXPath('//body')
            ->children()
            // Only text nodes that contain something other than whitespace.
            ->filterXPath('//*/text()[normalize-space()]');

        return $textNodes->each(static fn (Crawler $node) => $node->text());
    }

    /**
     * Return the sentences that contain more than MAX_WORDS_PER_SENTENCE words.
     *
     * NOTE(review): words are counted with str_word_count(), which is
     * locale-dependent and may miscount non-ASCII text — confirm whether a
     * Unicode-aware counter is needed.
     *
     * @param  array<int, string>  $sentences
     * @return array<int, string> The too-long sentences, re-indexed from zero.
     */
    private function calculateSentencesWithTooManyWords(array $sentences): array
    {
        return array_values(array_filter(
            $sentences,
            static fn ($sentence) => str_word_count($sentence) > self::MAX_WORDS_PER_SENTENCE
        ));
    }
}
74 changes: 74 additions & 0 deletions tests/Checks/Content/TooLongSentenceCheckTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php

use Illuminate\Support\Facades\Http;
use Symfony\Component\DomCrawler\Crawler;
use Vormkracht10\Seo\Checks\Content\TooLongSentenceCheck;

// A single 21-word sentence is the only sentence on the page, so the check must fail.
it('can perform the too long sentence check on page with too long sentence', function () {
    $sentence = 'One two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen twenty twenty-one.';

    // Minimal page fixture wrapping the sentence in a paragraph.
    $html = '<html>
<head>
<title>Test</title>
</head>
<body>
<p>'.$sentence.'</p>
</body>';

    Http::fake([
        'vormkracht10.nl' => Http::response($html, 200),
    ]);

    $domCrawler = new Crawler();
    $domCrawler->addHtmlContent(Http::get('vormkracht10.nl')->body());

    $tooLongSentenceCheck = new TooLongSentenceCheck();

    $this->assertFalse($tooLongSentenceCheck->check(Http::get('vormkracht10.nl'), $domCrawler));
});

// An 18-word sentence stays under the limit, so the check must pass.
it('can perform the too long sentence check on page with no too long sentence', function () {
    $check = new TooLongSentenceCheck();
    $crawler = new Crawler();

    $body = 'One two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen';

    Http::fake([
        'vormkracht10.nl' => Http::response(
            '<html>
<head>
<title>Test</title>
</head>
<body>
<p>'.$body.'</p>
</body>',
            200),
    ]);

    // Fetch the faked page once and reuse it for both the DOM and the check.
    $response = Http::get('vormkracht10.nl');

    $crawler->addHtmlContent($response->body());

    // The check is executed exactly once, inside the assertion.
    $this->assertTrue($check->check($response, $crawler));
});

// A page whose body holds no text yields no sentences, so the check must pass.
it('can perform the too long sentence check on page with no body', function () {
    $check = new TooLongSentenceCheck();
    $crawler = new Crawler();

    Http::fake([
        'vormkracht10.nl' => Http::response(
            '<html>
<head>
<title>Test</title>
</head>
<body></body>',
            200),
    ]);

    // Fetch the faked page once and reuse it for both the DOM and the check.
    $response = Http::get('vormkracht10.nl');

    $crawler->addHtmlContent($response->body());

    // The check is executed exactly once, inside the assertion.
    $this->assertTrue($check->check($response, $crawler));
});

0 comments on commit 2d95789

Please sign in to comment.