Skip to content

Commit

Permalink
[Fix] Decode gzip content to prevent uncheckable page content (#53)
Browse files Browse the repository at this point in the history
* Decode gzip content

* Check on different variants of meta description tags

* Fix styling

* Improve if node check

* Fix styling

---------

Co-authored-by: Baspa <Baspa@users.noreply.github.com>
  • Loading branch information
Baspa and Baspa authored Jan 25, 2024
1 parent f1f02fb commit 2cb86b7
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 13 deletions.
31 changes: 22 additions & 9 deletions src/Checks/Meta/DescriptionCheck.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,33 @@ public function check(Response $response, Crawler $crawler): bool
return true;
}

public function validateContent(Crawler $crawler): bool
public function getDescriptionContent(Crawler $crawler): ?string
{
$node = $crawler->filterXPath('//meta[@name="description"]')->getNode(0);
$tags = ['description', 'og:description', 'twitter:description'];

if (! $node) {
return false;
}
foreach ($tags as $tag) {
$property = $tag === 'og:description' ? 'property' : 'name';

$content = $crawler->filterXPath('//meta[@name="description"]')->attr('content');
/** @var \DOMElement $node */
$node = $crawler->filterXPath("//meta[@{$property}=\"{$tag}\"]")->getNode(0);

if (! $content) {
return false;
if ($node instanceof \DOMElement && $node->hasAttribute('content')) {
return $node->getAttribute('content');
}
}

return true;
return null;
}

/**
 * Determine whether the page exposes a usable description.
 *
 * The value is taken from getDescriptionContent(). It counts as valid only
 * when it is not null and still has characters left after surrounding
 * whitespace has been trimmed.
 *
 * @param Crawler $crawler Crawler to read the description from.
 *
 * @return bool True when a non-blank description was found, false otherwise.
 */
public function validateContent(Crawler $crawler): bool
{
    $content = $this->getDescriptionContent($crawler);

    // empty() would reject the literal string "0" and accept a value made
    // up solely of whitespace, so compare against null and '' explicitly.
    return $content !== null && trim($content) !== '';
}

/**
 * Tell whether getDescriptionContent() yields a value for the page.
 *
 * Only null is treated as "not set"; any string, including an empty
 * one, counts as set.
 *
 * @param Crawler $crawler Crawler to read the description from.
 *
 * @return bool False when getDescriptionContent() returns null, true otherwise.
 */
public function isDescriptionSet(Crawler $crawler): bool
{
    $description = $this->getDescriptionContent($crawler);

    return ! is_null($description);
}
}
2 changes: 1 addition & 1 deletion src/Seo.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ private function visitPage(string $url): object
$options = (array) config('seo.http.options', []);

$response = $this->http::withOptions([
'decode_content' => false,
'decode_content' => 'gzip',
...$options,
])
->withHeaders([
Expand Down
32 changes: 29 additions & 3 deletions tests/Checks/Meta/DescriptionCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,46 @@
use Symfony\Component\DomCrawler\Crawler;
use Vormkracht10\Seo\Checks\Meta\DescriptionCheck;

it('can perform the description check on a page with a description', function () {
it('can perform the description check on a page with an og:description', function () {
$check = new DescriptionCheck();
$crawler = new Crawler();

Http::fake([
'vormkracht10.nl' => Http::response('<html><head><meta name="description" content="Vormkracht10 is a web development agency based in Amsterdam."></head><body></body></html>', 200),
'vormkracht10.nl' => Http::response('<html><head><meta property="og:description" content="Vormkracht10 is a web development agency based in Amsterdam."></head><body></body></html>', 200),
]);

$crawler->addHtmlContent(Http::get('vormkracht10.nl')->body());

$this->assertTrue($check->check(Http::get('vormkracht10.nl'), $crawler));
});

it('can perform the description check on a page without a description', function () {
it('can perform the description check on a page with a twitter:description', function () {
    $descriptionCheck = new DescriptionCheck();
    $page = new Crawler();

    // Faked page whose only description is a twitter:description meta tag.
    $html = '<html><head><meta name="twitter:description" content="Vormkracht10 is a web development agency based in Amsterdam."></head><body></body></html>';

    Http::fake([
        'vormkracht10.nl' => Http::response($html, 200),
    ]);

    // Load the faked body into the crawler, then request the page again
    // to obtain the response object handed to the check.
    $body = Http::get('vormkracht10.nl')->body();
    $page->addHtmlContent($body);

    $response = Http::get('vormkracht10.nl');
    $passed = $descriptionCheck->check($response, $page);

    $this->assertTrue($passed);
});

it('can perform the description check on a page with multiple description tags', function () {
    $descriptionCheck = new DescriptionCheck();
    $page = new Crawler();

    // Faked page carrying description, og:description and twitter:description
    // meta tags at the same time.
    $html = '<html><head><meta name="description" content="Vormkracht10 is a web development agency based in Amsterdam."><meta property="og:description" content="Vormkracht10 is a web development agency based in Amsterdam."><meta name="twitter:description" content="Vormkracht10 is a web development agency based in Amsterdam."></head><body></body></html>';

    Http::fake([
        'vormkracht10.nl' => Http::response($html, 200),
    ]);

    // Load the faked body into the crawler, then request the page again
    // to obtain the response object handed to the check.
    $body = Http::get('vormkracht10.nl')->body();
    $page->addHtmlContent($body);

    $response = Http::get('vormkracht10.nl');
    $passed = $descriptionCheck->check($response, $page);

    $this->assertTrue($passed);
});

it('can perform the description check on a page without any description tags', function () {
$check = new DescriptionCheck();
$crawler = new Crawler();

Expand Down

0 comments on commit 2cb86b7

Please sign in to comment.