FEATURE: Screenshot whole website into sub folder
This commit is contained in:
parent
5fa39ea7ff
commit
5d2e8a934d
6 changed files with 405 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
/vendor/
|
||||
/composer.lock
|
||||
/output/
|
11
comparison
Executable file
11
comparison
Executable file
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env php
<?php

// Command line entry point: builds the Symfony console application,
// registers the commands provided by this package and runs it.

require __DIR__ . '/vendor/autoload.php';

use Codappix\WebsiteComparison\Command\CreateBaseCommand;
use Symfony\Component\Console\Application;

$console = new Application();
$console->add(new CreateBaseCommand());
$console->run();
|
23
composer.json
Normal file
23
composer.json
Normal file
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"name": "codappix/website-comparison",
|
||||
"description": "Compares a Website visually by comparing Screenshots.",
|
||||
"type": "project",
|
||||
"license": "GPL-2.0-or-later",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Daniel Siepmann",
|
||||
"email": "coding@daniel-siepmann.de"
|
||||
}
|
||||
],
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Codappix\\WebsiteComparison\\": "src/"
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
"facebook/webdriver": "^1.6",
|
||||
"symfony/console": "^4.1",
|
||||
"symfony/process": "^4.1",
|
||||
"guzzlehttp/psr7": "^1.4"
|
||||
}
|
||||
}
|
101
src/Command/CreateBaseCommand.php
Normal file
101
src/Command/CreateBaseCommand.php
Normal file
|
@ -0,0 +1,101 @@
|
|||
<?php
|
||||
|
||||
namespace Codappix\WebsiteComparison\Command;
|
||||
|
||||
/*
|
||||
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
use Codappix\WebsiteComparison\Service\ScreenshotCrawlerService;
|
||||
use Facebook\WebDriver\Chrome\ChromeDriver;
|
||||
use Facebook\WebDriver\Chrome\ChromeDriverService;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputArgument;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Process\Exception\ProcessFailedException;
|
||||
use Symfony\Component\Process\Process;
|
||||
|
||||
/**
 * Console command "comparison:createbase".
 *
 * Crawls the website below the given base url and stores a screenshot of
 * every visited page, as a base for future comparison.
 */
class CreateBaseCommand extends Command
{
    /**
     * Not used by the code of this class itself.
     *
     * @var Process
     */
    protected $chromeProcess;

    /**
     * Registers name, description, help text, options and arguments of the command.
     */
    protected function configure()
    {
        $this
            ->setName('comparison:createbase')
            ->setDescription('Creates the base for comparison.')
            ->setHelp('Crawls and screenshots the original website, as a base for future comparison.')

            ->addOption(
                'screenshotDir',
                null,
                // The option itself may be omitted (default applies), but once
                // given it needs a value. With VALUE_OPTIONAL a bare
                // "--screenshotDir" resulted in null, which the crawler
                // service does not accept.
                InputOption::VALUE_REQUIRED,
                'Define the sub directory to use for storing created Screenshots.',
                'output'
            )
            ->addOption(
                'screenshotWidth',
                null,
                InputOption::VALUE_REQUIRED,
                'The width for screen resolution and screenshots.',
                3840
            )

            ->addArgument(
                'baseUrl',
                InputArgument::REQUIRED,
                'E.g. https://typo3.org/ the base url of the website to crawl.'
            )
            ;
    }

    /**
     * Starts the crawl with the provided arguments and options.
     *
     * @return int Exit code, 0 once the crawl has finished.
     *
     * @throws \InvalidArgumentException If the screenshot width is not a positive integer.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Options provided on the command line arrive as strings, validate
        // and convert before handing the value to the service.
        $screenshotWidth = filter_var(
            $input->getOption('screenshotWidth'),
            FILTER_VALIDATE_INT,
            ['options' => ['min_range' => 1]]
        );
        if ($screenshotWidth === false) {
            throw new \InvalidArgumentException(
                'The option "screenshotWidth" has to be a positive integer.'
            );
        }

        $screenshotCrawler = new ScreenshotCrawlerService(
            $output,
            $this->getDriver(),
            $input->getArgument('baseUrl'),
            (string) $input->getOption('screenshotDir'),
            $screenshotWidth
        );
        $screenshotCrawler->crawl();

        return 0;
    }

    /**
     * Starts a ChromeDriver using the chromedriver binary of chromium-browser on port 9515.
     */
    protected function getDriver(): ChromeDriver
    {
        $chromeDriverService = new ChromeDriverService(
            '/usr/lib/chromium-browser/chromedriver',
            9515,
            [
                '--port=9515',
                '--headless',
            ]
        );

        return ChromeDriver::start(null, $chromeDriverService);
    }
}
|
60
src/Model/UrlListDto.php
Normal file
60
src/Model/UrlListDto.php
Normal file
|
@ -0,0 +1,60 @@
|
|||
<?php
|
||||
|
||||
namespace Codappix\WebsiteComparison\Model;
|
||||
|
||||
/*
|
||||
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
 * Keeps track of the urls of a crawl: Those still to be processed and those
 * already finished.
 */
class UrlListDto
{
    /**
     * Urls that were marked as finished.
     *
     * @var string[]
     */
    protected $finishedUrls = [];

    /**
     * Urls that still have to be processed, in the order they were added.
     *
     * @var string[]
     */
    protected $upcomingUrls = [];

    /**
     * Queues the given url, unless it is already queued or finished.
     *
     * @param string $link
     *
     * @return void
     */
    public function addUrl(string $link)
    {
        if ($this->isUrlKnown($link)) {
            return;
        }

        $this->upcomingUrls[] = $link;
    }

    /**
     * Returns the oldest queued url without removing it from the queue.
     *
     * @return string The next url, or an empty string if nothing is queued.
     */
    public function getNextUrl(): string
    {
        // reset() returns false (not null) for an empty array, so the result
        // has to be compared against false explicitly.
        $nextUrl = reset($this->upcomingUrls);

        return $nextUrl === false ? '' : $nextUrl;
    }

    /**
     * Removes the url from the queue, if queued, and remembers it as finished.
     *
     * @param string $link
     *
     * @return void
     */
    public function markUrlAsFinished(string $link)
    {
        $upcomingEntry = array_search($link, $this->upcomingUrls, true);

        // array_search() returns false if the url is not queued. Using false
        // as array key would address index 0 and drop an unrelated url.
        if ($upcomingEntry !== false) {
            unset($this->upcomingUrls[$upcomingEntry]);
        }

        if (!in_array($link, $this->finishedUrls, true)) {
            $this->finishedUrls[] = $link;
        }
    }

    /**
     * Whether the url is either queued or already finished.
     *
     * @param string $link
     *
     * @return bool
     */
    public function isUrlKnown(string $link): bool
    {
        return in_array($link, $this->finishedUrls, true)
            || in_array($link, $this->upcomingUrls, true);
    }
}
|
207
src/Service/ScreenshotCrawlerService.php
Normal file
207
src/Service/ScreenshotCrawlerService.php
Normal file
|
@ -0,0 +1,207 @@
|
|||
<?php
|
||||
|
||||
namespace Codappix\WebsiteComparison\Service;
|
||||
|
||||
/*
|
||||
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
use Codappix\WebsiteComparison\Model\UrlListDto;
|
||||
use Facebook\WebDriver\Remote\RemoteWebDriver;
|
||||
use Facebook\WebDriver\Remote\RemoteWebElement;
|
||||
use Facebook\WebDriver\WebDriverBy;
|
||||
use GuzzleHttp\Psr7\Uri;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Process\Process;
|
||||
|
||||
/**
 * Crawls a website, starting at the base url, follows all internal links and
 * creates a screenshot for each visited page.
 */
class ScreenshotCrawlerService
{
    /**
     * Used to report created screenshots and failures.
     *
     * @var OutputInterface
     */
    protected $output;

    /**
     * Used to open pages, to determine their height and to find links.
     *
     * @var RemoteWebDriver
     */
    protected $driver;

    /**
     * Start url of the crawl, always with a single trailing slash.
     *
     * @var string
     */
    protected $baseUrl = '';

    /**
     * Absolute path of the screenshot directory, with trailing directory separator.
     *
     * @var string
     */
    protected $screenshotDir = '';

    /**
     * Width of the browser window used for screenshots.
     *
     * @var int
     */
    protected $screenshotWidth = 3840;

    /**
     * @param OutputInterface $output
     * @param RemoteWebDriver $driver
     * @param string $baseUrl Url to start crawling at.
     * @param string $screenshotDir Directory for screenshots, relative to the
     *                              directory two levels above this file's directory.
     * @param int $screenshotWidth
     */
    public function __construct(
        OutputInterface $output,
        RemoteWebDriver $driver,
        string $baseUrl,
        string $screenshotDir = 'output/',
        int $screenshotWidth = 3840
    ) {
        $this->output = $output;
        $this->driver = $driver;
        $this->baseUrl = rtrim($baseUrl, '/') . '/';
        $this->screenshotDir = implode(DIRECTORY_SEPARATOR, [
            dirname(__DIR__, 2),
            rtrim($screenshotDir, '/'),
        ]) . DIRECTORY_SEPARATOR;
        $this->screenshotWidth = $screenshotWidth;
    }

    /**
     * Visits the base url and every internal link found on visited pages,
     * creating one screenshot per visited page.
     *
     * @return void
     *
     * @throws \Exception If a screenshot directory could not be created.
     */
    public function crawl()
    {
        $this->createScreenshotDirIfNecessary();

        $linkList = new UrlListDto();
        $linkList->addUrl($this->baseUrl);

        // An empty string signals that no further url is queued.
        while (($url = $linkList->getNextUrl()) !== '') {
            $this->driver->get($url);
            $screenshotHeight = $this->driver->findElement(WebDriverBy::cssSelector('body'))
                ->getSize()
                ->getHeight();
            $this->createScreenshot($this->driver->getCurrentURL(), $screenshotHeight);

            $linkList->markUrlAsFinished($url);
            $furtherLinks = $this->fetchFurtherLinks(
                $this->driver->findElements(WebDriverBy::cssSelector('a'))
            );
            foreach ($furtherLinks as $furtherLink) {
                $linkList->addUrl($furtherLink);
            }
        }
    }

    /**
     * Creates the screenshot directory, or the given sub directory of it,
     * in case it does not exist yet.
     *
     * @param string $subPath Path relative to the screenshot directory.
     *
     * @return void
     *
     * @throws \Exception If folder could not be created.
     */
    protected function createScreenshotDirIfNecessary(string $subPath = '')
    {
        $dir = $this->screenshotDir;
        if ($subPath !== '') {
            $dir = rtrim($this->screenshotDir, DIRECTORY_SEPARATOR)
                . DIRECTORY_SEPARATOR
                . trim($subPath, DIRECTORY_SEPARATOR);
        }

        if (!is_dir($dir)) {
            mkdir($dir, 0777, true);
        }

        // Verify the directory that was requested, not only the base
        // directory, so a failed sub directory creation does not go unnoticed.
        if (!is_dir($dir)) {
            throw new \Exception('Could not create screenshot dir: "' . $dir . '".', 1535528875);
        }
    }

    /**
     * Creates a screenshot of the url by running a headless chromium-browser.
     *
     * A failing browser process is reported to the output, it does not stop
     * the crawl.
     *
     * @param string $url
     * @param int $height Height of the browser window.
     *
     * @return void
     *
     * @throws \Exception If the target directory could not be created.
     */
    protected function createScreenshot(string $url, int $height)
    {
        $screenshotTarget = $this->getScreenshotTarget($url);
        $this->createScreenshotDirIfNecessary(dirname($screenshotTarget));
        $screenshotFile = $this->screenshotDir . $screenshotTarget;

        $screenshotProcess = new Process([
            'chromium-browser',
            '--headless',
            '--disable-gpu',
            '--window-size=' . $this->screenshotWidth . ',' . $height,
            '--screenshot=' . $screenshotFile,
            $url,
        ]);
        $screenshotProcess->run();

        if (!$screenshotProcess->isSuccessful()) {
            $this->output->writeln(sprintf(
                '<error>Could not create screenshot "%s" for url "%s" (exit code %d).</error>',
                $screenshotFile,
                $url,
                $screenshotProcess->getExitCode()
            ));

            return;
        }

        if ($this->output->isVerbose()) {
            $this->output->writeln(sprintf(
                '<info>Created screenshot "%s" for url "%s".</info>',
                $screenshotFile,
                $url
            ));
        }
    }

    /**
     * Builds the file name of the screenshot, relative to the screenshot
     * directory, out of scheme, host, path and query of the url.
     *
     * E.g. "https://typo3.org/community/" results in
     * "https/typo3.org/community.png" on systems using "/" as separator.
     *
     * @param string $url
     *
     * @return string
     */
    protected function getScreenshotTarget(string $url)
    {
        $uri = new Uri($url);

        $parts = array_filter(
            [
                $uri->getScheme(),
                $uri->getHost(),
                trim($uri->getPath(), '/'),
                $uri->getQuery(),
            ],
            function (string $string) {
                return trim($string, ' /') !== '';
            }
        );

        return implode(DIRECTORY_SEPARATOR, $parts) . '.png';
    }

    /**
     * Collects the internal links of the given elements. Elements without a
     * usable internal link are skipped.
     *
     * @param RemoteWebElement[] $webElements
     *
     * @return string[]
     */
    protected function fetchFurtherLinks(array $webElements): array
    {
        $links = [];
        foreach ($webElements as $webElement) {
            try {
                $links[] = $this->fetchLinkFromElement($webElement);
            } catch (\Exception $e) {
                // Neither missing nor external links are of interest.
                continue;
            }
        }

        return $links;
    }

    /**
     * Returns the internal link the element points to, without fragment.
     *
     * @throws \Exception If the element has no usable href or links to an external target.
     */
    protected function fetchLinkFromElement(RemoteWebElement $element): string
    {
        $href = $element->getAttribute('href');
        if (!is_string($href)) {
            throw new \Exception('Did not get a Uri for element.', 1535530859);
        }

        // The fragment is not part of the screenshot file name, so links that
        // only differ in their fragment would be screenshotted to the same file.
        $uri = (new Uri($href))->withFragment('');

        // An empty url would be mistaken for "no further url" by the crawl loop.
        if ((string) $uri === '') {
            throw new \Exception('Did not get a Uri for element.', 1535530859);
        }

        if ($this->isInternalLink($uri)) {
            return (string) $uri;
        }

        throw new \Exception('Was external link.', 1535639056);
    }

    /**
     * Whether the uri belongs to the crawled website: It has no host or the
     * host of the base url, and is not of a non web scheme like "mailto".
     */
    protected function isInternalLink(Uri $uri): bool
    {
        // Uris like "mailto:" or "javascript:" have no host either, but can
        // not be crawled.
        if (!in_array($uri->getScheme(), ['', 'http', 'https'], true)) {
            return false;
        }

        $validHosts = [
            '',
            (new Uri($this->baseUrl))->getHost(),
        ];

        return in_array($uri->getHost(), $validHosts, true);
    }
}
|
Loading…
Reference in a new issue