FEATURE: Screenshot whole website into sub folder
This commit is contained in:
parent
5fa39ea7ff
commit
5d2e8a934d
6 changed files with 405 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
/vendor/
|
||||||
|
/composer.lock
|
||||||
|
/output/
|
11
comparison
Executable file
11
comparison
Executable file
|
@ -0,0 +1,11 @@
|
||||||
|
#!/usr/bin/env php
<?php

// Command line entry point: loads the Composer autoloader, registers the
// available commands on a Symfony Console application and runs it.
require __DIR__ . '/vendor/autoload.php';

use Codappix\WebsiteComparison\Command\CreateBaseCommand;
use Symfony\Component\Console\Application;

$console = new Application();
$console->add(new CreateBaseCommand());
$console->run();
|
23
composer.json
Normal file
23
composer.json
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
{
|
||||||
|
"name": "codappix/website-comparison",
|
||||||
|
"description": "Compares a Website visually by comparing Screenshots.",
|
||||||
|
"type": "project",
|
||||||
|
"license": "GPL-2.0-or-later",
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Daniel Siepmann",
|
||||||
|
"email": "coding@daniel-siepmann.de"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"Codappix\\WebsiteComparison\\": "src/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"facebook/webdriver": "^1.6",
|
||||||
|
"symfony/console": "^4.1",
|
||||||
|
"symfony/process": "^4.1",
|
||||||
|
"guzzlehttp/psr7": "^1.4"
|
||||||
|
}
|
||||||
|
}
|
101
src/Command/CreateBaseCommand.php
Normal file
101
src/Command/CreateBaseCommand.php
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Codappix\WebsiteComparison\Command;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use Codappix\WebsiteComparison\Service\ScreenshotCrawlerService;
|
||||||
|
use Facebook\WebDriver\Chrome\ChromeDriver;
|
||||||
|
use Facebook\WebDriver\Chrome\ChromeDriverService;
|
||||||
|
use Symfony\Component\Console\Command\Command;
|
||||||
|
use Symfony\Component\Console\Input\InputArgument;
|
||||||
|
use Symfony\Component\Console\Input\InputInterface;
|
||||||
|
use Symfony\Component\Console\Input\InputOption;
|
||||||
|
use Symfony\Component\Console\Output\OutputInterface;
|
||||||
|
use Symfony\Component\Process\Exception\ProcessFailedException;
|
||||||
|
use Symfony\Component\Process\Process;
|
||||||
|
|
||||||
|
/**
 * Console command "comparison:createbase".
 *
 * Crawls the website given as "baseUrl" and stores a screenshot of every
 * crawled page, to be used as the base for future comparisons.
 */
class CreateBaseCommand extends Command
{
    /**
     * Not assigned or read anywhere within this class.
     *
     * @var Process
     */
    protected $chromeProcess;

    /**
     * Declares name, help texts, options and arguments of the command.
     */
    protected function configure()
    {
        $this
            ->setName('comparison:createbase')
            ->setDescription('Creates the base for comparison.')
            ->setHelp('Crawls and screenshots the original website, as a base for future comparison.')

            // VALUE_REQUIRED: when the option is passed it must carry a value.
            // With VALUE_OPTIONAL a bare "--screenshotDir" resolved to null,
            // which the string/int typed service constructor rejects.
            ->addOption(
                'screenshotDir',
                null,
                InputOption::VALUE_REQUIRED,
                'Define the sub directory to use for storing created Screenshots.',
                'output'
            )
            ->addOption(
                'screenshotWidth',
                null,
                InputOption::VALUE_REQUIRED,
                'The width for screen resolution and screenshots.',
                3840
            )

            ->addArgument(
                'baseUrl',
                InputArgument::REQUIRED,
                'E.g. https://typo3.org/ the base url of the website to crawl.'
            )
        ;
    }

    /**
     * Crawls the website and creates the screenshots.
     *
     * @return int Exit status, 0 on success.
     *
     * @throws \InvalidArgumentException If "screenshotWidth" is not a positive integer.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Validate before a browser driver is started, so invalid input
        // does not leave a driver process behind.
        $screenshotWidth = filter_var(
            $input->getOption('screenshotWidth'),
            FILTER_VALIDATE_INT,
            ['options' => ['min_range' => 1]]
        );
        if ($screenshotWidth === false) {
            throw new \InvalidArgumentException(
                'The option "screenshotWidth" has to be a positive integer.'
            );
        }

        $driver = $this->getDriver();

        try {
            $screenshotCrawler = new ScreenshotCrawlerService(
                $output,
                $driver,
                $input->getArgument('baseUrl'),
                $input->getOption('screenshotDir'),
                $screenshotWidth
            );
            $screenshotCrawler->crawl();
        } finally {
            // Always end the browser session, even if crawling failed.
            $driver->quit();
        }

        return 0;
    }

    /**
     * Starts a Chrome driver through a chromedriver service on port 9515.
     *
     * NOTE(review): the executable path and the port are hard coded.
     * NOTE(review): '--headless' is handed to the chromedriver executable,
     * not to the browser itself — confirm chromedriver honours it.
     */
    protected function getDriver(): ChromeDriver
    {
        $chromeDriverService = new ChromeDriverService(
            '/usr/lib/chromium-browser/chromedriver',
            9515,
            [
                '--port=9515',
                '--headless',
            ]
        );

        return ChromeDriver::start(null, $chromeDriverService);
    }
}
|
60
src/Model/UrlListDto.php
Normal file
60
src/Model/UrlListDto.php
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Codappix\WebsiteComparison\Model;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Work list of URLs for a crawl.
 *
 * Holds the URLs still waiting to be processed and the URLs already
 * processed. A URL that is in either list is never queued again.
 */
class UrlListDto
{
    /**
     * URLs that were marked as finished.
     *
     * @var string[]
     */
    protected $finishedUrls = [];

    /**
     * URLs waiting to be processed, in the order they were added.
     *
     * @var string[]
     */
    protected $upcomingUrls = [];

    /**
     * Queues the URL, unless it is already queued or finished.
     */
    public function addUrl(string $link)
    {
        if ($this->isUrlKnown($link)) {
            return;
        }

        $this->upcomingUrls[] = $link;
    }

    /**
     * Returns the first queued URL without removing it.
     *
     * @return string The next URL, or an empty string if nothing is queued.
     */
    public function getNextUrl(): string
    {
        // reset() returns false (not null) for an empty array, so "??" does
        // not provide the fallback; check for false explicitly.
        $nextUrl = reset($this->upcomingUrls);

        return $nextUrl === false ? '' : $nextUrl;
    }

    /**
     * Removes the URL from the queue and remembers it as finished.
     */
    public function markUrlAsFinished(string $link)
    {
        $upcomingEntry = array_search($link, $this->upcomingUrls, true);
        // array_search() returns false for an unknown URL. Using false as
        // array key addresses index 0 and would drop an unrelated entry.
        if ($upcomingEntry !== false) {
            unset($this->upcomingUrls[$upcomingEntry]);
        }

        if (!in_array($link, $this->finishedUrls, true)) {
            $this->finishedUrls[] = $link;
        }
    }

    /**
     * Whether the URL is already queued or finished.
     */
    public function isUrlKnown(string $link): bool
    {
        // Strict comparison: loosely compared, numeric looking strings such
        // as "1e3" and "1000" would be treated as the same URL.
        return in_array($link, $this->finishedUrls, true)
            || in_array($link, $this->upcomingUrls, true);
    }
}
|
207
src/Service/ScreenshotCrawlerService.php
Normal file
207
src/Service/ScreenshotCrawlerService.php
Normal file
|
@ -0,0 +1,207 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Codappix\WebsiteComparison\Service;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use Codappix\WebsiteComparison\Model\UrlListDto;
|
||||||
|
use Facebook\WebDriver\Remote\RemoteWebDriver;
|
||||||
|
use Facebook\WebDriver\Remote\RemoteWebElement;
|
||||||
|
use Facebook\WebDriver\WebDriverBy;
|
||||||
|
use GuzzleHttp\Psr7\Uri;
|
||||||
|
use Symfony\Component\Console\Output\OutputInterface;
|
||||||
|
use Symfony\Component\Process\Process;
|
||||||
|
|
||||||
|
/**
 * Crawls a website starting at a base URL and creates a screenshot of every
 * internal page it finds.
 *
 * Pages are opened through the given web driver to measure the body height
 * and to collect further links. The screenshot itself is created by a
 * separate headless "chromium-browser" process.
 */
class ScreenshotCrawlerService
{
    /**
     * @var OutputInterface
     */
    protected $output;

    /**
     * @var RemoteWebDriver
     */
    protected $driver;

    /**
     * Base URL, always with exactly one trailing slash.
     *
     * @var string
     */
    protected $baseUrl = '';

    /**
     * Absolute screenshot directory, always with a trailing directory separator.
     *
     * @var string
     */
    protected $screenshotDir = '';

    /**
     * @var int
     */
    protected $screenshotWidth = 3840;

    /**
     * @param string $baseUrl URL the crawl starts at; also defines the internal host.
     * @param string $screenshotDir Directory for screenshots, relative to the
     *                              directory two levels above the one containing this file.
     * @param int $screenshotWidth Window width used when creating screenshots.
     */
    public function __construct(
        OutputInterface $output,
        RemoteWebDriver $driver,
        string $baseUrl,
        string $screenshotDir = 'output/',
        int $screenshotWidth = 3840
    ) {
        $this->output = $output;
        $this->driver = $driver;
        $this->baseUrl = rtrim($baseUrl, '/') . '/';
        $this->screenshotDir = implode(DIRECTORY_SEPARATOR, [
            dirname(__DIR__, 2),
            rtrim($screenshotDir, '/'),
        ]) . DIRECTORY_SEPARATOR;
        $this->screenshotWidth = $screenshotWidth;
    }

    /**
     * Visits the base URL and every internal link reachable from it, creating
     * one screenshot per visited URL.
     *
     * @throws \Exception If a screenshot folder could not be created.
     */
    public function crawl()
    {
        $this->createScreenshotDirIfNecessary();

        $linkList = new UrlListDto();
        $linkList->addUrl($this->baseUrl);

        // An empty string signals that no URL is left.
        while (($url = $linkList->getNextUrl()) !== '') {
            $this->driver->get($url);
            $screenshotHeight = $this->driver
                ->findElement(WebDriverBy::cssSelector('body'))
                ->getSize()
                ->getHeight();
            // The screenshot is created for the URL the driver reports as
            // current, not for the requested one.
            $this->createScreenshot($this->driver->getCurrentURL(), $screenshotHeight);

            $linkList->markUrlAsFinished($url);
            $furtherLinks = $this->fetchFurtherLinks(
                $this->driver->findElements(WebDriverBy::cssSelector('a'))
            );
            foreach ($furtherLinks as $furtherLink) {
                $linkList->addUrl($furtherLink);
            }
        }
    }

    /**
     * Creates the screenshot dir, or the given sub path within it, if missing.
     *
     * @throws \Exception If folder could not be created.
     */
    protected function createScreenshotDirIfNecessary(string $subPath = '')
    {
        $dir = $this->screenshotDir;
        if ($subPath !== '') {
            // screenshotDir already ends with a separator, avoid doubling it.
            $dir = rtrim($dir, DIRECTORY_SEPARATOR)
                . DIRECTORY_SEPARATOR
                . trim($subPath, DIRECTORY_SEPARATOR);
        }

        // Verify the directory that was requested, not only the screenshot
        // root. The second is_dir() covers a directory that appeared between
        // the first check and mkdir().
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \Exception('Could not create screenshot dir: "' . $dir . '".', 1535528875);
        }
    }

    /**
     * Creates a screenshot of the URL with the configured width and given height.
     *
     * A failing screenshot process is reported on the output and does not
     * abort the crawl.
     *
     * @throws \Exception If the target folder could not be created.
     */
    protected function createScreenshot(string $url, int $height)
    {
        $screenshotTarget = $this->getScreenshotTarget($url);
        $this->createScreenshotDirIfNecessary(dirname($screenshotTarget));
        $screenshotFile = $this->screenshotDir . $screenshotTarget;

        $screenshotProcess = new Process([
            'chromium-browser',
            '--headless',
            '--disable-gpu',
            '--window-size=' . $this->screenshotWidth . ',' . $height,
            '--screenshot=' . $screenshotFile,
            $url,
        ]);
        $screenshotProcess->run();

        if (!$screenshotProcess->isSuccessful()) {
            $this->output->writeln(sprintf(
                '<error>Could not create screenshot "%s" for url "%s" (exit code %d).</error>',
                $screenshotFile,
                $url,
                $screenshotProcess->getExitCode()
            ));

            return;
        }

        if ($this->output->isVerbose()) {
            $this->output->writeln(sprintf(
                '<info>Created screenshot "%s" for url "%s".</info>',
                $screenshotFile,
                $url
            ));
        }
    }

    /**
     * Builds the file name of the screenshot, relative to the screenshot dir.
     *
     * Scheme, host, path and query of the URL become path segments, empty
     * parts are left out. With "/" as directory separator,
     * "https://example.com/foo/bar?x=1" results in
     * "https/example.com/foo/bar/x=1.png".
     */
    protected function getScreenshotTarget(string $url)
    {
        $uri = new Uri($url);

        $segments = array_filter(
            [
                $uri->getScheme(),
                $uri->getHost(),
                trim($uri->getPath(), '/'),
                $uri->getQuery(),
            ],
            static function (string $segment) {
                return trim($segment, ' /') !== '';
            }
        );

        return implode(DIRECTORY_SEPARATOR, $segments) . '.png';
    }

    /**
     * Collects the links of all given elements that can be crawled.
     *
     * Elements for which no internal link can be determined are skipped.
     *
     * @param RemoteWebElement[] $webElements
     *
     * @return string[]
     */
    protected function fetchFurtherLinks(array $webElements): array
    {
        $links = [];
        foreach ($webElements as $webElement) {
            try {
                $links[] = $this->fetchLinkFromElement($webElement);
            } catch (\Exception $e) {
                // No usable internal link for this element.
                continue;
            }
        }

        return $links;
    }

    /**
     * Returns the internal link of the element, without fragment.
     *
     * @throws \Exception If the element has no link, or the link is external.
     */
    protected function fetchLinkFromElement(RemoteWebElement $element): string
    {
        $href = $element->getAttribute('href');
        if (!is_string($href)) {
            throw new \Exception('Did not get a Uri for element.', 1535530859);
        }

        // The fragment is not part of the screenshot file name, so links that
        // differ only in their fragment would create the same screenshot again.
        $uri = (new Uri($href))->withFragment('');
        $link = (string) $uri;

        // An empty link must not be queued: the URL list uses an empty
        // string to signal that nothing is left, which would end the crawl.
        if ($link === '') {
            throw new \Exception('Did not get a Uri for element.', 1535530859);
        }

        if (!$this->isInternalLink($uri)) {
            throw new \Exception('Was external link.', 1535639056);
        }

        return $link;
    }

    /**
     * Whether the URI points to the crawled website.
     *
     * That is the case for http(s) or scheme-less URIs whose host is empty
     * or equal to the host of the base URL.
     */
    protected function isInternalLink(Uri $uri): bool
    {
        // "mailto:", "tel:", "javascript:" and similar URIs have no host
        // either, but are no pages that can be crawled.
        if (!in_array($uri->getScheme(), ['', 'http', 'https'], true)) {
            return false;
        }

        $validHosts = [
            '',
            (new Uri($this->baseUrl))->getHost(),
        ];

        return in_array($uri->getHost(), $validHosts, true);
    }
}
|
Loading…
Reference in a new issue