From 5d2e8a934d8dd22619c734fccd47820bff921542 Mon Sep 17 00:00:00 2001
From: Daniel Siepmann <coding@daniel-siepmann.de>
Date: Wed, 29 Aug 2018 10:07:39 +0200
Subject: [PATCH] FEATURE: Screenshot whole website into sub folder

---
 .gitignore                               |   3 +
 comparison                               |  11 ++
 composer.json                            |  23 +++
 src/Command/CreateBaseCommand.php        | 111 ++++++++++++
 src/Model/UrlListDto.php                 |  76 ++++++++
 src/Service/ScreenshotCrawlerService.php | 239 +++++++++++++++++++++++++
 6 files changed, 463 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 comparison
 create mode 100644 composer.json
 create mode 100644 src/Command/CreateBaseCommand.php
 create mode 100644 src/Model/UrlListDto.php
 create mode 100644 src/Service/ScreenshotCrawlerService.php

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..75d1064
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/vendor/
+/composer.lock
+/output/
diff --git a/comparison b/comparison
new file mode 100755
index 0000000..9385d64
--- /dev/null
+++ b/comparison
@@ -0,0 +1,11 @@
+#!/usr/bin/env php
+<?php
+
+require __DIR__ . '/vendor/autoload.php';
+
+use Codappix\WebsiteComparison\Command\CreateBaseCommand;
+use Symfony\Component\Console\Application;
+
+$application = new Application();
+$application->add(new CreateBaseCommand());
+$application->run();
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..c8bc4bb
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,23 @@
+{
+    "name": "codappix/website-comparison",
+    "description": "Compares a Website visually by comparing Screenshots.",
+    "type": "project",
+    "license": "GPL-2.0-or-later",
+    "authors": [
+        {
+            "name": "Daniel Siepmann",
+            "email": "coding@daniel-siepmann.de"
+        }
+    ],
+    "autoload": {
+        "psr-4": {
+            "Codappix\\WebsiteComparison\\": "src/"
+        }
+    },
+    "require": {
+        "facebook/webdriver": "^1.6",
+        "symfony/console": "^4.1",
+        "symfony/process": "^4.1",
+        "guzzlehttp/psr7": "^1.4"
+    }
+}
diff --git a/src/Command/CreateBaseCommand.php b/src/Command/CreateBaseCommand.php
new file mode 100644
index 0000000..b56da72
--- /dev/null
+++ b/src/Command/CreateBaseCommand.php
@@ -0,0 +1,111 @@
+<?php
+
+namespace Codappix\WebsiteComparison\Command;
+
+/*
+ * Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+use Codappix\WebsiteComparison\Service\ScreenshotCrawlerService;
+use Facebook\WebDriver\Chrome\ChromeDriver;
+use Facebook\WebDriver\Chrome\ChromeDriverService;
+use Symfony\Component\Console\Command\Command;
+use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputInterface;
+use Symfony\Component\Console\Input\InputOption;
+use Symfony\Component\Console\Output\OutputInterface;
+use Symfony\Component\Process\Exception\ProcessFailedException;
+use Symfony\Component\Process\Process;
+
+/**
+ * Console command that crawls a website and stores a screenshot of every page.
+ */
+class CreateBaseCommand extends Command
+{
+    /**
+     * @var Process
+     */
+    protected $chromeProcess;
+
+    /**
+     * Registers the command name, its options and the required base URL argument.
+     */
+    protected function configure()
+    {
+        $this
+            ->setName('comparison:createbase')
+            ->setDescription('Creates the base for comparison.')
+            ->setHelp('Crawls and screenshots the original website, as a base for future comparison.')
+
+            ->addOption(
+                'screenshotDir',
+                null,
+                InputOption::VALUE_OPTIONAL,
+                'Define the sub directory to use for storing created Screenshots.',
+                'output'
+            )
+            ->addOption(
+                'screenshotWidth',
+                null,
+                InputOption::VALUE_OPTIONAL,
+                'The width for screen resolution and screenshots.',
+                3840
+            )
+
+            ->addArgument(
+                'baseUrl',
+                InputArgument::REQUIRED,
+                'E.g. https://typo3.org/ the base url of the website to crawl.'
+            )
+        ;
+    }
+
+    /**
+     * Crawls the given base URL and screenshots every page that was found.
+     */
+    protected function execute(InputInterface $input, OutputInterface $output)
+    {
+        $screenshotCrawler = new ScreenshotCrawlerService(
+            $output,
+            $this->getDriver(),
+            $input->getArgument('baseUrl'),
+            $input->getOption('screenshotDir'),
+            // Options passed on the command line arrive as strings.
+            (int) $input->getOption('screenshotWidth')
+        );
+        $screenshotCrawler->crawl();
+    }
+
+    /**
+     * Builds a ChromeDriver backed by a chromedriver service configured for port 9515.
+     */
+    protected function getDriver(): ChromeDriver
+    {
+        $chromeDriverService = new ChromeDriverService(
+            '/usr/lib/chromium-browser/chromedriver',
+            9515,
+            [
+                '--port=9515',
+                '--headless',
+            ]
+        );
+        $driver = ChromeDriver::start(null, $chromeDriverService);
+
+        return $driver;
+    }
+}
diff --git a/src/Model/UrlListDto.php b/src/Model/UrlListDto.php
new file mode 100644
index 0000000..0573968
--- /dev/null
+++ b/src/Model/UrlListDto.php
@@ -0,0 +1,76 @@
+<?php
+
+namespace Codappix\WebsiteComparison\Model;
+
+/*
+ * Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * Queue of URLs that still have to be crawled plus the URLs already handled.
+ */
+class UrlListDto
+{
+    protected $finishedUrls = [];
+
+    protected $upcomingUrls = [];
+
+    /**
+     * Queues the URL unless it is already queued or finished.
+     */
+    public function addUrl(string $link)
+    {
+        if ($this->isUrlKnown($link)) {
+            return;
+        }
+
+        $this->upcomingUrls[] = $link;
+    }
+
+    /**
+     * Returns the first queued URL, or an empty string once the queue is empty.
+     */
+    public function getNextUrl(): string
+    {
+        // reset() returns false (not null) for an empty array, hence "?:" instead of "??".
+        return reset($this->upcomingUrls) ?: '';
+    }
+
+    /**
+     * Moves the URL from the queue to the finished list.
+     */
+    public function markUrlAsFinished(string $link)
+    {
+        $upcomingEntry = array_search($link, $this->upcomingUrls, true);
+
+        // array_search() returns false for unknown links; unset() would then drop index 0.
+        if ($upcomingEntry !== false) {
+            unset($this->upcomingUrls[$upcomingEntry]);
+        }
+
+        $this->finishedUrls[] = $link;
+    }
+
+    /**
+     * Tells whether the URL is already queued or finished.
+     */
+    public function isUrlKnown(string $link): bool
+    {
+        return in_array($link, $this->finishedUrls, true) || in_array($link, $this->upcomingUrls, true);
+    }
+}
diff --git a/src/Service/ScreenshotCrawlerService.php b/src/Service/ScreenshotCrawlerService.php
new file mode 100644
index 0000000..c72b931
--- /dev/null
+++ b/src/Service/ScreenshotCrawlerService.php
@@ -0,0 +1,239 @@
+<?php
+
+namespace Codappix\WebsiteComparison\Service;
+
+/*
+ * Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+use Codappix\WebsiteComparison\Model\UrlListDto;
+use Facebook\WebDriver\Remote\RemoteWebDriver;
+use Facebook\WebDriver\Remote\RemoteWebElement;
+use Facebook\WebDriver\WebDriverBy;
+use GuzzleHttp\Psr7\Uri;
+use Symfony\Component\Console\Output\OutputInterface;
+use Symfony\Component\Process\Process;
+
+/**
+ * Crawls a website through WebDriver and stores a headless Chromium screenshot of each page.
+ */
+class ScreenshotCrawlerService
+{
+    /**
+     * @var OutputInterface
+     */
+    protected $output;
+
+    /**
+     * @var RemoteWebDriver
+     */
+    protected $driver;
+
+    /**
+     * @var string
+     */
+    protected $baseUrl = '';
+
+    /**
+     * @var string
+     */
+    protected $screenshotDir = '';
+
+    /**
+     * @var int
+     */
+    protected $screenshotWidth = 3840;
+
+    /**
+     * @param string $baseUrl Start URL of the crawl; stored with exactly one trailing slash.
+     * @param string $screenshotDir Folder for screenshots, relative to the directory three levels above this file.
+     * @param int $screenshotWidth Window width passed to Chromium for every screenshot.
+     */
+    public function __construct(
+        OutputInterface $output,
+        RemoteWebDriver $driver,
+        string $baseUrl,
+        string $screenshotDir = 'output/',
+        int $screenshotWidth = 3840
+    ) {
+        $this->output = $output;
+        $this->driver = $driver;
+        $this->baseUrl = rtrim($baseUrl, '/') . '/';
+        $this->screenshotDir = implode(DIRECTORY_SEPARATOR, [
+            dirname(dirname(dirname(__FILE__))),
+            rtrim($screenshotDir, '/')
+        ]) . DIRECTORY_SEPARATOR;
+        $this->screenshotWidth = $screenshotWidth;
+    }
+
+    /**
+     * Screenshots the base URL and every internal page reachable through its links.
+     *
+     * @throws \Exception If a screenshot folder could not be created.
+     */
+    public function crawl()
+    {
+        $this->createScreenshotDirIfNecessary();
+
+        $linkList = new UrlListDto();
+        $linkList->addUrl($this->baseUrl);
+
+        while ($url = $linkList->getNextUrl()) {
+            $this->driver->get($url);
+            $screenshotHeight = $this->driver->findElement(WebDriverBy::cssSelector('body'))
+                ->getSize()
+                ->getHeight();
+            $this->createScreenshot($this->driver->getCurrentURL(), $screenshotHeight);
+
+            $linkList->markUrlAsFinished($url);
+            array_map([$linkList, 'addUrl'], $this->fetchFurtherLinks(
+                $this->driver->findElements(WebDriverBy::cssSelector('a'))
+            ));
+        }
+    }
+
+    /**
+     * Creates the screenshot folder, or the given sub folder inside of it, if missing.
+     *
+     * @throws \Exception If folder could not be created.
+     */
+    protected function createScreenshotDirIfNecessary(string $subPath = '')
+    {
+        $dir = $this->screenshotDir;
+        if ($subPath !== '') {
+            $dir = $dir . DIRECTORY_SEPARATOR . trim($subPath, DIRECTORY_SEPARATOR);
+        }
+
+        // Check $dir itself rather than the base folder, so failed sub folders are reported.
+        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
+            throw new \Exception('Could not create screenshot dir: "' . $dir . '".', 1535528875);
+        }
+    }
+
+    /**
+     * Renders the URL with headless Chromium into a PNG below the screenshot folder.
+     *
+     * @throws \Exception If the target folder could not be created.
+     */
+    protected function createScreenshot(string $url, int $height)
+    {
+        $screenshotTarget = $this->getScreenshotTarget($url);
+        $this->createScreenshotDirIfNecessary(dirname($screenshotTarget));
+
+        $screenshotProcess = new Process([
+            'chromium-browser',
+            '--headless',
+            '--disable-gpu',
+            '--window-size=' . $this->screenshotWidth . ',' . $height,
+            '--screenshot=' . $this->screenshotDir . $screenshotTarget,
+            $url
+        ]);
+        // TODO: Check for success
+        $screenshotProcess->run();
+
+        if ($this->output->isVerbose()) {
+            $this->output->writeln(sprintf(
+                'Created screenshot "%s" for url "%s".',
+                $this->screenshotDir . $screenshotTarget,
+                $url
+            ));
+        }
+    }
+
+    /**
+     * Maps the URL to a relative file path built from scheme, host, path and query.
+     */
+    protected function getScreenshotTarget(string $url)
+    {
+        $uri = new Uri($url);
+
+        return implode(
+            DIRECTORY_SEPARATOR,
+            array_filter(
+                [
+                    $uri->getScheme(),
+                    $uri->getHost(),
+                    trim($uri->getPath(), '/'),
+                    $uri->getQuery(),
+                ],
+                function (string $string) {
+                    return trim($string, ' /') !== '';
+                }
+            )
+        ) . '.png';
+    }
+
+    /**
+     * Collects the internal link targets of the given elements, skipping unusable ones.
+     */
+    protected function fetchFurtherLinks(array $webElements): array
+    {
+        $links = [];
+        foreach ($webElements as $webElement) {
+            try {
+                $link = $this->fetchLinkFromElement($webElement);
+            } catch (\Exception $e) {
+                continue;
+            }
+
+            $links[] = $link;
+        }
+
+        return $links;
+    }
+
+    /**
+     * Returns the element's href without fragment, provided it is an internal link.
+     *
+     * @throws \Exception If the element has no href or links to an external target.
+     */
+    protected function fetchLinkFromElement(RemoteWebElement $element): string
+    {
+        $uri = null;
+        $href = $element->getAttribute('href');
+        if (is_string($href)) {
+            $uri = new Uri($href);
+        }
+
+        if ($uri === null) {
+            throw new \Exception('Did not get a Uri for element.', 1535530859);
+        }
+
+        if ($this->isInternalLink($uri)) {
+            // Fragments address the same page; dropping them avoids crawling it repeatedly.
+            return (string) $uri->withFragment('');
+        }
+
+        throw new \Exception('Was external link.', 1535639056);
+    }
+
+    /**
+     * Tells whether the URI is a http(s) or scheme-less link to the crawled host.
+     */
+    protected function isInternalLink(Uri $uri): bool
+    {
+        $validHosts = [
+            '',
+            (new Uri($this->baseUrl))->getHost(),
+        ];
+
+        // mailto:, tel: or javascript: links also have an empty host, so check the scheme too.
+        return in_array($uri->getScheme(), ['', 'http', 'https'], true)
+            && in_array($uri->getHost(), $validHosts, true);
+    }
+}