2017-07-04 12:12:36 +02:00
|
|
|
<?php
|
2018-10-02 16:25:05 +02:00
|
|
|
|
2017-07-06 23:48:47 +02:00
|
|
|
namespace Codappix\SearchCore\Domain\Index;
|
2017-07-04 12:12:36 +02:00
|
|
|
|
|
|
|
/*
 * Copyright (C) 2017  Daniel Siepmann <coding@daniel-siepmann.de>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
|
|
|
|
|
2017-07-13 12:51:36 +02:00
|
|
|
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
|
2017-07-27 13:00:51 +02:00
|
|
|
use Codappix\SearchCore\Configuration\InvalidArgumentException;
|
2017-07-06 23:48:47 +02:00
|
|
|
use Codappix\SearchCore\Connection\ConnectionInterface;
|
2018-10-11 16:50:32 +02:00
|
|
|
use Elastica\Query;
|
2017-11-08 21:05:53 +01:00
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility;
|
2017-07-04 12:12:36 +02:00
|
|
|
|
|
|
|
/**
 * Base class for indexers.
 *
 * Implements the generic workflow (fetch records in batches, run the
 * configured data processors, add search specific fields, hand the result to
 * the connection). Concrete indexers only provide the record access and the
 * document naming through the abstract methods at the end of this class.
 */
abstract class AbstractIndexer implements IndexerInterface
{
    /**
     * Connection used to add and delete documents and to delete the index.
     *
     * @var ConnectionInterface
     */
    protected $connection;

    /**
     * Configuration queried for the "indexing.<identifier>.*" settings.
     *
     * @var ConfigurationContainerInterface
     */
    protected $configuration;

    /**
     * Identifier of this indexer, used as segment of the configuration paths.
     *
     * @var string
     */
    protected $identifier = '';

    /**
     * Service executing the configured data processors for a record.
     *
     * @var \Codappix\SearchCore\DataProcessing\Service
     * @inject
     */
    protected $dataProcessorService;

    /**
     * @var \TYPO3\CMS\Core\Log\Logger
     */
    protected $logger;

    /**
     * Inject log manager to get concrete logger from it.
     *
     * @param \TYPO3\CMS\Core\Log\LogManager $logManager
     */
    public function injectLogger(\TYPO3\CMS\Core\Log\LogManager $logManager)
    {
        $this->logger = $logManager->getLogger(__CLASS__);
    }

    /**
     * @param ConnectionInterface $connection
     * @param ConfigurationContainerInterface $configuration
     */
    public function __construct(ConnectionInterface $connection, ConfigurationContainerInterface $configuration)
    {
        $this->connection = $connection;
        $this->configuration = $configuration;
    }

    /**
     * Indexes all records, batch by batch.
     *
     * Each batch delivered by getRecordGenerator() is prepared record by
     * record and then passed to the connection as a whole.
     */
    public function indexAllDocuments()
    {
        $this->logger->info('Start indexing');
        foreach ($this->getRecordGenerator() as $records) {
            if ($records === null) {
                break;
            }

            foreach ($records as &$record) {
                $this->prepareRecord($record);
            }
            // Break the reference, otherwise $record keeps pointing at the
            // last element of the batch and a later write would modify it.
            unset($record);

            $this->logger->debug('Index records.', [$records]);
            $this->connection->addDocuments($this->getDocumentName(), $records);
        }
        $this->logger->info('Finish indexing');
    }

    /**
     * Indexes a single record.
     *
     * If the record can not be fetched, the corresponding document is
     * deleted through the connection instead.
     *
     * @param string $identifier Cast to int before the record is fetched.
     */
    public function indexDocument(string $identifier)
    {
        $this->logger->info('Start indexing single record.', [$identifier]);
        try {
            $record = $this->getRecord((int)$identifier);
            $this->prepareRecord($record);

            $this->connection->addDocument($this->getDocumentName(), $record);
        } catch (NoRecordFoundException $e) {
            $this->logger->info('Could not index document. Try to delete it therefore.', [$e->getMessage()]);
            $this->connection->deleteDocument($this->getDocumentName(), $this->getDocumentIdentifier($identifier));
        }
        $this->logger->info('Finish indexing');
    }

    /**
     * Deletes the index through the connection.
     */
    public function delete()
    {
        $this->logger->info('Start deletion of index.');
        $this->connection->deleteIndex();
        $this->logger->info('Finish deletion.');
    }

    /**
     * Deletes all documents whose "search_document_type" equals the document
     * name of this indexer.
     */
    public function deleteDocuments()
    {
        $this->logger->info('Start deletion of indexed documents.');
        $this->connection->deleteIndexByQuery(Query::create([
            'query' => [
                'term' => [
                    'search_document_type' => $this->getDocumentName()
                ]
            ]
        ]));
        $this->logger->info('Finish deletion.');
    }

    /**
     * Yields the records in batches of getLimit() until getRecords() returns
     * an empty array.
     */
    protected function getRecordGenerator(): \Generator
    {
        $offset = 0;
        $limit = $this->getLimit();

        while (($records = $this->getRecords($offset, $limit)) !== []) {
            yield $records;
            $offset += $limit;
        }
    }

    /**
     * Prepares a record in place for indexing.
     *
     * Runs every data processor configured at
     * "indexing.<identifier>.dataProcessing", then adds the search
     * identifiers and the abstract.
     *
     * @param array $record Modified by reference.
     */
    protected function prepareRecord(array &$record)
    {
        try {
            foreach ($this->configuration->get('indexing.' . $this->identifier . '.dataProcessing') as $configuration) {
                $record = $this->dataProcessorService->executeDataProcessor($configuration, $record, $this->identifier);
            }
        } catch (InvalidArgumentException $e) {
            // Nothing to do. Data processing stops, the remaining
            // preparation steps below are still executed.
        }
        $this->generateSearchIdentifiers($record);
        $this->handleAbstract($record);
    }

    /**
     * Adds "search_document_type" and "search_identifier" to the record,
     * unless the record already provides them.
     *
     * "search_identifier" is only generated if the record has a "uid".
     *
     * @param array $record Modified by reference.
     */
    protected function generateSearchIdentifiers(array &$record)
    {
        // Check the very key that is written. Checking a different key would
        // overwrite a document type that was set during data processing.
        if (!isset($record['search_document_type'])) {
            $record['search_document_type'] = $this->getDocumentName();
        }
        if (!isset($record['search_identifier']) && isset($record['uid'])) {
            $record['search_identifier'] = $this->getDocumentIdentifier($record['uid']);
        }
    }

    /**
     * Sets "search_abstract" to the trimmed value of the first field listed
     * in "indexing.<identifier>.abstractFields" that has a non empty value.
     *
     * "search_abstract" stays an empty string if no field matches or the
     * configuration raises an InvalidArgumentException.
     *
     * @param array $record Modified by reference.
     */
    protected function handleAbstract(array &$record)
    {
        $record['search_abstract'] = '';

        try {
            $fieldsToUse = GeneralUtility::trimExplode(
                ',',
                $this->configuration->get('indexing.' . $this->identifier . '.abstractFields')
            );
            if ($fieldsToUse === []) {
                // Handled by the catch below, same as missing configuration.
                throw new InvalidArgumentException('No fields to use', 1538487209251);
            }

            foreach ($fieldsToUse as $fieldToUse) {
                if (!isset($record[$fieldToUse])) {
                    continue;
                }

                // Trim once and reuse the result for check and assignment.
                $value = trim($record[$fieldToUse]);
                if ($value) {
                    $record['search_abstract'] = $value;
                    break;
                }
            }
        } catch (InvalidArgumentException $e) {
            // Nothing to do.
        }
    }

    /**
     * Returns the limit to use to fetch records.
     */
    protected function getLimit(): int
    {
        // TODO: Make configurable.
        return 50;
    }

    /**
     * Sets the identifier used to build the configuration paths.
     */
    public function setIdentifier(string $identifier)
    {
        $this->identifier = $identifier;
    }

    /**
     * Returns the identifier used to build the configuration paths.
     */
    public function getIdentifier(): string
    {
        return $this->identifier;
    }

    /**
     * Returns one batch of records. An empty array ends the batch iteration
     * in getRecordGenerator().
     */
    abstract protected function getRecords(int $offset, int $limit): array;

    /**
     * @throws NoRecordFoundException If record could not be found.
     */
    abstract protected function getRecord(int $identifier): array;

    /**
     * Returns the name written to "search_document_type" and passed to the
     * connection as document name.
     */
    abstract protected function getDocumentName(): string;

    /**
     * Returns the value used as "search_identifier" and as identifier when
     * deleting a single document.
     *
     * @param mixed $identifier Called with the "uid" of a record or the
     *                          identifier passed to indexDocument().
     */
    abstract public function getDocumentIdentifier($identifier): string;
}
|