diff --git a/Classes/Domain/Index/AbstractIndexer.php b/Classes/Domain/Index/AbstractIndexer.php
index 475532b..143a219 100644
--- a/Classes/Domain/Index/AbstractIndexer.php
+++ b/Classes/Domain/Index/AbstractIndexer.php
@@ -20,7 +20,10 @@ namespace Codappix\SearchCore\Domain\Index;
* 02110-1301, USA.
*/
+use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
+use Codappix\SearchCore\Configuration\InvalidArgumentException;
use Codappix\SearchCore\Connection\ConnectionInterface;
+use \TYPO3\CMS\Core\Utility\GeneralUtility;
abstract class AbstractIndexer implements IndexerInterface
{
@@ -29,6 +32,16 @@ abstract class AbstractIndexer implements IndexerInterface
*/
protected $connection;
+ /**
+ * @var ConfigurationContainerInterface
+ */
+ protected $configuration;
+
+ /**
+ * @var string
+ */
+ protected $identifier = '';
+
/**
* @var \TYPO3\CMS\Core\Log\Logger
*/
@@ -44,23 +57,34 @@ abstract class AbstractIndexer implements IndexerInterface
$this->logger = $logManager->getLogger(__CLASS__);
}
+ public function setIdentifier($identifier)
+ {
+ $this->identifier = $identifier;
+ }
+
/**
* @param ConnectionInterface $connection
+ * @param ConfigurationContainerInterface $configuration
*/
- public function __construct(ConnectionInterface $connection)
+ public function __construct(ConnectionInterface $connection, ConfigurationContainerInterface $configuration)
{
$this->connection = $connection;
+ $this->configuration = $configuration;
}
public function indexAllDocuments()
{
$this->logger->info('Start indexing');
foreach ($this->getRecordGenerator() as $records) {
- $this->logger->debug('Index records.', [$records]);
if ($records === null) {
break;
}
+ foreach ($records as &$record) {
+ $this->prepareRecord($record);
+ }
+ // Release the reference, otherwise the last element of $records stays bound to $record
+ // while the array is handed over to the logger and the connection.
+ unset($record);
+
+ $this->logger->debug('Index records.', [$records]);
$this->connection->addDocuments($this->getDocumentName(), $records);
}
$this->logger->info('Finish indexing');
@@ -70,7 +94,10 @@ abstract class AbstractIndexer implements IndexerInterface
{
$this->logger->info('Start indexing single record.', [$identifier]);
try {
- $this->connection->addDocument($this->getDocumentName(), $this->getRecord($identifier));
+ $record = $this->getRecord($identifier);
+ $this->prepareRecord($record);
+
+ $this->connection->addDocument($this->getDocumentName(), $record);
} catch (NoRecordFoundException $e) {
$this->logger->info('Could not index document.', [$e->getMessage()]);
}
@@ -91,6 +118,32 @@ abstract class AbstractIndexer implements IndexerInterface
}
}
+    /**
+     * Adds the auto generated field "search_abstract" to the given record.
+     *
+     * The field is initialised with an empty string. It is then filled with the
+     * trimmed value of the first field from the comma separated setting
+     * "indexing.<identifier>.abstractFields" that exists in the record and is
+     * not empty after trimming.
+     *
+     * @param array &$record Record to index, modified in place.
+     */
+    protected function prepareRecord(array &$record)
+    {
+        $record['search_abstract'] = '';
+
+        try {
+            // Remove empty list entries, so an empty setting results in no fields
+            // instead of a single empty field name.
+            $fieldsToUse = GeneralUtility::trimExplode(
+                ',',
+                $this->configuration->get('indexing.' . $this->identifier . '.abstractFields'),
+                true
+            );
+        } catch (InvalidArgumentException $e) {
+            // The configuration could not provide the setting, keep the empty default.
+            return;
+        }
+
+        foreach ($fieldsToUse as $fieldToUse) {
+            if (isset($record[$fieldToUse]) && trim($record[$fieldToUse])) {
+                $record['search_abstract'] = trim($record[$fieldToUse]);
+                break;
+            }
+        }
+    }
+
/**
* Returns the limit to use to fetch records.
*
diff --git a/Classes/Domain/Index/IndexerFactory.php b/Classes/Domain/Index/IndexerFactory.php
index 6618d01..dbae818 100644
--- a/Classes/Domain/Index/IndexerFactory.php
+++ b/Classes/Domain/Index/IndexerFactory.php
@@ -83,17 +83,30 @@ class IndexerFactory implements Singleton
*/
protected function buildIndexer($indexerClass, $identifier)
{
- if ($indexerClass === TcaIndexer::class) {
- return $this->objectManager->get(
- TcaIndexer::class,
+ $indexer = null;
+ if (is_subclass_of($indexerClass, TcaIndexer\PagesIndexer::class)
+ || $indexerClass === TcaIndexer\PagesIndexer::class
+ ) {
+ $indexer = $this->objectManager->get(
+ $indexerClass,
+ $this->objectManager->get(TcaTableService::class, $identifier),
+ $this->objectManager->get(TcaTableService::class, 'tt_content')
+ );
+ } elseif (is_subclass_of($indexerClass, TcaIndexer::class) || $indexerClass === TcaIndexer::class) {
+ $indexer = $this->objectManager->get(
+ $indexerClass,
$this->objectManager->get(TcaTableService::class, $identifier)
);
+ } elseif (class_exists($indexerClass) && in_array(IndexerInterface::class, class_implements($indexerClass))) {
+ $indexer = $this->objectManager->get($indexerClass);
}
- if (class_exists($indexerClass) && in_array(IndexerInterface::class, class_implements($indexerClass))) {
- return $this->objectManager->get($indexerClass);
+ if ($indexer === null) {
+ throw new NoMatchingIndexerException('Could not find indexer: ' . $indexerClass, 1497341442);
}
- throw new NoMatchingIndexerException('Could not find indexer: ' . $indexerClass, 1497341442);
+ $indexer->setIdentifier($identifier);
+
+ return $indexer;
}
}
diff --git a/Classes/Domain/Index/IndexerInterface.php b/Classes/Domain/Index/IndexerInterface.php
index 5fef64f..5a4ca6c 100644
--- a/Classes/Domain/Index/IndexerInterface.php
+++ b/Classes/Domain/Index/IndexerInterface.php
@@ -40,4 +40,13 @@ interface IndexerInterface
* @return void
*/
public function indexDocument($identifier);
+
+ /**
+ * Receives the identifier of the indexer itself.
+ *
+ * @param string $identifier
+ *
+ * @return void
+ */
+ public function setIdentifier($identifier);
}
diff --git a/Classes/Domain/Index/TcaIndexer.php b/Classes/Domain/Index/TcaIndexer.php
index 7923565..25bef53 100644
--- a/Classes/Domain/Index/TcaIndexer.php
+++ b/Classes/Domain/Index/TcaIndexer.php
@@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Domain\Index;
* 02110-1301, USA.
*/
+use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Connection\ConnectionInterface;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
@@ -38,13 +39,16 @@ class TcaIndexer extends AbstractIndexer
/**
* @param TcaIndexer\TcaTableService $tcaTableService
* @param ConnectionInterface $connection
+ * @param ConfigurationContainerInterface $configuration
*/
public function __construct(
TcaIndexer\TcaTableService $tcaTableService,
- ConnectionInterface $connection
+ ConnectionInterface $connection,
+ ConfigurationContainerInterface $configuration
) {
$this->tcaTableService = $tcaTableService;
$this->connection = $connection;
+ $this->configuration = $configuration;
}
/**
@@ -100,16 +104,19 @@ class TcaIndexer extends AbstractIndexer
return $this->tcaTableService->getTableName();
}
- protected function getQuery() : QueryBuilder
+ protected function getQuery($tcaTableService = null) : QueryBuilder
{
- $queryBuilder = $this->getDatabaseConnection()->getQueryBuilderForTable($this->tcaTableService->getTableName());
- $where = $this->tcaTableService->getWhereClause();
- $query = $queryBuilder->select(... $this->tcaTableService->getFields())
- ->from($this->tcaTableService->getTableClause())
+ if ($tcaTableService === null) {
+ $tcaTableService = $this->tcaTableService;
+ }
+ $queryBuilder = $this->getDatabaseConnection()->getQueryBuilderForTable($tcaTableService->getTableName());
+ $where = $tcaTableService->getWhereClause();
+ $query = $queryBuilder->select(... $tcaTableService->getFields())
+ ->from($tcaTableService->getTableClause())
->where($where->getStatement())
->setParameters($where->getParameters());
- foreach ($this->tcaTableService->getJoins() as $join) {
+ foreach ($tcaTableService->getJoins() as $join) {
$query->from($join->getTable());
$query->andWhere($join->getCondition());
}
diff --git a/Classes/Domain/Index/TcaIndexer/PagesIndexer.php b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php
new file mode 100644
index 0000000..b396b73
--- /dev/null
+++ b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php
@@ -0,0 +1,96 @@
+
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
+use Codappix\SearchCore\Connection\ConnectionInterface;
+use Codappix\SearchCore\Domain\Index\TcaIndexer;
+
+/**
+ * Specific indexer for Pages, will basically add content of page.
+ */
+class PagesIndexer extends TcaIndexer
+{
+    /**
+     * Table service used to fetch the content elements of a page.
+     *
+     * @var TcaTableService
+     */
+    protected $contentTableService;
+
+    /**
+     * @param TcaTableService $tcaTableService
+     * @param TcaTableService $contentTableService
+     * @param ConnectionInterface $connection
+     * @param ConfigurationContainerInterface $configuration
+     */
+    public function __construct(
+        TcaTableService $tcaTableService,
+        TcaTableService $contentTableService,
+        ConnectionInterface $connection,
+        ConfigurationContainerInterface $configuration
+    ) {
+        $this->tcaTableService = $tcaTableService;
+        $this->contentTableService = $contentTableService;
+        $this->connection = $connection;
+        $this->configuration = $configuration;
+    }
+
+    /**
+     * Adds "search_title" and "content" to the page record before the parent
+     * implementation prepares it.
+     *
+     * "search_title" is the trimmed value of the first non-empty field out of
+     * nav_title, tx_tqseo_pagetitle_rel and title. It is not set if none of them
+     * has a value. "content" is set before calling the parent, so the parent
+     * can already read it.
+     *
+     * @param array &$record Page record to index, modified in place.
+     */
+    protected function prepareRecord(array &$record)
+    {
+        $possibleTitleFields = ['nav_title', 'tx_tqseo_pagetitle_rel', 'title'];
+        foreach ($possibleTitleFields as $searchTitleField) {
+            if (isset($record[$searchTitleField]) && trim($record[$searchTitleField])) {
+                $record['search_title'] = trim($record[$searchTitleField]);
+                break;
+            }
+        }
+
+        $record['content'] = $this->fetchContentForPage($record['uid']);
+        parent::prepareRecord($record);
+    }
+
+    /**
+     * Returns the concatenated, tag-free bodytext of the content elements
+     * stored on the given page.
+     *
+     * @param int $uid Uid of the page to fetch content for.
+     * @return string Empty string if the page has no content.
+     */
+    protected function fetchContentForPage($uid)
+    {
+        // Restrict the generic content query to the given page, otherwise every
+        // page would receive the content of all content elements.
+        $contentElements = $this->getQuery($this->contentTableService)
+            ->andWhere($this->contentTableService->getTableName() . '.pid = ' . (int) $uid)
+            ->execute()
+            ->fetchAll();
+
+        // fetchAll() returns an array, so check for an empty result instead of null.
+        if (empty($contentElements)) {
+            $this->logger->debug('No content for page ' . $uid);
+            return '';
+        }
+
+        $this->logger->debug('Fetched content for page ' . $uid);
+        $content = [];
+        foreach ($contentElements as $contentElement) {
+            $content[] = $contentElement['bodytext'];
+        }
+
+        // Remove Tags.
+        // Interpret escaped new lines and special chars.
+        // Trim, e.g. trailing or leading new lines.
+        return trim(stripcslashes(strip_tags(implode(' ', $content))));
+    }
+}
diff --git a/Classes/Domain/Index/TcaIndexer/TcaTableService.php b/Classes/Domain/Index/TcaIndexer/TcaTableService.php
index 77b7578..2ea89d5 100644
--- a/Classes/Domain/Index/TcaIndexer/TcaTableService.php
+++ b/Classes/Domain/Index/TcaIndexer/TcaTableService.php
@@ -143,6 +143,7 @@ class TcaTableService
{
$parameters = [];
$whereClause = $this->getSystemWhereClause();
+
$userDefinedWhere = $this->configuration->getIfExists('indexing.' . $this->getTableName() . '.additionalWhereClause');
if (is_string($userDefinedWhere)) {
$whereClause .= ' AND ' . $userDefinedWhere;
@@ -176,11 +177,14 @@ class TcaTableService
$this->logger->debug('Generated fields.', [$this->tableName, $fields]);
return $fields;
- return implode(', ', $fields);
}
public function getJoins() : array
{
+ if ($this->tableName === 'pages') {
+ return [];
+ }
+
return [
new Join('pages', 'pages.uid = ' . $this->tableName . '.pid'),
];
@@ -192,14 +196,19 @@ class TcaTableService
*/
public function getSystemWhereClause() : string
{
- return '1=1'
+ $whereClause = '1=1'
. BackendUtility::BEenableFields($this->tableName)
. BackendUtility::deleteClause($this->tableName)
-
- . BackendUtility::BEenableFields('pages')
- . BackendUtility::deleteClause('pages')
. ' AND pages.no_search = 0'
;
+
+ if ($this->tableName !== 'pages') {
+ $whereClause .= BackendUtility::BEenableFields('pages')
+ . BackendUtility::deleteClause('pages')
+ ;
+ }
+
+ return $whereClause;
}
/**
diff --git a/Configuration/TypoScript/constants.txt b/Configuration/TypoScript/constants.txt
index 149d610..86f685e 100644
--- a/Configuration/TypoScript/constants.txt
+++ b/Configuration/TypoScript/constants.txt
@@ -9,12 +9,12 @@ plugin {
}
indexing {
- # Pages are not supported yet, see
- # https://github.com/DanielSiepmann/search_core/issues/24 but
- # should also be added, together with additionalWhereClause
- # based on doktypes
tt_content {
- additionalWhereClause = pages.doktype NOT IN (3, 199) AND tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu', 'shortcut', 'search', 'login')
+ additionalWhereClause = tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu', 'shortcut', 'search', 'login')
+ }
+
+ pages {
+ additionalWhereClause = pages.doktype NOT IN (3, 199, 6, 254, 255)
+ abstractFields = abstract, description, bodytext
}
}
}
diff --git a/Configuration/TypoScript/setup.txt b/Configuration/TypoScript/setup.txt
index 0efcf4d..d77c42e 100644
--- a/Configuration/TypoScript/setup.txt
+++ b/Configuration/TypoScript/setup.txt
@@ -9,9 +9,19 @@ plugin {
}
indexing {
+ # Not for direct indexing therefore no indexer.
+ # Used to configure tt_content fetching while indexing pages
tt_content {
- indexer = Codappix\SearchCore\Domain\Index\TcaIndexer
- additionalWhereClause = {$plugin.tx_searchcore.settings.indexing.tt_content.additionalWhereClause}
+ additionalWhereClause (
+ tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu', 'shortcut', 'search', 'login')
+ AND tt_content.bodytext != ''
+ )
+ }
+
+ pages {
+ indexer = Codappix\SearchCore\Domain\Index\TcaIndexer\PagesIndexer
+ additionalWhereClause = {$plugin.tx_searchcore.settings.indexing.pages.additionalWhereClause}
+ abstractFields = {$plugin.tx_searchcore.settings.indexing.pages.abstractFields}
}
}
}
diff --git a/Documentation/source/configuration.rst b/Documentation/source/configuration.rst
index a923ff7..8ea8e29 100644
--- a/Documentation/source/configuration.rst
+++ b/Documentation/source/configuration.rst
@@ -125,7 +125,7 @@ The following settings are available. For each setting its documented which inde
``rootLineBlacklist``
"""""""""""""""""""""
- Used by: :ref:`TcaIndexer`.
+ Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`.
Defines a blacklist of page uids. Records below any of these pages, or subpages, are not
indexed. This allows you to define areas that should not be indexed.
@@ -147,7 +147,7 @@ options are available:
``additionalWhereClause``
"""""""""""""""""""""""""
- Used by: :ref:`TcaIndexer`.
+ Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`.
Add additional SQL to where clauses to determine indexable records from the table. This way you
can exclude specific records like ``tt_content`` records with specific ``CType`` values or
@@ -162,6 +162,26 @@ options are available:
Make sure to prefix all fields with the corresponding table name. The selection from
database will contain joins and can lead to SQL errors if a field exists in multiple tables.
+.. _abstractFields:
+
+``abstractFields``
+"""""""""""""""""""""""""
+
+ Used by: :ref:`PagesIndexer`.
+
+ Defines which fields are used to fill the auto generated field "search_abstract". The
+ fields are checked in the configured order and the first one with a non-empty value is used.
+ The fields have to exist in the record to be indexed. Therefore fields like ``content`` are
+ also possible.
+
+ Example::
+
+ # As last fallback we use the content of the page
+ plugin.tx_searchcore.settings.indexing.