diff --git a/.travis.yml b/.travis.yml
index 2d1ba4b..b347b8e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,11 +6,11 @@ addons:
- oracle-java8-set-default
before_install:
- curl -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.2.0.deb && sudo dpkg -i --force-confnew elasticsearch-5.2.0.deb && sudo service elasticsearch start
+ - mysql -u root -e 'GRANT ALL ON `typo3_ci_ft%`.* TO travis@127.0.0.1;'
language: php
php:
- - 5.6
- 7.0
- 7.1
@@ -24,7 +24,6 @@ env:
- typo3DatabaseHost="127.0.0.1"
- typo3DatabaseUsername="travis"
- typo3DatabasePassword=""
- matrix:
- TYPO3_VERSION="~6.2"
matrix:
diff --git a/Classes/Domain/Index/AbstractIndexer.php b/Classes/Domain/Index/AbstractIndexer.php
index 475532b..143a219 100644
--- a/Classes/Domain/Index/AbstractIndexer.php
+++ b/Classes/Domain/Index/AbstractIndexer.php
@@ -20,7 +20,10 @@ namespace Codappix\SearchCore\Domain\Index;
* 02110-1301, USA.
*/
+use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
+use Codappix\SearchCore\Configuration\InvalidArgumentException;
use Codappix\SearchCore\Connection\ConnectionInterface;
+use \TYPO3\CMS\Core\Utility\GeneralUtility;
abstract class AbstractIndexer implements IndexerInterface
{
@@ -29,6 +32,16 @@ abstract class AbstractIndexer implements IndexerInterface
*/
protected $connection;
+ /**
+ * @var ConfigurationContainerInterface
+ */
+ protected $configuration;
+
+ /**
+ * @var string
+ */
+ protected $identifier = '';
+
/**
* @var \TYPO3\CMS\Core\Log\Logger
*/
@@ -44,23 +57,34 @@ abstract class AbstractIndexer implements IndexerInterface
$this->logger = $logManager->getLogger(__CLASS__);
}
+ /**
+ * Stores the identifier of this indexer. prepareRecord() uses it to build
+ * the configuration path "indexing.<identifier>.abstractFields".
+ *
+ * @param string $identifier
+ *
+ * @return void
+ */
+ public function setIdentifier($identifier)
+ {
+ $this->identifier = $identifier;
+ }
+
/**
+ * Stores the injected connection and configuration on the indexer.
+ *
* @param ConnectionInterface $connection
+ * @param ConfigurationContainerInterface $configuration
*/
- public function __construct(ConnectionInterface $connection)
+ public function __construct(ConnectionInterface $connection, ConfigurationContainerInterface $configuration)
{
$this->connection = $connection;
+ $this->configuration = $configuration;
}
public function indexAllDocuments()
{
$this->logger->info('Start indexing');
foreach ($this->getRecordGenerator() as $records) {
- $this->logger->debug('Index records.', [$records]);
if ($records === null) {
break;
}
+ foreach ($records as &$record) {
+ $this->prepareRecord($record);
+ }
+
+ $this->logger->debug('Index records.', [$records]);
$this->connection->addDocuments($this->getDocumentName(), $records);
}
$this->logger->info('Finish indexing');
@@ -70,7 +94,10 @@ abstract class AbstractIndexer implements IndexerInterface
{
$this->logger->info('Start indexing single record.', [$identifier]);
try {
- $this->connection->addDocument($this->getDocumentName(), $this->getRecord($identifier));
+ $record = $this->getRecord($identifier);
+ $this->prepareRecord($record);
+
+ $this->connection->addDocument($this->getDocumentName(), $record);
} catch (NoRecordFoundException $e) {
$this->logger->info('Could not index document.', [$e->getMessage()]);
}
@@ -91,6 +118,32 @@ abstract class AbstractIndexer implements IndexerInterface
}
}
+    /**
+     * Adds the auto generated field "search_abstract" to the given record.
+     *
+     * The field always exists afterwards. It holds the trimmed value of the
+     * first field listed in "indexing.<identifier>.abstractFields" that is set
+     * in the record and not empty after trimming, or an empty string if there
+     * is no such field.
+     *
+     * @param array &$record Record to prepare, modified in place.
+     *
+     * @return void
+     */
+    protected function prepareRecord(array &$record)
+    {
+        $record['search_abstract'] = '';
+
+        try {
+            // Remove empty list entries (empty option, trailing comma).
+            // Without the third argument an empty option yields [''] and the
+            // list would never be considered empty.
+            $fieldsToUse = GeneralUtility::trimExplode(
+                ',',
+                $this->configuration->get('indexing.' . $this->identifier . '.abstractFields'),
+                true
+            );
+        } catch (InvalidArgumentException $e) {
+            // NOTE(review): presumably thrown when the option is not
+            // configured; the abstract stays empty in that case.
+            return;
+        }
+
+        foreach ($fieldsToUse as $fieldToUse) {
+            if (isset($record[$fieldToUse]) && trim($record[$fieldToUse])) {
+                $record['search_abstract'] = trim($record[$fieldToUse]);
+                break;
+            }
+        }
+    }
+
+
/**
* Returns the limit to use to fetch records.
*
diff --git a/Classes/Domain/Index/IndexerFactory.php b/Classes/Domain/Index/IndexerFactory.php
index 6618d01..dbae818 100644
--- a/Classes/Domain/Index/IndexerFactory.php
+++ b/Classes/Domain/Index/IndexerFactory.php
@@ -83,17 +83,30 @@ class IndexerFactory implements Singleton
*/
protected function buildIndexer($indexerClass, $identifier)
{
- if ($indexerClass === TcaIndexer::class) {
- return $this->objectManager->get(
- TcaIndexer::class,
+ $indexer = null;
+ // PagesIndexer and its subclasses additionally receive a table service for tt_content.
+ if (is_subclass_of($indexerClass, TcaIndexer\PagesIndexer::class)
+ || $indexerClass === TcaIndexer\PagesIndexer::class
+ ) {
+ $indexer = $this->objectManager->get(
+ $indexerClass,
+ $this->objectManager->get(TcaTableService::class, $identifier),
+ $this->objectManager->get(TcaTableService::class, 'tt_content')
+ );
+ } elseif (is_subclass_of($indexerClass, TcaIndexer::class) || $indexerClass === TcaIndexer::class) {
+ // TcaIndexer and its subclasses receive a table service built from $identifier.
+ $indexer = $this->objectManager->get(
+ $indexerClass,
$this->objectManager->get(TcaTableService::class, $identifier)
);
+ } elseif (class_exists($indexerClass) && in_array(IndexerInterface::class, class_implements($indexerClass))) {
+ // Any other existing class implementing IndexerInterface is created without extra arguments.
+ $indexer = $this->objectManager->get($indexerClass);
}
- if (class_exists($indexerClass) && in_array(IndexerInterface::class, class_implements($indexerClass))) {
- return $this->objectManager->get($indexerClass);
+ if ($indexer === null) {
+ throw new NoMatchingIndexerException('Could not find indexer: ' . $indexerClass, 1497341442);
}
- throw new NoMatchingIndexerException('Could not find indexer: ' . $indexerClass, 1497341442);
+ // Hand the identifier to the indexer before returning it.
+ $indexer->setIdentifier($identifier);
+
+ return $indexer;
}
}
diff --git a/Classes/Domain/Index/IndexerInterface.php b/Classes/Domain/Index/IndexerInterface.php
index 5fef64f..5a4ca6c 100644
--- a/Classes/Domain/Index/IndexerInterface.php
+++ b/Classes/Domain/Index/IndexerInterface.php
@@ -40,4 +40,13 @@ interface IndexerInterface
* @return void
*/
public function indexDocument($identifier);
+
+ /**
+ * Receives the identifier of the indexer itself.
+ *
+ * @param string $identifier
+ *
+ * @return void
+ */
+ public function setIdentifier($identifier);
}
diff --git a/Classes/Domain/Index/TcaIndexer.php b/Classes/Domain/Index/TcaIndexer.php
index c29cf60..44d7c46 100644
--- a/Classes/Domain/Index/TcaIndexer.php
+++ b/Classes/Domain/Index/TcaIndexer.php
@@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Domain\Index;
* 02110-1301, USA.
*/
+use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Connection\ConnectionInterface;
/**
@@ -35,13 +36,16 @@ class TcaIndexer extends AbstractIndexer
/**
+ * Stores the injected table service, connection and configuration on the indexer.
+ *
* @param TcaIndexer\TcaTableService $tcaTableService
* @param ConnectionInterface $connection
+ * @param ConfigurationContainerInterface $configuration
*/
public function __construct(
TcaIndexer\TcaTableService $tcaTableService,
- ConnectionInterface $connection
+ ConnectionInterface $connection,
+ ConfigurationContainerInterface $configuration
) {
$this->tcaTableService = $tcaTableService;
$this->connection = $connection;
+ $this->configuration = $configuration;
}
/**
diff --git a/Classes/Domain/Index/TcaIndexer/PagesIndexer.php b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php
new file mode 100644
index 0000000..b6b71be
--- /dev/null
+++ b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php
@@ -0,0 +1,101 @@
+
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
+use Codappix\SearchCore\Connection\ConnectionInterface;
+use Codappix\SearchCore\Domain\Index\TcaIndexer;
+
+/**
+ * Specific indexer for pages. In addition to the page record itself it adds
+ * a search title and the text of the content elements stored on the page.
+ */
+class PagesIndexer extends TcaIndexer
+{
+    /**
+     * Table service used to fetch the content elements of a page.
+     *
+     * @var TcaTableService
+     */
+    protected $contentTableService;
+
+    /**
+     * @param TcaTableService $tcaTableService
+     * @param TcaTableService $contentTableService
+     * @param ConnectionInterface $connection
+     * @param ConfigurationContainerInterface $configuration
+     */
+    public function __construct(
+        TcaTableService $tcaTableService,
+        TcaTableService $contentTableService,
+        ConnectionInterface $connection,
+        ConfigurationContainerInterface $configuration
+    ) {
+        // The parent stores table service, connection and configuration.
+        parent::__construct($tcaTableService, $connection, $configuration);
+        $this->contentTableService = $contentTableService;
+    }
+
+    /**
+     * Adds "search_title" and "content" to the page record and then delegates
+     * to the parent implementation.
+     *
+     * "search_title" is the trimmed value of the first non-empty field out of
+     * nav_title, tx_tqseo_pagetitle_rel and title; it is not set if none of
+     * them has a value.
+     *
+     * @param array &$record Page record to prepare, modified in place.
+     *
+     * @return void
+     */
+    protected function prepareRecord(array &$record)
+    {
+        $possibleTitleFields = ['nav_title', 'tx_tqseo_pagetitle_rel', 'title'];
+        foreach ($possibleTitleFields as $searchTitleField) {
+            if (isset($record[$searchTitleField]) && trim($record[$searchTitleField])) {
+                $record['search_title'] = trim($record[$searchTitleField]);
+                break;
+            }
+        }
+
+        // Content has to be added before calling the parent, so that
+        // "content" can be used as a field for the search abstract.
+        $record['content'] = $this->fetchContentForPage($record['uid']);
+        parent::prepareRecord($record);
+    }
+
+    /**
+     * Returns the concatenated, tag free bodytext of the content elements
+     * selected for the given page.
+     *
+     * @param int $uid Uid of the page.
+     *
+     * @return string Empty string if nothing was fetched.
+     */
+    protected function fetchContentForPage($uid)
+    {
+        $contentElements = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
+            $this->contentTableService->getFields(),
+            $this->contentTableService->getTableClause(),
+            $this->contentTableService->getWhereClause() .
+            sprintf(' AND %s.pid = %u', $this->contentTableService->getTableName(), $uid)
+        );
+
+        // Covers a failed query (null) as well as an empty result set, so
+        // the log message is accurate in both cases.
+        if (empty($contentElements)) {
+            $this->logger->debug('No content for page ' . $uid);
+            return '';
+        }
+
+        $this->logger->debug('Fetched content for page ' . $uid);
+        $content = [];
+        foreach ($contentElements as $contentElement) {
+            $content[] = $contentElement['bodytext'];
+        }
+
+        // Remove Tags.
+        // Interpret escaped new lines and special chars.
+        // Trim, e.g. trailing or leading new lines.
+        return trim(stripcslashes(strip_tags(implode(' ', $content))));
+    }
+}
diff --git a/Classes/Domain/Index/TcaIndexer/TcaTableService.php b/Classes/Domain/Index/TcaIndexer/TcaTableService.php
index 21e6374..b5f48ab 100644
--- a/Classes/Domain/Index/TcaIndexer/TcaTableService.php
+++ b/Classes/Domain/Index/TcaIndexer/TcaTableService.php
@@ -102,6 +102,10 @@ class TcaTableService
*/
public function getTableClause()
{
+ // The pages table is selected on its own; other tables are joined with pages via their pid.
+ if ($this->tableName === 'pages') {
+ return $this->tableName;
+ }
+
return $this->tableName . ' LEFT JOIN pages on ' . $this->tableName . '.pid = pages.uid';
}
@@ -145,12 +149,15 @@ class TcaTableService
$whereClause = '1=1'
. BackendUtility::BEenableFields($this->tableName)
. BackendUtility::deleteClause($this->tableName)
-
- . BackendUtility::BEenableFields('pages')
- . BackendUtility::deleteClause('pages')
. ' AND pages.no_search = 0'
;
+ if ($this->tableName !== 'pages') {
+ $whereClause .= BackendUtility::BEenableFields('pages')
+ . BackendUtility::deleteClause('pages')
+ ;
+ }
+
$userDefinedWhere = $this->configuration->getIfExists('indexing.' . $this->getTableName() . '.additionalWhereClause');
if (is_string($userDefinedWhere)) {
$whereClause .= ' AND ' . $userDefinedWhere;
diff --git a/Configuration/TypoScript/constants.txt b/Configuration/TypoScript/constants.txt
index f97c039..bcf191e 100644
--- a/Configuration/TypoScript/constants.txt
+++ b/Configuration/TypoScript/constants.txt
@@ -9,15 +9,9 @@ plugin {
}
indexing {
- # Pages are not supported yet, see
- # https://github.com/DanielSiepmann/search_core/issues/24 but
- # should also be added, together with additionalWhereClause
- # based on doktypes
- tt_content {
- additionalWhereClause (
- pages.doktype NOT IN (3, 199)
- AND tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu', 'shortcut', 'search', 'login')
- )
+ pages {
+ additionalWhereClause = pages.doktype NOT IN (3, 199, 6, 254, 255)
+ abstractFields = abstract, description, bodytext
}
}
}
diff --git a/Configuration/TypoScript/setup.txt b/Configuration/TypoScript/setup.txt
index 0efcf4d..d77c42e 100644
--- a/Configuration/TypoScript/setup.txt
+++ b/Configuration/TypoScript/setup.txt
@@ -9,9 +9,19 @@ plugin {
}
indexing {
+ # Not for direct indexing therefore no indexer.
+ # Used to configure tt_content fetching while indexing pages
tt_content {
- indexer = Codappix\SearchCore\Domain\Index\TcaIndexer
- additionalWhereClause = {$plugin.tx_searchcore.settings.indexing.tt_content.additionalWhereClause}
+ additionalWhereClause (
+ tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu', 'shortcut', 'search', 'login')
+ AND tt_content.bodytext != ''
+ )
+ }
+
+ pages {
+ indexer = Codappix\SearchCore\Domain\Index\TcaIndexer\PagesIndexer
+ additionalWhereClause = {$plugin.tx_searchcore.settings.indexing.pages.additionalWhereClause}
+ abstractFields = {$plugin.tx_searchcore.settings.indexing.pages.abstractFields}
}
}
}
diff --git a/Documentation/source/configuration.rst b/Documentation/source/configuration.rst
index a923ff7..8ea8e29 100644
--- a/Documentation/source/configuration.rst
+++ b/Documentation/source/configuration.rst
@@ -125,7 +125,7 @@ The following settings are available. For each setting its documented which inde
``rootLineBlacklist``
"""""""""""""""""""""
- Used by: :ref:`TcaIndexer`.
+ Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`.
Defines a blacklist of page uids. Records below any of these pages, or subpages, are not
indexed. This allows you to define areas that should not be indexed.
@@ -147,7 +147,7 @@ options are available:
``additionalWhereClause``
"""""""""""""""""""""""""
- Used by: :ref:`TcaIndexer`.
+ Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`.
Add additional SQL to where clauses to determine indexable records from the table. This way you
can exclude specific records like ``tt_content`` records with specific ``CType`` values or
@@ -162,6 +162,26 @@ options are available:
Make sure to prefix all fields with the corresponding table name. The selection from
database will contain joins and can lead to SQL errors if a field exists in multiple tables.
+.. _abstractFields:
+
+``abstractFields``
+"""""""""""""""""""""""""
+
+ Used by: :ref:`PagesIndexer`.
+
+ Defines a comma separated list of fields used to provide the auto generated field
+ "search_abstract". The first field in the list with a non-empty value is used. The fields have
+ to exist in the record to be indexed. Therefore fields like ``content`` are also possible.
+
+ Example::
+
+ # As last fallback we use the content of the page
+ plugin.tx_searchcore.settings.indexing.