From b6ab05bac7053d4127c70abfccc4e5781b6b104b Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Thu, 13 Jul 2017 12:51:36 +0200 Subject: [PATCH 1/8] FEATURE: Switch from ttcontent to pages Also provide search_abstract as new auto added field which is configurable. --- Classes/Domain/Index/AbstractIndexer.php | 54 ++++++++++- Classes/Domain/Index/IndexerFactory.php | 25 +++-- Classes/Domain/Index/IndexerInterface.php | 9 ++ Classes/Domain/Index/TcaIndexer.php | 6 +- .../Domain/Index/TcaIndexer/PagesIndexer.php | 93 +++++++++++++++++++ .../Index/TcaIndexer/TcaTableService.php | 13 ++- 6 files changed, 187 insertions(+), 13 deletions(-) create mode 100644 Classes/Domain/Index/TcaIndexer/PagesIndexer.php diff --git a/Classes/Domain/Index/AbstractIndexer.php b/Classes/Domain/Index/AbstractIndexer.php index b780dc5..18e1702 100644 --- a/Classes/Domain/Index/AbstractIndexer.php +++ b/Classes/Domain/Index/AbstractIndexer.php @@ -20,7 +20,9 @@ namespace Codappix\SearchCore\Domain\Index; * 02110-1301, USA. 
*/ +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; use Codappix\SearchCore\Connection\ConnectionInterface; +use \TYPO3\CMS\Core\Utility\GeneralUtility; abstract class AbstractIndexer implements IndexerInterface { @@ -29,6 +31,16 @@ abstract class AbstractIndexer implements IndexerInterface */ protected $connection; + /** + * @var ConfigurationContainerInterface + */ + protected $configuration; + + /** + * @var string + */ + protected $identifier; + /** * @var \TYPO3\CMS\Core\Log\Logger */ @@ -44,23 +56,34 @@ abstract class AbstractIndexer implements IndexerInterface $this->logger = $logManager->getLogger(__CLASS__); } + public function setIdentifier($identifier) + { + $this->identifier = $identifier; + } + /** * @param ConnectionInterface $connection + * @param ConfigurationContainerInterface $configuration */ - public function __construct(ConnectionInterface $connection) + public function __construct(ConnectionInterface $connection, ConfigurationContainerInterface $configuration) { $this->connection = $connection; + $this->configuration = $configuration; } public function indexAllDocuments() { $this->logger->info('Start indexing'); foreach ($this->getRecordGenerator() as $records) { - $this->logger->debug('Index records.', [$records]); if ($records === null) { break; } + foreach ($records as &$record) { + $this->prepareRecord($record); + } + + $this->logger->debug('Index records.', [$records]); $this->connection->addDocuments($this->getDocumentName(), $records); } $this->logger->info('Finish indexing'); @@ -70,7 +93,10 @@ abstract class AbstractIndexer implements IndexerInterface { $this->logger->info('Start indexing single record.', [$identifier]); try { - $this->connection->addDocument($this->getDocumentName(), $this->getRecord($identifier)); + $record = $this->getRecord($identifier); + $this->prepareRecord($record); + + $this->connection->addDocument($this->getDocumentName(), $record); } catch (NoRecordFoundException $e) { 
$this->logger->info('Could not index document.', [$e->getMessage()]); } @@ -92,6 +118,28 @@ abstract class AbstractIndexer implements IndexerInterface } } + /** + * @param array &$record + */ + protected function prepareRecord(array &$record) + { + $record['search_abstract'] = ''; + + $fieldsToUse = GeneralUtility::trimExplode( + ',', + $this->configuration->getIfExists('indexing.' . $this->identifier . '.abstractFields') + ); + if (!$fieldsToUse) { + return; + } + foreach ($fieldsToUse as $fieldToUse) { + if (isset($record[$fieldToUse]) && trim($record[$fieldToUse])) { + $record['search_abstract'] = trim($record[$fieldToUse]); + break; + } + } + } + /** * @param int $offset * @param int $limit diff --git a/Classes/Domain/Index/IndexerFactory.php b/Classes/Domain/Index/IndexerFactory.php index 6618d01..dbae818 100644 --- a/Classes/Domain/Index/IndexerFactory.php +++ b/Classes/Domain/Index/IndexerFactory.php @@ -83,17 +83,30 @@ class IndexerFactory implements Singleton */ protected function buildIndexer($indexerClass, $identifier) { - if ($indexerClass === TcaIndexer::class) { - return $this->objectManager->get( - TcaIndexer::class, + $indexer = null; + if (is_subclass_of($indexerClass, TcaIndexer\PagesIndexer::class) + || $indexerClass === TcaIndexer\PagesIndexer::class + ) { + $indexer = $this->objectManager->get( + $indexerClass, + $this->objectManager->get(TcaTableService::class, $identifier), + $this->objectManager->get(TcaTableService::class, 'tt_content') + ); + } elseif (is_subclass_of($indexerClass, TcaIndexer::class) || $indexerClass === TcaIndexer::class) { + $indexer = $this->objectManager->get( + $indexerClass, $this->objectManager->get(TcaTableService::class, $identifier) ); + } elseif (class_exists($indexerClass) && in_array(IndexerInterface::class, class_implements($indexerClass))) { + $indexer = $this->objectManager->get($indexerClass); } - if (class_exists($indexerClass) && in_array(IndexerInterface::class, class_implements($indexerClass))) { - 
return $this->objectManager->get($indexerClass); + if ($indexer === null) { + throw new NoMatchingIndexerException('Could not find indexer: ' . $indexerClass, 1497341442); } - throw new NoMatchingIndexerException('Could not find indexer: ' . $indexerClass, 1497341442); + $indexer->setIdentifier($identifier); + + return $indexer; } } diff --git a/Classes/Domain/Index/IndexerInterface.php b/Classes/Domain/Index/IndexerInterface.php index 5fef64f..5a4ca6c 100644 --- a/Classes/Domain/Index/IndexerInterface.php +++ b/Classes/Domain/Index/IndexerInterface.php @@ -40,4 +40,13 @@ interface IndexerInterface * @return void */ public function indexDocument($identifier); + + /** + * Receives the identifier of the indexer itself. + * + * @param string $identifier + * + * @return void + */ + public function setIdentifier($identifier); } diff --git a/Classes/Domain/Index/TcaIndexer.php b/Classes/Domain/Index/TcaIndexer.php index c29cf60..44d7c46 100644 --- a/Classes/Domain/Index/TcaIndexer.php +++ b/Classes/Domain/Index/TcaIndexer.php @@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Domain\Index; * 02110-1301, USA. 
*/ +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; use Codappix\SearchCore\Connection\ConnectionInterface; /** @@ -35,13 +36,16 @@ class TcaIndexer extends AbstractIndexer /** * @param TcaIndexer\TcaTableService $tcaTableService * @param ConnectionInterface $connection + * @param ConfigurationContainerInterface $configuration */ public function __construct( TcaIndexer\TcaTableService $tcaTableService, - ConnectionInterface $connection + ConnectionInterface $connection, + ConfigurationContainerInterface $configuration ) { $this->tcaTableService = $tcaTableService; $this->connection = $connection; + $this->configuration = $configuration; } /** diff --git a/Classes/Domain/Index/TcaIndexer/PagesIndexer.php b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php new file mode 100644 index 0000000..d4ce0a6 --- /dev/null +++ b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php @@ -0,0 +1,93 @@ + + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; +use Codappix\SearchCore\Connection\ConnectionInterface; +use Codappix\SearchCore\Domain\Index\TcaIndexer; + +/** + * Specific indexer for Pages, will basically add content of page. 
+ */ +class PagesIndexer extends TcaIndexer +{ + /** + * @var TcaTableService + */ + protected $contentTableService; + + /** + * @param TcaTableService $tcaTableService + * @param TcaTableService $contentTableService + * @param ConnectionInterface $connection + * @param ConfigurationContainerInterface $configuration + */ + public function __construct( + TcaTableService $tcaTableService, + TcaTableService $contentTableService, + ConnectionInterface $connection, + ConfigurationContainerInterface $configuration + ) { + $this->tcaTableService = $tcaTableService; + $this->contentTableService = $contentTableService; + $this->connection = $connection; + $this->configuration = $configuration; + } + + /** + * @param array &$record + */ + protected function prepareRecord(array &$record) + { + parent::prepareRecord($record); + $record['content'] = $this->fetchContentForPage($record['uid']); + } + + /** + * @param int $uid + * @return string + */ + protected function fetchContentForPage($uid) + { + $contentElements = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( + $this->contentTableService->getFields(), + $this->contentTableService->getTableClause(), + $this->contentTableService->getWhereClause() . + sprintf(' AND %s.pid = %u', $this->contentTableService->getTableName(), $uid) + ); + + if ($contentElements === null) { + $this->logger->debug('No content for page ' . $uid); + return ''; + } + + $this->logger->debug('Fetched content for page ' . $uid); + $content = []; + foreach ($contentElements as $contentElement) { + $content[] = $contentElement['bodytext']; + } + + // Remove Tags. + // Interpret escaped new lines and special chars. + // Trim, e.g. trailing or leading new lines. 
+ return trim(stripcslashes(strip_tags(implode(' ', $content)))); + } +} diff --git a/Classes/Domain/Index/TcaIndexer/TcaTableService.php b/Classes/Domain/Index/TcaIndexer/TcaTableService.php index 21e6374..b5f48ab 100644 --- a/Classes/Domain/Index/TcaIndexer/TcaTableService.php +++ b/Classes/Domain/Index/TcaIndexer/TcaTableService.php @@ -102,6 +102,10 @@ class TcaTableService */ public function getTableClause() { + if ($this->tableName === 'pages') { + return $this->tableName; + } + return $this->tableName . ' LEFT JOIN pages on ' . $this->tableName . '.pid = pages.uid'; } @@ -145,12 +149,15 @@ class TcaTableService $whereClause = '1=1' . BackendUtility::BEenableFields($this->tableName) . BackendUtility::deleteClause($this->tableName) - - . BackendUtility::BEenableFields('pages') - . BackendUtility::deleteClause('pages') . ' AND pages.no_search = 0' ; + if ($this->tableName !== 'pages') { + $whereClause .= BackendUtility::BEenableFields('pages') + . BackendUtility::deleteClause('pages') + ; + } + $userDefinedWhere = $this->configuration->getIfExists('indexing.' . $this->getTableName() . '.additionalWhereClause'); if (is_string($userDefinedWhere)) { $whereClause .= ' AND ' . 
$userDefinedWhere; From d36d8e859403bd88ec227544adb807726db5f5ca Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Thu, 13 Jul 2017 13:58:16 +0200 Subject: [PATCH 2/8] TASK: Add documentation and config for new feature --- Configuration/TypoScript/constants.txt | 12 +++--------- Configuration/TypoScript/setup.txt | 7 ++++--- Documentation/source/configuration.rst | 24 ++++++++++++++++++++++-- Documentation/source/indexer.rst | 24 +++++++++++++++++++++++- 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/Configuration/TypoScript/constants.txt b/Configuration/TypoScript/constants.txt index f97c039..dff37e3 100644 --- a/Configuration/TypoScript/constants.txt +++ b/Configuration/TypoScript/constants.txt @@ -9,15 +9,9 @@ plugin { } indexing { - # Pages are not supported yet, see - # https://github.com/DanielSiepmann/search_core/issues/24 but - # should also be added, together with additionalWhereClause - # based on doktypes - tt_content { - additionalWhereClause ( - pages.doktype NOT IN (3, 199) - AND tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu', 'shortcut', 'search', 'login') - ) + pages { + additionalWhereClause = pages.doktype NOT IN (3, 199, 6, 254, 255, 199) + abstractFields = abstract, description, bodytext } } } diff --git a/Configuration/TypoScript/setup.txt b/Configuration/TypoScript/setup.txt index 0efcf4d..e81368e 100644 --- a/Configuration/TypoScript/setup.txt +++ b/Configuration/TypoScript/setup.txt @@ -9,9 +9,10 @@ plugin { } indexing { - tt_content { - indexer = Codappix\SearchCore\Domain\Index\TcaIndexer - additionalWhereClause = {$plugin.tx_searchcore.settings.indexing.tt_content.additionalWhereClause} + pages { + indexer = Codappix\SearchCore\Domain\Index\TcaIndexer\PagesIndexer + additionalWhereClause = {$plugin.tx_searchcore.settings.indexing.pages.additionalWhereClause} + abstractFields = {$plugin.tx_searchcore.settings.indexing.pages.abstractFields} } } } diff --git a/Documentation/source/configuration.rst 
b/Documentation/source/configuration.rst index 3d8db75..9db3c6e 100644 --- a/Documentation/source/configuration.rst +++ b/Documentation/source/configuration.rst @@ -125,7 +125,7 @@ The following settings are available. For each setting its documented which inde ``rootLineBlacklist`` """"""""""""""""""""" - Used by: :ref:`TcaIndexer`. + Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`. Defines a blacklist of page uids. Records below any of these pages, or subpages, are not indexed. This allows you to define areas that should not be indexed. @@ -147,7 +147,7 @@ options are available: ``additionalWhereClause`` """"""""""""""""""""""""" - Used by: :ref:`TcaIndexer`. + Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`. Add additional SQL to where clauses to determine indexable records from the table. This way you can exclude specific records like ``tt_content`` records with specific ``CType`` values or @@ -162,6 +162,26 @@ options are available: Make sure to prefix all fields with the corresponding table name. The selection from database will contain joins and can lead to SQL errors if a field exists in multiple tables. +.. _abstractFields: + +``abstractFields`` +""""""""""""""""""""""""" + + Used by: :ref:`PagesIndexer`. + + Define which fields should be checked to provide the auto generated field "search_abstract"; + the first non-empty field is used. The fields have to exist in the record to be indexed. + Therefore fields like ``content`` are also possible. + + Example:: + + # As last fallback we use the content of the page + plugin.tx_searchcore.settings.indexing.<identifier>.abstractFields := addToList(content) + + Default:: + + abstract, description, bodytext + .. _mapping: ``mapping`` diff --git a/Documentation/source/indexer.rst b/Documentation/source/indexer.rst index 01d7b18..ddc6772 100644 --- a/Documentation/source/indexer.rst +++ b/Documentation/source/indexer.rst @@ -27,7 +27,29 @@ The indexer is configurable through the following options: * :ref:`additionalWhereClause` +.. 
_PagesIndexer: + +PagesIndexer +------------ + +Provides zero configuration TYPO3 integration by using the :ref:`t3tcaref:start`. You can just +start indexing TYPO3. + +The indexer will use the TCA to fetch all necessary information like relations. Currently the +implementation is very basic. In the future it will also provide mapping for :ref:`Elasticsearch` and +further features. Also all static content from each page will be concatenated into a single field to +improve search. + +The indexer is configurable through the following options: + +* :ref:`allowedTables` + +* :ref:`rootLineBlacklist` + +* :ref:`additionalWhereClause` + +* :ref:`abstractFields` + .. note:: Not all relations are resolved yet, see :issue:`17` and :pr:`20`. - Also the `pages`-Table is not available yet, see :issue:`24`. From 299ec3af5eedcb92ddb46528017256cd58e47ed6 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Thu, 13 Jul 2017 14:54:37 +0200 Subject: [PATCH 3/8] TASK: Add tests covering new pages indexing Including content and search abstract. --- Configuration/TypoScript/setup.txt | 9 +++ Tests/Functional/Fixtures/BasicSetup.ts | 16 +++++ Tests/Functional/Fixtures/BasicSetup.xml | 1 + .../Fixtures/Indexing/IndexTcaTable.xml | 39 +++++++++++ .../Functional/Indexing/PagesIndexerTest.php | 64 +++++++++++++++++++ Tests/Functional/Indexing/TcaIndexerTest.php | 16 ++--- 6 files changed, 137 insertions(+), 8 deletions(-) create mode 100644 Tests/Functional/Indexing/PagesIndexerTest.php diff --git a/Configuration/TypoScript/setup.txt b/Configuration/TypoScript/setup.txt index e81368e..d77c42e 100644 --- a/Configuration/TypoScript/setup.txt +++ b/Configuration/TypoScript/setup.txt @@ -9,6 +9,15 @@ plugin { } indexing { + # Not for direct indexing therefore no indexer. 
+ # Used to configure tt_content fetching while indexing pages + tt_content { + additionalWhereClause ( + tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu', 'shortcut', 'search', 'login') + AND tt_content.bodytext != '' + ) + } + pages { indexer = Codappix\SearchCore\Domain\Index\TcaIndexer\PagesIndexer additionalWhereClause = {$plugin.tx_searchcore.settings.indexing.pages.additionalWhereClause} diff --git a/Tests/Functional/Fixtures/BasicSetup.ts b/Tests/Functional/Fixtures/BasicSetup.ts index e24aee8..1e2b3a9 100644 --- a/Tests/Functional/Fixtures/BasicSetup.ts +++ b/Tests/Functional/Fixtures/BasicSetup.ts @@ -12,6 +12,22 @@ plugin { tt_content { indexer = Codappix\SearchCore\Domain\Index\TcaIndexer + additionalWhereClause ( + tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu', 'shortcut', 'search', 'login') + AND tt_content.bodytext != '' + ) + + mapping { + CType { + type = keyword + } + } + } + + pages { + indexer = Codappix\SearchCore\Domain\Index\TcaIndexer\PagesIndexer + abstractFields = abstract, description, bodytext + mapping { CType { type = keyword diff --git a/Tests/Functional/Fixtures/BasicSetup.xml b/Tests/Functional/Fixtures/BasicSetup.xml index a85b72a..1a46f3b 100644 --- a/Tests/Functional/Fixtures/BasicSetup.xml +++ b/Tests/Functional/Fixtures/BasicSetup.xml @@ -4,5 +4,6 @@ 1 0 Root page containing necessary TypoScript + Used as abstract as no abstract is defined. diff --git a/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml b/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml index 75a1f35..ec251ce 100644 --- a/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml +++ b/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml @@ -60,4 +60,43 @@ 0 + + 9 + 1 + 1480686370 + 1480686370 + 0 + 72 + list +
not indexed due to ctype
+ this is the content of header content element that should not get indexed + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 +
+ + + 10 + 1 + 1480686370 + 1480686370 + 0 + 72 + html +
Indexed without html tags
+ Some text in paragraph

]]>
+ 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 +
diff --git a/Tests/Functional/Indexing/PagesIndexerTest.php b/Tests/Functional/Indexing/PagesIndexerTest.php new file mode 100644 index 0000000..d0440ba --- /dev/null +++ b/Tests/Functional/Indexing/PagesIndexerTest.php @@ -0,0 +1,64 @@ + + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; +use Codappix\SearchCore\Connection\Elasticsearch; +use Codappix\SearchCore\Domain\Index\IndexerFactory; +use Codappix\SearchCore\Tests\Functional\AbstractFunctionalTestCase; +use TYPO3\CMS\Extbase\Object\ObjectManager; + +class PagesIndexerTest extends AbstractFunctionalTestCase +{ + /** + * @test + */ + public function pagesContainAllAdditionalInformation() + { + $this->importDataSet('Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml'); + + $objectManager = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(ObjectManager::class); + $tableName = 'pages'; + + $connection = $this->getMockBuilder(Elasticsearch::class) + ->setMethods(['addDocuments']) + ->disableOriginalConstructor() + ->getMock(); + + $connection->expects($this->once()) + ->method('addDocuments') + ->with( + $this->stringContains($tableName), + $this->callback(function ($documents) { + return count($documents) === 1 + && isset($documents[0]['content']) && 
$documents[0]['content'] === + 'this is the content of header content element that should get indexed Some text in paragraph' + && isset($documents[0]['search_abstract']) && $documents[0]['search_abstract'] === + 'Used as abstract as no abstract is defined.' + ; + }) + ); + + $indexer = $objectManager->get(IndexerFactory::class)->getIndexer($tableName); + $this->inject($indexer, 'connection', $connection); + $indexer->indexAllDocuments(); + } +} diff --git a/Tests/Functional/Indexing/TcaIndexerTest.php b/Tests/Functional/Indexing/TcaIndexerTest.php index 2b3f817..edf1a74 100644 --- a/Tests/Functional/Indexing/TcaIndexerTest.php +++ b/Tests/Functional/Indexing/TcaIndexerTest.php @@ -30,6 +30,14 @@ use TYPO3\CMS\Extbase\Object\ObjectManager; class TcaIndexerTest extends AbstractFunctionalTestCase { + protected function getTypoScriptFilesForFrontendRootPage() + { + return array_merge( + parent::getTypoScriptFilesForFrontendRootPage(), + ['EXT:search_core/Tests/Functional/Fixtures/Indexing/TcaIndexer/RespectRootLineBlacklist.ts'] + ); + } + /** * @test */ @@ -69,12 +77,4 @@ class TcaIndexerTest extends AbstractFunctionalTestCase $objectManager->get(TcaIndexer::class, $tableService, $connection)->indexAllDocuments(); } - - protected function getTypoScriptFilesForFrontendRootPage() - { - return array_merge( - parent::getTypoScriptFilesForFrontendRootPage(), - ['EXT:search_core/Tests/Functional/Fixtures/Indexing/TcaIndexer/RespectRootLineBlacklist.ts'] - ); - } } From e2c1846d6be1fd57da7db3879b36f83fd2dace81 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Tue, 18 Jul 2017 14:19:32 +0200 Subject: [PATCH 4/8] BUGFIX: Allow parent implementation to access content entry E.g. it's possible to configure search_abstract to contain the value of any record entry. Still content was not possible as it was set afterwards. 
--- Classes/Domain/Index/TcaIndexer/PagesIndexer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Classes/Domain/Index/TcaIndexer/PagesIndexer.php b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php index d4ce0a6..94a898b 100644 --- a/Classes/Domain/Index/TcaIndexer/PagesIndexer.php +++ b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php @@ -57,8 +57,8 @@ class PagesIndexer extends TcaIndexer */ protected function prepareRecord(array &$record) { - parent::prepareRecord($record); $record['content'] = $this->fetchContentForPage($record['uid']); + parent::prepareRecord($record); } /** From a737501dac636eaf390b103e1ed2c7728004d685 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Tue, 18 Jul 2017 15:12:24 +0200 Subject: [PATCH 5/8] TASK: Update existing tests to work with new pages setup --- .../Elasticsearch/IndexTcaTableTest.php | 22 ++++++++++--------- .../Fixtures/Indexing/IndexTcaTable.xml | 2 +- .../Fixtures/Indexing/ResolveRelations.xml | 12 +++++----- .../Fixtures/Indexing/UserWhereClause.xml | 4 ++-- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/Tests/Functional/Connection/Elasticsearch/IndexTcaTableTest.php b/Tests/Functional/Connection/Elasticsearch/IndexTcaTableTest.php index 9678d83..21f084a 100644 --- a/Tests/Functional/Connection/Elasticsearch/IndexTcaTableTest.php +++ b/Tests/Functional/Connection/Elasticsearch/IndexTcaTableTest.php @@ -50,10 +50,10 @@ class IndexTcaTableTest extends AbstractFunctionalTestCase $response = $this->client->request('typo3content/_search?q=*:*'); $this->assertTrue($response->isOK(), 'Elastica did not answer with ok code.'); - $this->assertSame($response->getData()['hits']['total'], 1, 'Not exactly 1 document was indexed.'); + $this->assertSame($response->getData()['hits']['total'], 2, 'Not exactly 2 documents were indexed.'); $this->assertArraySubset( ['_source' => ['header' => 'indexed content element']], - $response->getData()['hits']['hits'][0], + 
$response->getData()['hits']['hits'][1], false, 'Record was not indexed.' ); @@ -90,7 +90,7 @@ class IndexTcaTableTest extends AbstractFunctionalTestCase $response = $this->client->request('typo3content/_search?q=*:*'); $this->assertTrue($response->isOK(), 'Elastica did not answer with ok code.'); - $this->assertSame($response->getData()['hits']['total'], 1, 'Not exactly 1 document was indexed.'); + $this->assertSame($response->getData()['hits']['total'], 2, 'Not exactly 2 documents were indexed.'); } /** @@ -113,16 +113,18 @@ class IndexTcaTableTest extends AbstractFunctionalTestCase $response = $this->client->request('typo3content/_search?q=*:*'); $this->assertTrue($response->isOK(), 'Elastica did not answer with ok code.'); - $this->assertSame($response->getData()['hits']['total'], 2, 'Not exactly 2 documents were indexed.'); + $this->assertSame($response->getData()['hits']['total'], 3, 'Not exactly 3 documents were indexed.'); + $response = $this->client->request('typo3content/_search?q=uid:11'); $this->assertArraySubset( ['_source' => ['header' => 'Also indexable record']], $response->getData()['hits']['hits'][0], false, 'Record was not indexed.' ); + $response = $this->client->request('typo3content/_search?q=uid:6'); $this->assertArraySubset( ['_source' => ['header' => 'indexed content element']], - $response->getData()['hits']['hits'][1], + $response->getData()['hits']['hits'][0], false, 'Record was not indexed.' 
); @@ -143,12 +145,12 @@ class IndexTcaTableTest extends AbstractFunctionalTestCase $response = $this->client->request('typo3content/_search?q=*:*'); $this->assertTrue($response->isOK(), 'Elastica did not answer with ok code.'); - $this->assertSame($response->getData()['hits']['total'], 3, 'Not exactly 3 documents were indexed.'); + $this->assertSame($response->getData()['hits']['total'], 4, 'Not exactly 4 documents were indexed.'); - $response = $this->client->request('typo3content/_search?q=uid:9'); + $response = $this->client->request('typo3content/_search?q=uid:11'); $this->assertArraySubset( ['_source' => [ - 'uid' => '9', + 'uid' => '11', 'CType' => 'Header', // Testing items 'categories' => ['Category 1', 'Category 2'], // Testing mm (with sorting) ]], @@ -157,10 +159,10 @@ class IndexTcaTableTest extends AbstractFunctionalTestCase 'Record was not indexed with resolved category relations to multiple values.' ); - $response = $this->client->request('typo3content/_search?q=uid:10'); + $response = $this->client->request('typo3content/_search?q=uid:12'); $this->assertArraySubset( ['_source' => [ - 'uid' => '10', + 'uid' => '12', 'CType' => 'Header', 'categories' => ['Category 2'], ]], diff --git a/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml b/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml index ec251ce..dd7daaa 100644 --- a/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml +++ b/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml @@ -67,7 +67,7 @@ 1480686370 0 72 - list + div
not indexed due to ctype
this is the content of header content element that should not get indexed 0 diff --git a/Tests/Functional/Fixtures/Indexing/ResolveRelations.xml b/Tests/Functional/Fixtures/Indexing/ResolveRelations.xml index 8b41246..bb76b81 100644 --- a/Tests/Functional/Fixtures/Indexing/ResolveRelations.xml +++ b/Tests/Functional/Fixtures/Indexing/ResolveRelations.xml @@ -1,7 +1,7 @@ - 9 + 11 1 1480686370 1480686370 @@ -22,7 +22,7 @@ - 10 + 12 1 1480686370 1480686370 @@ -92,7 +92,7 @@ 1 - 9 + 11 tt_content categories 2 @@ -100,7 +100,7 @@ 2 - 9 + 11 tt_content categories 1 @@ -108,7 +108,7 @@ 3 - 9 + 11 tt_content categories 3 @@ -117,7 +117,7 @@ 2 - 10 + 12 tt_content categories 1 diff --git a/Tests/Functional/Fixtures/Indexing/UserWhereClause.xml b/Tests/Functional/Fixtures/Indexing/UserWhereClause.xml index 71212b2..12347ef 100644 --- a/Tests/Functional/Fixtures/Indexing/UserWhereClause.xml +++ b/Tests/Functional/Fixtures/Indexing/UserWhereClause.xml @@ -1,7 +1,7 @@ - 9 + 11 1 1480686370 1480686370 @@ -21,7 +21,7 @@ - 10 + 12 1 1480686370 1480686370 From 7722c37ea53f2060a7feb69ac87fac7d4cf882f9 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Thu, 20 Jul 2017 14:22:52 +0200 Subject: [PATCH 6/8] TASK: Check multiple fields for pages to be used as title --- Classes/Domain/Index/TcaIndexer/PagesIndexer.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Classes/Domain/Index/TcaIndexer/PagesIndexer.php b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php index 94a898b..b6b71be 100644 --- a/Classes/Domain/Index/TcaIndexer/PagesIndexer.php +++ b/Classes/Domain/Index/TcaIndexer/PagesIndexer.php @@ -57,6 +57,14 @@ class PagesIndexer extends TcaIndexer */ protected function prepareRecord(array &$record) { + $possibleTitleFields = ['nav_title', 'tx_tqseo_pagetitle_rel', 'title']; + foreach ($possibleTitleFields as $searchTitleField) { + if (isset($record[$searchTitleField]) && trim($record[$searchTitleField])) { + $record['search_title'] = trim($record[$searchTitleField]); 
+ break; + } + } + $record['content'] = $this->fetchContentForPage($record['uid']); parent::prepareRecord($record); } From 334bb34625b0ff579329b906495cc51cb8b0e1ca Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Thu, 27 Jul 2017 13:00:51 +0200 Subject: [PATCH 7/8] TASK: Cleanup code Use get and use a try catch. Receiving null through getIfExists will result in a PHP error. --- Classes/Domain/Index/AbstractIndexer.php | 27 ++++++++++++++---------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/Classes/Domain/Index/AbstractIndexer.php b/Classes/Domain/Index/AbstractIndexer.php index 18e1702..51acc12 100644 --- a/Classes/Domain/Index/AbstractIndexer.php +++ b/Classes/Domain/Index/AbstractIndexer.php @@ -21,6 +21,7 @@ namespace Codappix\SearchCore\Domain\Index; */ use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; +use Codappix\SearchCore\Configuration\InvalidArgumentException; use Codappix\SearchCore\Connection\ConnectionInterface; use \TYPO3\CMS\Core\Utility\GeneralUtility; @@ -125,18 +126,22 @@ abstract class AbstractIndexer implements IndexerInterface { $record['search_abstract'] = ''; - $fieldsToUse = GeneralUtility::trimExplode( - ',', - $this->configuration->getIfExists('indexing.' . $this->identifier . '.abstractFields') - ); - if (!$fieldsToUse) { - return; - } - foreach ($fieldsToUse as $fieldToUse) { - if (isset($record[$fieldToUse]) && trim($record[$fieldToUse])) { - $record['search_abstract'] = trim($record[$fieldToUse]); - break; - } + try { + $fieldsToUse = GeneralUtility::trimExplode( + ',', + $this->configuration->get('indexing.' . $this->identifier . 
'.abstractFields') + ); + if (!$fieldsToUse) { + return; } + foreach ($fieldsToUse as $fieldToUse) { + if (isset($record[$fieldToUse]) && trim($record[$fieldToUse])) { + $record['search_abstract'] = trim($record[$fieldToUse]); + break; + } + } + } catch (InvalidArgumentException $e) { + return; } } From da7692b5022025b5d68084f02e6c9d3baa8e8ac6 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Thu, 27 Jul 2017 16:31:37 +0200 Subject: [PATCH 8/8] TASK: Adjust code accordingly to pr --- Classes/Domain/Index/AbstractIndexer.php | 2 +- Configuration/TypoScript/constants.txt | 2 +- Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Classes/Domain/Index/AbstractIndexer.php b/Classes/Domain/Index/AbstractIndexer.php index f4be6db..143a219 100644 --- a/Classes/Domain/Index/AbstractIndexer.php +++ b/Classes/Domain/Index/AbstractIndexer.php @@ -40,7 +40,7 @@ abstract class AbstractIndexer implements IndexerInterface /** * @var string */ - protected $identifier; + protected $identifier = ''; /** * @var \TYPO3\CMS\Core\Log\Logger diff --git a/Configuration/TypoScript/constants.txt b/Configuration/TypoScript/constants.txt index dff37e3..bcf191e 100644 --- a/Configuration/TypoScript/constants.txt +++ b/Configuration/TypoScript/constants.txt @@ -10,7 +10,7 @@ plugin { indexing { pages { - additionalWhereClause = pages.doktype NOT IN (3, 199, 6, 254, 255, 199) + additionalWhereClause = pages.doktype NOT IN (3, 199, 6, 254, 255) abstractFields = abstract, description, bodytext } } diff --git a/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml b/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml index dd7daaa..c236f07 100644 --- a/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml +++ b/Tests/Functional/Fixtures/Indexing/IndexTcaTable.xml @@ -69,7 +69,7 @@ 72 div
not indexed due to ctype
- this is the content of header content element that should not get indexed + this is the content of div content element that should not get indexed 0 0 0