From 6462052c9b050ed6906e3ca25ae501b71ad81563 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Tue, 25 Jul 2017 09:52:17 +0200 Subject: [PATCH 1/2] FEATURE: Add ngram Provide configuration for index. Provide minimum_should_match configuration. --- .../Connection/Elasticsearch/IndexFactory.php | 39 ++++-- .../Elasticsearch/MappingFactory.php | 8 +- Classes/Domain/Search/QueryFactory.php | 41 ++++-- Documentation/source/configuration.rst | 50 +++++++ .../Elasticsearch/IndexFactoryTest.php | 132 ++++++++++++++++++ .../Elasticsearch/MappingFactoryTest.php | 86 ++++++++++++ Tests/Unit/Domain/Search/QueryFactoryTest.php | 88 +++++++++--- 7 files changed, 397 insertions(+), 47 deletions(-) create mode 100644 Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php create mode 100644 Tests/Unit/Connection/Elasticsearch/MappingFactoryTest.php diff --git a/Classes/Connection/Elasticsearch/IndexFactory.php b/Classes/Connection/Elasticsearch/IndexFactory.php index 019b091..f9fac51 100644 --- a/Classes/Connection/Elasticsearch/IndexFactory.php +++ b/Classes/Connection/Elasticsearch/IndexFactory.php @@ -20,6 +20,8 @@ namespace Codappix\SearchCore\Connection\Elasticsearch; * 02110-1301, USA. */ +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; +use Codappix\SearchCore\Configuration\InvalidArgumentException; use Elastica\Exception\ResponseException; use TYPO3\CMS\Core\SingletonInterface as Singleton; use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface; @@ -31,6 +33,19 @@ use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface; */ class IndexFactory implements Singleton { + /** + * @var ConfigurationContainerInterface + */ + protected $configuration; + + /** + * @param ConfigurationContainerInterface $configuration + */ + public function __construct(ConfigurationContainerInterface $configuration) + { + $this->configuration = $configuration; + } + /** * Get an index bases on TYPO3 table name. * @@ -41,19 +56,25 @@ class IndexFactory implements Singleton */ public function getIndex(Connection $connection, $documentType) { - // TODO: Fetch index name from configuration, based on $documentType. $index = $connection->getClient()->getIndex('typo3content'); - try { - // TODO: Provide configuration?! - // http://elastica.io/getting-started/storing-and-indexing-documents.html#section-analysis - $index->create(); - } catch (ResponseException $exception) { - if (stripos($exception->getMessage(), 'already exists') === false) { - throw $exception; - } + if ($index->exists() === false) { + $index->create($this->getConfigurationFor($documentType)); } return $index; } + + /** + * @param string $documentType + * @return array + */ + protected function getConfigurationFor($documentType) + { + try { + return $this->configuration->get('indexing.' . $documentType . '.index'); + } catch (InvalidArgumentException $e) { + return []; + } + } } diff --git a/Classes/Connection/Elasticsearch/MappingFactory.php b/Classes/Connection/Elasticsearch/MappingFactory.php index d4ac037..44a05c1 100644 --- a/Classes/Connection/Elasticsearch/MappingFactory.php +++ b/Classes/Connection/Elasticsearch/MappingFactory.php @@ -53,7 +53,13 @@ class MappingFactory implements Singleton { $mapping = new \Elastica\Type\Mapping(); $mapping->setType($type); - $mapping->setProperties($this->getConfiguration($type->getName())); + + $configuration = $this->getConfiguration($type->getName()); + if (isset($configuration['_all'])) { + $mapping->setAllField($configuration['_all']); + unset($configuration['_all']); + } + $mapping->setProperties($configuration); return $mapping; } diff --git a/Classes/Domain/Search/QueryFactory.php b/Classes/Domain/Search/QueryFactory.php index 7809fb6..a64bcf1 100644 --- a/Classes/Domain/Search/QueryFactory.php +++ b/Classes/Domain/Search/QueryFactory.php @@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Domain\Search; * 02110-1301, USA. */ +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; use Codappix\SearchCore\Connection\ConnectionInterface; use Codappix\SearchCore\Connection\Elasticsearch\Query; use Codappix\SearchCore\Connection\SearchRequestInterface; @@ -32,6 +33,11 @@ class QueryFactory */ protected $logger; + /** + * @var ConfigurationContainerInterface + */ + protected $configuration; + /** * @var array */ @@ -39,10 +45,14 @@ class QueryFactory /** * @param \TYPO3\CMS\Core\Log\LogManager $logManager + * @param ConfigurationContainerInterface $configuration */ - public function __construct(\TYPO3\CMS\Core\Log\LogManager $logManager) - { + public function __construct( + \TYPO3\CMS\Core\Log\LogManager $logManager, + ConfigurationContainerInterface $configuration + ) { $this->logger = $logManager->getLogger(__CLASS__); + $this->configuration = $configuration; } /** @@ -76,19 +86,20 @@ class QueryFactory */ protected function addSearch(SearchRequestInterface $searchRequest) { - $this->query = ArrayUtility::arrayMergeRecursiveOverrule($this->query, [ - 'query' => [ - 'bool' => [ - 'must' => [ - [ - 'match' => [ - '_all' => $searchRequest->getSearchTerm() - ], - ], - ], - ], - ], - ]); + $this->query = ArrayUtility::setValueByPath( + $this->query, + 'query.bool.must.0.match._all.query', + $searchRequest->getSearchTerm() + ); + + $minimumShouldMatch = $this->configuration->getIfExists('searching.minimumShouldMatch'); + if ($minimumShouldMatch) { + $this->query = ArrayUtility::setValueByPath( + $this->query, + 'query.bool.must.0.match._all.minimum_should_match', + $minimumShouldMatch + ); + } } /** diff --git a/Documentation/source/configuration.rst b/Documentation/source/configuration.rst index 3d8db75..e7b8a7f 100644 --- a/Documentation/source/configuration.rst +++ b/Documentation/source/configuration.rst @@ -184,6 +184,43 @@ options are available: makes building a facet possible. +.. _index: + +``index`` +""""""""" + + Used by: Elasticsearch connection while indexing. + + Define index for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/indices-create-index.html + + Example:: + + plugin.tx_searchcore.settings.indexing.tt_content.index { + analysis { + analyzer { + ngram4 { + type = custom + tokenizer = ngram4 + char_filter { + html_strip + } + + filter { + lowercase + } + } + } + + tokenizer { + ngram4 { + type = ngram + min_gram = 4 + max_gram = 4 + } + } + } + } + .. _configuration_options_search: Searching @@ -209,3 +246,16 @@ Searching The above example will provide a facet with options for all found ``CType`` results together with a count. + +.. _minimumShouldMatch: + +``minimumShouldMatch`` +"""""""""""""""""""""" + + Used by: Elasticsearch connection while building search query. + + Define the minimum match for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-minimum-should-match.html + + Example:: + + plugin.tx_searchcore.settings.searching.minimumShouldMatch = 50% diff --git a/Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php b/Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php new file mode 100644 index 0000000..ee55156 --- /dev/null +++ b/Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php @@ -0,0 +1,132 @@ + + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; +use Codappix\SearchCore\Connection\Elasticsearch\Connection; +use Codappix\SearchCore\Connection\Elasticsearch\IndexFactory; +use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase; + +class IndexFactoryTest extends AbstractUnitTestCase +{ + /** + * @var IndexFactory + */ + protected $subject; + + public function setUp() + { + parent::setUp(); + + $this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock(); + $this->subject = new IndexFactory($this->configuration); + } + + /** + * @test + */ + public function indexIsNotCreatedIfAlreadyExisting() + { + $indexMock = $this->getMockBuilder(\Elastica\Index::class) + ->disableOriginalConstructor() + ->getMock(); + $indexMock->expects($this->once()) + ->method('exists') + ->willReturn(true); + $indexMock->expects($this->never()) + ->method('create'); + $clientMock = $this->getMockBuilder(\Elastica\Client::class) + ->disableOriginalConstructor() + ->getMock(); + $clientMock->expects($this->once()) + ->method('getIndex') + ->with('typo3content') + ->willReturn($indexMock); + $connection = $this->getMockBuilder(Connection::class) + ->disableOriginalConstructor() + ->getMock(); + $connection->expects($this->once()) + ->method('getClient') + ->willReturn($clientMock); + + $this->subject->getIndex($connection, 'someIndex'); + } + + /** + * @test + */ + public function typoScriptConfigurationIsProvidedToIndex() + { + $configuration = [ + 'analysis' => [ + 'analyzer' => [ + 'ngram4' => [ + 'type' => 'custom', + 'tokenizer' => 'ngram4', + 'char_filter' => [ + 'html_strip', + ], + 'filter' => [ + 'lowercase', + ], + ], + ], + 'tokenizer' => [ + 'ngram4' => [ + 'type' => 'ngram', + 'min_gram' => 4, + 'max_gram' => 4, + ], + ], + ], + ]; + + $indexMock = $this->getMockBuilder(\Elastica\Index::class) + ->disableOriginalConstructor() + ->getMock(); + $indexMock->expects($this->once()) + ->method('exists') + ->willReturn(false); + $indexMock->expects($this->once()) + ->method('create') + ->with($configuration); + $clientMock = $this->getMockBuilder(\Elastica\Client::class) + ->disableOriginalConstructor() + ->getMock(); + $clientMock->expects($this->once()) + ->method('getIndex') + ->with('typo3content') + ->willReturn($indexMock); + $connection = $this->getMockBuilder(Connection::class) + ->disableOriginalConstructor() + ->getMock(); + $connection->expects($this->once()) + ->method('getClient') + ->willReturn($clientMock); + + $this->configuration->expects($this->once()) + ->method('get') + ->with('indexing.someIndex.index') + ->willReturn($configuration); + + $this->subject->getIndex($connection, 'someIndex'); + } +} diff --git a/Tests/Unit/Connection/Elasticsearch/MappingFactoryTest.php b/Tests/Unit/Connection/Elasticsearch/MappingFactoryTest.php new file mode 100644 index 0000000..8dde1cb --- /dev/null +++ b/Tests/Unit/Connection/Elasticsearch/MappingFactoryTest.php @@ -0,0 +1,86 @@ + + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; +use Codappix\SearchCore\Connection\Elasticsearch\MappingFactory; +use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase; + +class MappingFactoryTest extends AbstractUnitTestCase +{ + /** + * @var MappingFactory + */ + protected $subject; + + public function setUp() + { + parent::setUp(); + + $this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock(); + $this->subject = new MappingFactory($this->configuration); + } + + /** + * @test + */ + public function typoScriptConfigurationIsProvidedToIndex() + { + $indexName = 'someIndex'; + $configuration = [ + '_all' => [ + 'type' => 'text', + 'analyzer' => 'ngram4', + ], + 'channel' => [ + 'type' => 'keyword', + ], + ]; + $type = $this->getMockBuilder(\Elastica\Type::class) + ->disableOriginalConstructor() + ->getMock(); + $type->expects($this->any()) + ->method('getName') + ->willReturn($indexName); + $this->configuration->expects($this->once()) + ->method('get') + ->with('indexing.' . $indexName . '.mapping') + ->willReturn($configuration); + + $mapping = $this->subject->getMapping($type)->toArray()[$indexName]; + $this->assertArraySubset( + [ + '_all' => $configuration['_all'] + ], + $mapping, + true, + 'Configuration of _all field was not set for mapping.' + ); + $this->assertArraySubset( + [ + 'channel' => $configuration['channel'] + ], + $mapping['properties'], + true, + 'Configuration for properties was not set for mapping.' + ); + } +} diff --git a/Tests/Unit/Domain/Search/QueryFactoryTest.php b/Tests/Unit/Domain/Search/QueryFactoryTest.php index 32c8fbd..8dcf218 100644 --- a/Tests/Unit/Domain/Search/QueryFactoryTest.php +++ b/Tests/Unit/Domain/Search/QueryFactoryTest.php @@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Tests\Unit\Domain\Search; * 02110-1301, USA. */ +use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; use Codappix\SearchCore\Domain\Model\FacetRequest; use Codappix\SearchCore\Domain\Model\SearchRequest; use Codappix\SearchCore\Domain\Search\QueryFactory; @@ -32,11 +33,17 @@ class QueryFactoryTest extends AbstractUnitTestCase */ protected $subject; + /** + * @var ConfigurationContainerInterface + */ + protected $configuration; + public function setUp() { parent::setUp(); - $this->subject = new QueryFactory($this->getMockedLogger()); + $this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock(); + $this->subject = new QueryFactory($this->getMockedLogger(), $this->configuration); } /** @@ -97,27 +104,6 @@ class QueryFactoryTest extends AbstractUnitTestCase ); } - /** - * @test - */ - public function userInputIsAlwaysString() - { - $searchRequest = new SearchRequest(10); - $searchRequest->setFilter(['field' => 20]); - - $query = $this->subject->create($searchRequest); - $this->assertSame( - '10', - $query->toArray()['query']['bool']['must'][0]['match']['_all'], - 'Search word was not escaped as expected.' - ); - $this->assertSame( - '20', - $query->toArray()['query']['bool']['filter'][0]['term']['field'], - 'Search word was not escaped as expected.' - ); - } - /** * @test */ @@ -145,4 +131,62 @@ class QueryFactoryTest extends AbstractUnitTestCase 'Facets were not added to query.' ); } + + /** + * @test + */ + public function searchTermIsAddedToQuery() + { + $searchRequest = new SearchRequest('SearchWord'); + $query = $this->subject->create($searchRequest); + + $this->assertSame( + [ + 'bool' => [ + 'must' => [ + [ + 'match' => [ + '_all' => [ + 'query' => 'SearchWord', + ], + ], + ], + ], + ], + ], + $query->toArray()['query'], + 'Search term was not added to query as expected.' + ); + } + + /** + * @test + */ + public function minimumShouldMatchIsAddedToQuery() + { + $searchRequest = new SearchRequest('SearchWord'); + $this->configuration->expects($this->once()) + ->method('getIfExists') + ->with('searching.minimumShouldMatch') + ->willReturn('50%'); + $query = $this->subject->create($searchRequest); + + $this->assertArraySubset( + [ + 'bool' => [ + 'must' => [ + [ + 'match' => [ + '_all' => [ + 'minimum_should_match' => '50%', + ], + ], + ], + ], + ], + ], + $query->toArray()['query'], + 'minimum_should_match was not added to query as configured.' + ); + } } From c1cc16efa59154cb8a6516a37a86e5ad79510465 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Tue, 25 Jul 2017 15:00:25 +0200 Subject: [PATCH 2/2] BUGFIX: Fix nun working options due to miss match of ts and es As TypoScript does not provide a way to configure key less options, we use a comma separated list and explode them to stay compatible with elasticsearch. --- .../Connection/Elasticsearch/IndexFactory.php | 30 ++++++++++++++++++- Documentation/source/configuration.rst | 11 +++---- .../Elasticsearch/IndexFactoryTest.php | 14 ++++----- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/Classes/Connection/Elasticsearch/IndexFactory.php b/Classes/Connection/Elasticsearch/IndexFactory.php index f9fac51..66b448b 100644 --- a/Classes/Connection/Elasticsearch/IndexFactory.php +++ b/Classes/Connection/Elasticsearch/IndexFactory.php @@ -24,6 +24,7 @@ use Codappix\SearchCore\Configuration\ConfigurationContainerInterface; use Codappix\SearchCore\Configuration\InvalidArgumentException; use Elastica\Exception\ResponseException; use TYPO3\CMS\Core\SingletonInterface as Singleton; +use TYPO3\CMS\Core\Utility\GeneralUtility; use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface; /** @@ -67,14 +68,41 @@ class IndexFactory implements Singleton /** * @param string $documentType + * * @return array */ protected function getConfigurationFor($documentType) { try { - return $this->configuration->get('indexing.' . $documentType . '.index'); + $configuration = $this->configuration->get('indexing.' . $documentType . '.index'); + + if (isset($configuration['analysis']['analyzer'])) { + foreach ($configuration['analysis']['analyzer'] as $key => $analyzer) { + $configuration['analysis']['analyzer'][$key] = $this->prepareAnalyzerConfiguration($analyzer); + } + } + + return $configuration; } catch (InvalidArgumentException $e) { return []; } } + + /** + * @param array $analyzer + * + * @return array + */ + protected function prepareAnalyzerConfiguration(array $analyzer) + { + $fieldsToExplode = ['char_filter', 'filter']; + + foreach ($fieldsToExplode as $fieldToExplode) { + if (isset($analyzer[$fieldToExplode])) { + $analyzer[$fieldToExplode] = GeneralUtility::trimExplode(',', $analyzer[$fieldToExplode], true); + } + } + + return $analyzer; + } } diff --git a/Documentation/source/configuration.rst b/Documentation/source/configuration.rst index e7b8a7f..68994d7 100644 --- a/Documentation/source/configuration.rst +++ b/Documentation/source/configuration.rst @@ -201,13 +201,8 @@ options are available: ngram4 { type = custom tokenizer = ngram4 - char_filter { - html_strip - } - - filter { - lowercase - } + char_filter = html_strip + filter = lowercase, asciifolding } } @@ -221,6 +216,8 @@ options are available: } } + ``char_filter`` and ``filter`` are a comma separated list of options. + .. _configuration_options_search: Searching diff --git a/Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php b/Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php index ee55156..c73fe36 100644 --- a/Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php +++ b/Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php @@ -81,12 +81,8 @@ class IndexFactoryTest extends AbstractUnitTestCase 'ngram4' => [ 'type' => 'custom', 'tokenizer' => 'ngram4', - 'char_filter' => [ - 'html_strip', - ], - 'filter' => [ - 'lowercase', - ], + 'char_filter' => 'html_strip', + 'filter' => 'lowercase, , asciifolding', ], ], 'tokenizer' => [ @@ -99,6 +95,10 @@ class IndexFactoryTest extends AbstractUnitTestCase ], ]; + $expectedConfiguration = $configuration; + $expectedConfiguration['analysis']['analyzer']['ngram4']['char_filter'] = ['html_strip']; + $expectedConfiguration['analysis']['analyzer']['ngram4']['filter'] = ['lowercase', 'asciifolding']; + $indexMock = $this->getMockBuilder(\Elastica\Index::class) ->disableOriginalConstructor() ->getMock(); @@ -107,7 +107,7 @@ class IndexFactoryTest extends AbstractUnitTestCase ->willReturn(false); $indexMock->expects($this->once()) ->method('create') - ->with($configuration); + ->with($expectedConfiguration); $clientMock = $this->getMockBuilder(\Elastica\Client::class) ->disableOriginalConstructor() ->getMock();