Merge pull request #69 from Codappix/feature/support-ngram

FEATURE: Add ngram
This commit is contained in:
Daniel Siepmann 2017-07-27 14:27:26 +02:00 committed by GitHub
commit 5fef9e3931
7 changed files with 422 additions and 47 deletions

View file

@ -20,8 +20,11 @@ namespace Codappix\SearchCore\Connection\Elasticsearch;
* 02110-1301, USA. * 02110-1301, USA.
*/ */
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Configuration\InvalidArgumentException;
use Elastica\Exception\ResponseException; use Elastica\Exception\ResponseException;
use TYPO3\CMS\Core\SingletonInterface as Singleton; use TYPO3\CMS\Core\SingletonInterface as Singleton;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface; use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface;
/** /**
@ -31,6 +34,19 @@ use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface;
*/ */
class IndexFactory implements Singleton class IndexFactory implements Singleton
{ {
/**
* @var ConfigurationContainerInterface
*/
protected $configuration;
/**
* @param ConfigurationContainerInterface $configuration
*/
public function __construct(ConfigurationContainerInterface $configuration)
{
$this->configuration = $configuration;
}
/** /**
* Get an index bases on TYPO3 table name. * Get an index bases on TYPO3 table name.
* *
@ -41,19 +57,52 @@ class IndexFactory implements Singleton
*/ */
public function getIndex(Connection $connection, $documentType) public function getIndex(Connection $connection, $documentType)
{ {
// TODO: Fetch index name from configuration, based on $documentType.
$index = $connection->getClient()->getIndex('typo3content'); $index = $connection->getClient()->getIndex('typo3content');
try { if ($index->exists() === false) {
// TODO: Provide configuration?! $index->create($this->getConfigurationFor($documentType));
// http://elastica.io/getting-started/storing-and-indexing-documents.html#section-analysis
$index->create();
} catch (ResponseException $exception) {
if (stripos($exception->getMessage(), 'already exists') === false) {
throw $exception;
}
} }
return $index; return $index;
} }
/**
* @param string $documentType
*
* @return array
*/
protected function getConfigurationFor($documentType)
{
try {
$configuration = $this->configuration->get('indexing.' . $documentType . '.index');
if (isset($configuration['analysis']['analyzer'])) {
foreach ($configuration['analysis']['analyzer'] as $key => $analyzer) {
$configuration['analysis']['analyzer'][$key] = $this->prepareAnalyzerConfiguration($analyzer);
}
}
return $configuration;
} catch (InvalidArgumentException $e) {
return [];
}
}
/**
* @param array $analyzer
*
* @return array
*/
protected function prepareAnalyzerConfiguration(array $analyzer)
{
$fieldsToExplode = ['char_filter', 'filter'];
foreach ($fieldsToExplode as $fieldToExplode) {
if (isset($analyzer[$fieldToExplode])) {
$analyzer[$fieldToExplode] = GeneralUtility::trimExplode(',', $analyzer[$fieldToExplode], true);
}
}
return $analyzer;
}
} }

View file

@ -53,7 +53,13 @@ class MappingFactory implements Singleton
{ {
$mapping = new \Elastica\Type\Mapping(); $mapping = new \Elastica\Type\Mapping();
$mapping->setType($type); $mapping->setType($type);
$mapping->setProperties($this->getConfiguration($type->getName()));
$configuration = $this->getConfiguration($type->getName());
if (isset($configuration['_all'])) {
$mapping->setAllField($configuration['_all']);
unset($configuration['_all']);
}
$mapping->setProperties($configuration);
return $mapping; return $mapping;
} }

View file

@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Domain\Search;
* 02110-1301, USA. * 02110-1301, USA.
*/ */
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Connection\ConnectionInterface; use Codappix\SearchCore\Connection\ConnectionInterface;
use Codappix\SearchCore\Connection\Elasticsearch\Query; use Codappix\SearchCore\Connection\Elasticsearch\Query;
use Codappix\SearchCore\Connection\SearchRequestInterface; use Codappix\SearchCore\Connection\SearchRequestInterface;
@ -32,6 +33,11 @@ class QueryFactory
*/ */
protected $logger; protected $logger;
/**
* @var ConfigurationContainerInterface
*/
protected $configuration;
/** /**
* @var array * @var array
*/ */
@ -39,10 +45,14 @@ class QueryFactory
/** /**
* @param \TYPO3\CMS\Core\Log\LogManager $logManager * @param \TYPO3\CMS\Core\Log\LogManager $logManager
* @param ConfigurationContainerInterface $configuration
*/ */
public function __construct(\TYPO3\CMS\Core\Log\LogManager $logManager) public function __construct(
{ \TYPO3\CMS\Core\Log\LogManager $logManager,
ConfigurationContainerInterface $configuration
) {
$this->logger = $logManager->getLogger(__CLASS__); $this->logger = $logManager->getLogger(__CLASS__);
$this->configuration = $configuration;
} }
/** /**
@ -76,19 +86,20 @@ class QueryFactory
*/ */
protected function addSearch(SearchRequestInterface $searchRequest) protected function addSearch(SearchRequestInterface $searchRequest)
{ {
$this->query = ArrayUtility::arrayMergeRecursiveOverrule($this->query, [ $this->query = ArrayUtility::setValueByPath(
'query' => [ $this->query,
'bool' => [ 'query.bool.must.0.match._all.query',
'must' => [ $searchRequest->getSearchTerm()
[ );
'match' => [
'_all' => $searchRequest->getSearchTerm() $minimumShouldMatch = $this->configuration->getIfExists('searching.minimumShouldMatch');
], if ($minimumShouldMatch) {
], $this->query = ArrayUtility::setValueByPath(
], $this->query,
], 'query.bool.must.0.match._all.minimum_should_match',
], $minimumShouldMatch
]); );
}
} }
/** /**

View file

@ -184,6 +184,40 @@ options are available:
makes building a facet possible. makes building a facet possible.
.. _index:
``index``
"""""""""
Used by: Elasticsearch connection while indexing.
Define index for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/indices-create-index.html
Example::
plugin.tx_searchcore.settings.indexing.tt_content.index {
analysis {
analyzer {
ngram4 {
type = custom
tokenizer = ngram4
char_filter = html_strip
filter = lowercase, asciifolding
}
}
tokenizer {
ngram4 {
type = ngram
min_gram = 4
max_gram = 4
}
}
}
}
``char_filter`` and ``filter`` are a comma separated list of options.
.. _configuration_options_search: .. _configuration_options_search:
Searching Searching
@ -209,3 +243,16 @@ Searching
The above example will provide a facet with options for all found ``CType`` results together The above example will provide a facet with options for all found ``CType`` results together
with a count. with a count.
.. _minimumShouldMatch:
``minimumShouldMatch``
""""""""""""""""""""""
Used by: Elasticsearch connection while building search query.
Define the minimum match for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-minimum-should-match.html
Example::
plugin.tx_searchcore.settings.searching.minimumShouldMatch = 50%

View file

@ -0,0 +1,132 @@
<?php
namespace Codappix\SearchCore\Tests\Unit\Connection\Elasticsearch;
/*
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Connection\Elasticsearch\Connection;
use Codappix\SearchCore\Connection\Elasticsearch\IndexFactory;
use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase;
class IndexFactoryTest extends AbstractUnitTestCase
{
/**
* @var IndexFactory
*/
protected $subject;
public function setUp()
{
parent::setUp();
$this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock();
$this->subject = new IndexFactory($this->configuration);
}
/**
* @test
*/
public function indexIsNotCreatedIfAlreadyExisting()
{
$indexMock = $this->getMockBuilder(\Elastica\Index::class)
->disableOriginalConstructor()
->getMock();
$indexMock->expects($this->once())
->method('exists')
->willReturn(true);
$indexMock->expects($this->never())
->method('create');
$clientMock = $this->getMockBuilder(\Elastica\Client::class)
->disableOriginalConstructor()
->getMock();
$clientMock->expects($this->once())
->method('getIndex')
->with('typo3content')
->willReturn($indexMock);
$connection = $this->getMockBuilder(Connection::class)
->disableOriginalConstructor()
->getMock();
$connection->expects($this->once())
->method('getClient')
->willReturn($clientMock);
$this->subject->getIndex($connection, 'someIndex');
}
/**
* @test
*/
public function typoScriptConfigurationIsProvidedToIndex()
{
$configuration = [
'analysis' => [
'analyzer' => [
'ngram4' => [
'type' => 'custom',
'tokenizer' => 'ngram4',
'char_filter' => 'html_strip',
'filter' => 'lowercase, , asciifolding',
],
],
'tokenizer' => [
'ngram4' => [
'type' => 'ngram',
'min_gram' => 4,
'max_gram' => 4,
],
],
],
];
$expectedConfiguration = $configuration;
$expectedConfiguration['analysis']['analyzer']['ngram4']['char_filter'] = ['html_strip'];
$expectedConfiguration['analysis']['analyzer']['ngram4']['filter'] = ['lowercase', 'asciifolding'];
$indexMock = $this->getMockBuilder(\Elastica\Index::class)
->disableOriginalConstructor()
->getMock();
$indexMock->expects($this->once())
->method('exists')
->willReturn(false);
$indexMock->expects($this->once())
->method('create')
->with($expectedConfiguration);
$clientMock = $this->getMockBuilder(\Elastica\Client::class)
->disableOriginalConstructor()
->getMock();
$clientMock->expects($this->once())
->method('getIndex')
->with('typo3content')
->willReturn($indexMock);
$connection = $this->getMockBuilder(Connection::class)
->disableOriginalConstructor()
->getMock();
$connection->expects($this->once())
->method('getClient')
->willReturn($clientMock);
$this->configuration->expects($this->once())
->method('get')
->with('indexing.someIndex.index')
->willReturn($configuration);
$this->subject->getIndex($connection, 'someIndex');
}
}

View file

@ -0,0 +1,86 @@
<?php
namespace Codappix\SearchCore\Tests\Unit\Connection\Elasticsearch;
/*
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Connection\Elasticsearch\MappingFactory;
use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase;
class MappingFactoryTest extends AbstractUnitTestCase
{
/**
* @var MappingFactory
*/
protected $subject;
public function setUp()
{
parent::setUp();
$this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock();
$this->subject = new MappingFactory($this->configuration);
}
/**
* @test
*/
public function typoScriptConfigurationIsProvidedToIndex()
{
$indexName = 'someIndex';
$configuration = [
'_all' => [
'type' => 'text',
'analyzer' => 'ngram4',
],
'channel' => [
'type' => 'keyword',
],
];
$type = $this->getMockBuilder(\Elastica\Type::class)
->disableOriginalConstructor()
->getMock();
$type->expects($this->any())
->method('getName')
->willReturn($indexName);
$this->configuration->expects($this->once())
->method('get')
->with('indexing.' . $indexName . '.mapping')
->willReturn($configuration);
$mapping = $this->subject->getMapping($type)->toArray()[$indexName];
$this->assertArraySubset(
[
'_all' => $configuration['_all']
],
$mapping,
true,
'Configuration of _all field was not set for mapping.'
);
$this->assertArraySubset(
[
'channel' => $configuration['channel']
],
$mapping['properties'],
true,
'Configuration for properties was not set for mapping.'
);
}
}

View file

@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Tests\Unit\Domain\Search;
* 02110-1301, USA. * 02110-1301, USA.
*/ */
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Domain\Model\FacetRequest; use Codappix\SearchCore\Domain\Model\FacetRequest;
use Codappix\SearchCore\Domain\Model\SearchRequest; use Codappix\SearchCore\Domain\Model\SearchRequest;
use Codappix\SearchCore\Domain\Search\QueryFactory; use Codappix\SearchCore\Domain\Search\QueryFactory;
@ -32,11 +33,17 @@ class QueryFactoryTest extends AbstractUnitTestCase
*/ */
protected $subject; protected $subject;
/**
* @var ConfigurationContainerInterface
*/
protected $configuration;
public function setUp() public function setUp()
{ {
parent::setUp(); parent::setUp();
$this->subject = new QueryFactory($this->getMockedLogger()); $this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock();
$this->subject = new QueryFactory($this->getMockedLogger(), $this->configuration);
} }
/** /**
@ -97,27 +104,6 @@ class QueryFactoryTest extends AbstractUnitTestCase
); );
} }
/**
* @test
*/
public function userInputIsAlwaysString()
{
$searchRequest = new SearchRequest(10);
$searchRequest->setFilter(['field' => 20]);
$query = $this->subject->create($searchRequest);
$this->assertSame(
'10',
$query->toArray()['query']['bool']['must'][0]['match']['_all'],
'Search word was not escaped as expected.'
);
$this->assertSame(
'20',
$query->toArray()['query']['bool']['filter'][0]['term']['field'],
'Search word was not escaped as expected.'
);
}
/** /**
* @test * @test
*/ */
@ -145,4 +131,62 @@ class QueryFactoryTest extends AbstractUnitTestCase
'Facets were not added to query.' 'Facets were not added to query.'
); );
} }
/**
* @test
*/
public function searchTermIsAddedToQuery()
{
$searchRequest = new SearchRequest('SearchWord');
$query = $this->subject->create($searchRequest);
$this->assertSame(
[
'bool' => [
'must' => [
[
'match' => [
'_all' => [
'query' => 'SearchWord',
],
],
],
],
],
],
$query->toArray()['query'],
'Search term was not added to query as expected.'
);
}
/**
* @test
*/
public function minimumShouldMatchIsAddedToQuery()
{
$searchRequest = new SearchRequest('SearchWord');
$this->configuration->expects($this->once())
->method('getIfExists')
->with('searching.minimumShouldMatch')
->willReturn('50%');
$query = $this->subject->create($searchRequest);
$this->assertArraySubset(
[
'bool' => [
'must' => [
[
'match' => [
'_all' => [
'minimum_should_match' => '50%',
],
],
],
],
],
],
$query->toArray()['query'],
'minimum_should_match was not added to query as configured.'
);
}
} }