mirror of
https://github.com/Codappix/search_core.git
synced 2024-12-23 04:36:09 +01:00
Merge pull request #68 from Codappix/feature/support-ngram
FEATURE: Add ngram
This commit is contained in:
commit
a936bd98db
7 changed files with 422 additions and 47 deletions
|
@ -20,8 +20,11 @@ namespace Codappix\SearchCore\Connection\Elasticsearch;
|
|||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
|
||||
use Codappix\SearchCore\Configuration\InvalidArgumentException;
|
||||
use Elastica\Exception\ResponseException;
|
||||
use TYPO3\CMS\Core\SingletonInterface as Singleton;
|
||||
use TYPO3\CMS\Core\Utility\GeneralUtility;
|
||||
use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface;
|
||||
|
||||
/**
|
||||
|
@ -31,6 +34,19 @@ use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface;
|
|||
*/
|
||||
class IndexFactory implements Singleton
|
||||
{
|
||||
/**
|
||||
* @var ConfigurationContainerInterface
|
||||
*/
|
||||
protected $configuration;
|
||||
|
||||
/**
* Keep the configuration container for later lookups by this factory.
*
|
||||
* @param ConfigurationContainerInterface $configuration
|
||||
*/
|
||||
public function __construct(ConfigurationContainerInterface $configuration)
|
||||
{
|
||||
$this->configuration = $configuration;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an index based on TYPO3 table name.
|
||||
*
|
||||
|
@ -41,19 +57,52 @@ class IndexFactory implements Singleton
|
|||
*/
|
||||
public function getIndex(Connection $connection, $documentType)
|
||||
{
|
||||
// TODO: Fetch index name from configuration, based on $documentType.
|
||||
$index = $connection->getClient()->getIndex('typo3content');
|
||||
|
||||
try {
|
||||
// TODO: Provide configuration?!
|
||||
// http://elastica.io/getting-started/storing-and-indexing-documents.html#section-analysis
|
||||
$index->create();
|
||||
} catch (ResponseException $exception) {
|
||||
if (stripos($exception->getMessage(), 'already exists') === false) {
|
||||
throw $exception;
|
||||
}
|
||||
if ($index->exists() === false) {
|
||||
$index->create($this->getConfigurationFor($documentType));
|
||||
}
|
||||
|
||||
return $index;
|
||||
}
|
||||
|
||||
/**
 * Fetch the index configuration of a document type.
 *
 * Reads "indexing.<documentType>.index" from the configuration container and
 * passes every configured analyzer through prepareAnalyzerConfiguration().
 * If the container reports the path via InvalidArgumentException, an empty
 * configuration is used instead.
 *
 * @param string $documentType
 *
 * @return array
 */
protected function getConfigurationFor($documentType)
{
    try {
        $indexConfiguration = $this->configuration->get('indexing.' . $documentType . '.index');
    } catch (InvalidArgumentException $exception) {
        return [];
    }

    // Nothing to prepare if no analyzers are configured.
    if (isset($indexConfiguration['analysis']['analyzer']) === false) {
        return $indexConfiguration;
    }

    foreach ($indexConfiguration['analysis']['analyzer'] as $name => $definition) {
        $indexConfiguration['analysis']['analyzer'][$name] = $this->prepareAnalyzerConfiguration($definition);
    }

    return $indexConfiguration;
}
|
||||
|
||||
/**
 * Turn comma separated analyzer options into lists.
 *
 * The options "char_filter" and "filter" are configured as comma separated
 * strings, e.g. "lowercase, asciifolding". Each of them is split at the
 * commas, trimmed and stripped of empty entries.
 *
 * Only scalar values are split. A value that is already an array is kept
 * as it is, because trimExplode() can only work on strings.
 *
 * @param array $analyzer Configuration of a single analyzer.
 *
 * @return array The analyzer configuration with the list options as arrays.
 */
protected function prepareAnalyzerConfiguration(array $analyzer)
{
    $fieldsToExplode = ['char_filter', 'filter'];

    foreach ($fieldsToExplode as $fieldToExplode) {
        // isset() has to come first, is_scalar() would raise a notice on a missing key.
        if (isset($analyzer[$fieldToExplode]) && is_scalar($analyzer[$fieldToExplode])) {
            $analyzer[$fieldToExplode] = GeneralUtility::trimExplode(',', $analyzer[$fieldToExplode], true);
        }
    }

    return $analyzer;
}
|
||||
}
|
||||
|
|
|
@ -53,7 +53,13 @@ class MappingFactory implements Singleton
|
|||
{
|
||||
$mapping = new \Elastica\Type\Mapping();
|
||||
$mapping->setType($type);
|
||||
$mapping->setProperties($this->getConfiguration($type->getName()));
|
||||
|
||||
$configuration = $this->getConfiguration($type->getName());
|
||||
if (isset($configuration['_all'])) {
|
||||
$mapping->setAllField($configuration['_all']);
|
||||
unset($configuration['_all']);
|
||||
}
|
||||
$mapping->setProperties($configuration);
|
||||
|
||||
return $mapping;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Domain\Search;
|
|||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
|
||||
use Codappix\SearchCore\Connection\ConnectionInterface;
|
||||
use Codappix\SearchCore\Connection\Elasticsearch\Query;
|
||||
use Codappix\SearchCore\Connection\SearchRequestInterface;
|
||||
|
@ -32,6 +33,11 @@ class QueryFactory
|
|||
*/
|
||||
protected $logger;
|
||||
|
||||
/**
|
||||
* @var ConfigurationContainerInterface
|
||||
*/
|
||||
protected $configuration;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
|
@ -39,10 +45,14 @@ class QueryFactory
|
|||
|
||||
/**
|
||||
* @param \TYPO3\CMS\Core\Log\LogManager $logManager
|
||||
* @param ConfigurationContainerInterface $configuration
|
||||
*/
|
||||
public function __construct(\TYPO3\CMS\Core\Log\LogManager $logManager)
|
||||
{
|
||||
public function __construct(
|
||||
\TYPO3\CMS\Core\Log\LogManager $logManager,
|
||||
ConfigurationContainerInterface $configuration
|
||||
) {
|
||||
$this->logger = $logManager->getLogger(__CLASS__);
|
||||
$this->configuration = $configuration;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -76,19 +86,20 @@ class QueryFactory
|
|||
*/
|
||||
protected function addSearch(SearchRequestInterface $searchRequest)
|
||||
{
|
||||
$this->query = ArrayUtility::arrayMergeRecursiveOverrule($this->query, [
|
||||
'query' => [
|
||||
'bool' => [
|
||||
'must' => [
|
||||
[
|
||||
'match' => [
|
||||
'_all' => $searchRequest->getSearchTerm()
|
||||
],
|
||||
],
|
||||
],
|
||||
],
|
||||
],
|
||||
]);
|
||||
$this->query = ArrayUtility::setValueByPath(
|
||||
$this->query,
|
||||
'query.bool.must.0.match._all.query',
|
||||
$searchRequest->getSearchTerm()
|
||||
);
|
||||
|
||||
$minimumShouldMatch = $this->configuration->getIfExists('searching.minimumShouldMatch');
|
||||
if ($minimumShouldMatch) {
|
||||
$this->query = ArrayUtility::setValueByPath(
|
||||
$this->query,
|
||||
'query.bool.must.0.match._all.minimum_should_match',
|
||||
$minimumShouldMatch
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -184,6 +184,40 @@ options are available:
|
|||
makes building a facet possible.
|
||||
|
||||
|
||||
.. _index:
|
||||
|
||||
``index``
|
||||
"""""""""
|
||||
|
||||
Used by: Elasticsearch connection while indexing.
|
||||
|
||||
Defines the index settings for Elasticsearch. For the available options, see the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/indices-create-index.html
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.tt_content.index {
|
||||
analysis {
|
||||
analyzer {
|
||||
ngram4 {
|
||||
type = custom
|
||||
tokenizer = ngram4
|
||||
char_filter = html_strip
|
||||
filter = lowercase, asciifolding
|
||||
}
|
||||
}
|
||||
|
||||
tokenizer {
|
||||
ngram4 {
|
||||
type = ngram
|
||||
min_gram = 4
|
||||
max_gram = 4
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
``char_filter`` and ``filter`` each accept a comma-separated list of options.
|
||||
|
||||
.. _configuration_options_search:
|
||||
|
||||
Searching
|
||||
|
@ -209,3 +243,16 @@ Searching
|
|||
|
||||
The above example will provide a facet with options for all found ``CType`` results together
|
||||
with a count.
|
||||
|
||||
.. _minimumShouldMatch:
|
||||
|
||||
``minimumShouldMatch``
|
||||
""""""""""""""""""""""
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Defines the ``minimum_should_match`` value used in the Elasticsearch search query. For the supported values, see the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-minimum-should-match.html
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.minimumShouldMatch = 50%
|
||||
|
|
132
Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php
Normal file
132
Tests/Unit/Connection/Elasticsearch/IndexFactoryTest.php
Normal file
|
@ -0,0 +1,132 @@
|
|||
<?php
|
||||
namespace Codappix\SearchCore\Tests\Unit\Connection\Elasticsearch;
|
||||
|
||||
/*
|
||||
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
|
||||
use Codappix\SearchCore\Connection\Elasticsearch\Connection;
|
||||
use Codappix\SearchCore\Connection\Elasticsearch\IndexFactory;
|
||||
use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase;
|
||||
|
||||
class IndexFactoryTest extends AbstractUnitTestCase
{
    /**
     * @var IndexFactory
     */
    protected $subject;

    /**
     * Mocked configuration container that is injected into the subject.
     *
     * Declared explicitly, it is assigned in setUp() and used by the tests.
     *
     * @var ConfigurationContainerInterface
     */
    protected $configuration;

    public function setUp()
    {
        parent::setUp();

        $this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock();
        $this->subject = new IndexFactory($this->configuration);
    }

    /**
     * An index reporting itself as existing must not be created again.
     *
     * @test
     */
    public function indexIsNotCreatedIfAlreadyExisting()
    {
        $indexMock = $this->getMockBuilder(\Elastica\Index::class)
            ->disableOriginalConstructor()
            ->getMock();
        $indexMock->expects($this->once())
            ->method('exists')
            ->willReturn(true);
        $indexMock->expects($this->never())
            ->method('create');

        $this->subject->getIndex($this->getConnectionProvidingIndex($indexMock), 'someIndex');
    }

    /**
     * A missing index is created with the configuration of the document type,
     * with the comma separated "char_filter" and "filter" turned into lists.
     *
     * @test
     */
    public function typoScriptConfigurationIsProvidedToIndex()
    {
        $configuration = [
            'analysis' => [
                'analyzer' => [
                    'ngram4' => [
                        'type' => 'custom',
                        'tokenizer' => 'ngram4',
                        'char_filter' => 'html_strip',
                        // The empty entry between the commas has to be dropped.
                        'filter' => 'lowercase, , asciifolding',
                    ],
                ],
                'tokenizer' => [
                    'ngram4' => [
                        'type' => 'ngram',
                        'min_gram' => 4,
                        'max_gram' => 4,
                    ],
                ],
            ],
        ];

        $expectedConfiguration = $configuration;
        $expectedConfiguration['analysis']['analyzer']['ngram4']['char_filter'] = ['html_strip'];
        $expectedConfiguration['analysis']['analyzer']['ngram4']['filter'] = ['lowercase', 'asciifolding'];

        $indexMock = $this->getMockBuilder(\Elastica\Index::class)
            ->disableOriginalConstructor()
            ->getMock();
        $indexMock->expects($this->once())
            ->method('exists')
            ->willReturn(false);
        $indexMock->expects($this->once())
            ->method('create')
            ->with($expectedConfiguration);

        $this->configuration->expects($this->once())
            ->method('get')
            ->with('indexing.someIndex.index')
            ->willReturn($configuration);

        $this->subject->getIndex($this->getConnectionProvidingIndex($indexMock), 'someIndex');
    }

    /**
     * Build a connection mock whose client hands out the given index.
     *
     * Both the client lookup and the request for the index "typo3content"
     * are expected exactly once.
     *
     * @param \Elastica\Index $indexMock
     *
     * @return Connection
     */
    protected function getConnectionProvidingIndex($indexMock)
    {
        $clientMock = $this->getMockBuilder(\Elastica\Client::class)
            ->disableOriginalConstructor()
            ->getMock();
        $clientMock->expects($this->once())
            ->method('getIndex')
            ->with('typo3content')
            ->willReturn($indexMock);

        $connection = $this->getMockBuilder(Connection::class)
            ->disableOriginalConstructor()
            ->getMock();
        $connection->expects($this->once())
            ->method('getClient')
            ->willReturn($clientMock);

        return $connection;
    }
}
|
86
Tests/Unit/Connection/Elasticsearch/MappingFactoryTest.php
Normal file
86
Tests/Unit/Connection/Elasticsearch/MappingFactoryTest.php
Normal file
|
@ -0,0 +1,86 @@
|
|||
<?php
|
||||
namespace Codappix\SearchCore\Tests\Unit\Connection\Elasticsearch;
|
||||
|
||||
/*
|
||||
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
|
||||
use Codappix\SearchCore\Connection\Elasticsearch\MappingFactory;
|
||||
use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase;
|
||||
|
||||
class MappingFactoryTest extends AbstractUnitTestCase
{
    /**
     * @var MappingFactory
     */
    protected $subject;

    /**
     * Mocked configuration container that is injected into the subject.
     *
     * Declared explicitly, it is assigned in setUp() and used by the test.
     *
     * @var ConfigurationContainerInterface
     */
    protected $configuration;

    public function setUp()
    {
        parent::setUp();

        $this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock();
        $this->subject = new MappingFactory($this->configuration);
    }

    /**
     * The configured "_all" entry becomes the _all field of the mapping,
     * all remaining entries become its properties.
     *
     * @test
     */
    public function typoScriptConfigurationIsProvidedToIndex()
    {
        $indexName = 'someIndex';
        $configuration = [
            '_all' => [
                'type' => 'text',
                'analyzer' => 'ngram4',
            ],
            'channel' => [
                'type' => 'keyword',
            ],
        ];

        $type = $this->getMockBuilder(\Elastica\Type::class)
            ->disableOriginalConstructor()
            ->getMock();
        $type->expects($this->any())
            ->method('getName')
            ->willReturn($indexName);

        $this->configuration->expects($this->once())
            ->method('get')
            ->with('indexing.' . $indexName . '.mapping')
            ->willReturn($configuration);

        $mapping = $this->subject->getMapping($type)->toArray()[$indexName];

        $this->assertArraySubset(
            [
                '_all' => $configuration['_all']
            ],
            $mapping,
            true,
            'Configuration of _all field was not set for mapping.'
        );
        $this->assertArraySubset(
            [
                'channel' => $configuration['channel']
            ],
            $mapping['properties'],
            true,
            'Configuration for properties was not set for mapping.'
        );
    }
}
|
|
@ -20,6 +20,7 @@ namespace Codappix\SearchCore\Tests\Unit\Domain\Search;
|
|||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
|
||||
use Codappix\SearchCore\Domain\Model\FacetRequest;
|
||||
use Codappix\SearchCore\Domain\Model\SearchRequest;
|
||||
use Codappix\SearchCore\Domain\Search\QueryFactory;
|
||||
|
@ -32,11 +33,17 @@ class QueryFactoryTest extends AbstractUnitTestCase
|
|||
*/
|
||||
protected $subject;
|
||||
|
||||
/**
|
||||
* @var ConfigurationContainerInterface
|
||||
*/
|
||||
protected $configuration;
|
||||
|
||||
public function setUp()
|
||||
{
|
||||
parent::setUp();
|
||||
|
||||
$this->subject = new QueryFactory($this->getMockedLogger());
|
||||
$this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock();
|
||||
$this->subject = new QueryFactory($this->getMockedLogger(), $this->configuration);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -97,27 +104,6 @@ class QueryFactoryTest extends AbstractUnitTestCase
|
|||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function userInputIsAlwaysString()
|
||||
{
|
||||
$searchRequest = new SearchRequest(10);
|
||||
$searchRequest->setFilter(['field' => 20]);
|
||||
|
||||
$query = $this->subject->create($searchRequest);
|
||||
$this->assertSame(
|
||||
'10',
|
||||
$query->toArray()['query']['bool']['must'][0]['match']['_all'],
|
||||
'Search word was not escaped as expected.'
|
||||
);
|
||||
$this->assertSame(
|
||||
'20',
|
||||
$query->toArray()['query']['bool']['filter'][0]['term']['field'],
|
||||
'Search word was not escaped as expected.'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
|
@ -145,4 +131,62 @@ class QueryFactoryTest extends AbstractUnitTestCase
|
|||
'Facets were not added to query.'
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * The search term is used as "query" of the match on the "_all" field.
 *
 * @test
 */
public function searchTermIsAddedToQuery()
{
    $expectedQuery = [
        'bool' => [
            'must' => [
                [
                    'match' => [
                        '_all' => [
                            'query' => 'SearchWord',
                        ],
                    ],
                ],
            ],
        ],
    ];

    $query = $this->subject->create(new SearchRequest('SearchWord'));

    $this->assertSame(
        $expectedQuery,
        $query->toArray()['query'],
        'Search term was not added to query as expected.'
    );
}
|
||||
|
||||
/**
 * A configured "searching.minimumShouldMatch" is added to the match on
 * the "_all" field as "minimum_should_match".
 *
 * @test
 */
public function minimumShouldMatchIsAddedToQuery()
{
    $expectedSubset = [
        'bool' => [
            'must' => [
                [
                    'match' => [
                        '_all' => [
                            'minimum_should_match' => '50%',
                        ],
                    ],
                ],
            ],
        ],
    ];

    $this->configuration->expects($this->once())
        ->method('getIfExists')
        ->with('searching.minimumShouldMatch')
        ->willReturn('50%');

    $query = $this->subject->create(new SearchRequest('SearchWord'));

    $this->assertArraySubset(
        $expectedSubset,
        $query->toArray()['query'],
        'minimum_should_match was not added to query as configured.'
    );
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue