Merge branch 'develop' into feature/boosting

This commit is contained in:
Daniel Siepmann 2017-07-27 14:48:43 +02:00
commit 0f4186048c
Signed by: Daniel Siepmann
GPG key ID: 33D6629915560EF4
11 changed files with 428 additions and 56 deletions

View file

@ -87,7 +87,7 @@ class Facet implements FacetInterface
} }
foreach ($this->buckets as $bucket) { foreach ($this->buckets as $bucket) {
$this->options[] = new FacetOption($bucket); $this->options[$bucket['key']] = new FacetOption($bucket);
} }
} }
} }

View file

@ -20,8 +20,11 @@ namespace Codappix\SearchCore\Connection\Elasticsearch;
* 02110-1301, USA. * 02110-1301, USA.
*/ */
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Configuration\InvalidArgumentException;
use Elastica\Exception\ResponseException; use Elastica\Exception\ResponseException;
use TYPO3\CMS\Core\SingletonInterface as Singleton; use TYPO3\CMS\Core\SingletonInterface as Singleton;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface; use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface;
/** /**
@ -31,6 +34,19 @@ use TYPO3\CMS\Extbase\Configuration\ConfigurationManagerInterface;
*/ */
class IndexFactory implements Singleton class IndexFactory implements Singleton
{ {
/**
* @var ConfigurationContainerInterface
*/
protected $configuration;
/**
* @param ConfigurationContainerInterface $configuration
*/
public function __construct(ConfigurationContainerInterface $configuration)
{
$this->configuration = $configuration;
}
/** /**
* Get an index based on TYPO3 table name. * Get an index based on TYPO3 table name.
* *
@ -41,19 +57,52 @@ class IndexFactory implements Singleton
*/ */
public function getIndex(Connection $connection, $documentType) public function getIndex(Connection $connection, $documentType)
{ {
// TODO: Fetch index name from configuration, based on $documentType.
$index = $connection->getClient()->getIndex('typo3content'); $index = $connection->getClient()->getIndex('typo3content');
try { if ($index->exists() === false) {
// TODO: Provide configuration?! $index->create($this->getConfigurationFor($documentType));
// http://elastica.io/getting-started/storing-and-indexing-documents.html#section-analysis
$index->create();
} catch (ResponseException $exception) {
if (stripos($exception->getMessage(), 'already exists') === false) {
throw $exception;
}
} }
return $index; return $index;
} }
/**
 * Fetches the index configuration for the given document type.
 *
 * Reads "indexing.<documentType>.index" from the configuration container and
 * runs every entry below "analysis.analyzer" through
 * prepareAnalyzerConfiguration(). An empty array is returned if the
 * configuration container throws an InvalidArgumentException.
 *
 * @param string $documentType
 *
 * @return array
 */
protected function getConfigurationFor($documentType)
{
    try {
        $indexSettings = $this->configuration->get('indexing.' . $documentType . '.index');

        // Nothing to prepare without configured analyzers.
        if (!isset($indexSettings['analysis']['analyzer'])) {
            return $indexSettings;
        }

        foreach ($indexSettings['analysis']['analyzer'] as $analyzerName => $analyzerSettings) {
            $indexSettings['analysis']['analyzer'][$analyzerName]
                = $this->prepareAnalyzerConfiguration($analyzerSettings);
        }

        return $indexSettings;
    } catch (InvalidArgumentException $notConfigured) {
        return [];
    }
}
/**
 * Converts the comma separated options of an analyzer into lists.
 *
 * The "char_filter" and "filter" options are exploded by comma, each value
 * is trimmed and empty values are removed. Options that are not set, or
 * that are not strings (e.g. already provided as a list), are left
 * untouched, so the method can safely be applied to prepared configuration.
 *
 * @param array $analyzer Configuration of a single analyzer.
 *
 * @return array The analyzer configuration with exploded options.
 */
protected function prepareAnalyzerConfiguration(array $analyzer)
{
    foreach (['char_filter', 'filter'] as $fieldToExplode) {
        // Only strings can be exploded, passing anything else to trimExplode() would fail.
        if (isset($analyzer[$fieldToExplode]) && is_string($analyzer[$fieldToExplode])) {
            $analyzer[$fieldToExplode] = GeneralUtility::trimExplode(',', $analyzer[$fieldToExplode], true);
        }
    }

    return $analyzer;
}
} }

View file

@ -53,7 +53,13 @@ class MappingFactory implements Singleton
{ {
$mapping = new \Elastica\Type\Mapping(); $mapping = new \Elastica\Type\Mapping();
$mapping->setType($type); $mapping->setType($type);
$mapping->setProperties($this->getConfiguration($type->getName()));
$configuration = $this->getConfiguration($type->getName());
if (isset($configuration['_all'])) {
$mapping->setAllField($configuration['_all']);
unset($configuration['_all']);
}
$mapping->setProperties($configuration);
return $mapping; return $mapping;
} }

View file

@ -140,7 +140,7 @@ class SearchResult implements SearchResultInterface
} }
foreach ($this->result->getAggregations() as $aggregationName => $aggregation) { foreach ($this->result->getAggregations() as $aggregationName => $aggregation) {
$this->facets[] = $this->objectManager->get(Facet::class, $aggregationName, $aggregation); $this->facets[$aggregationName] = $this->objectManager->get(Facet::class, $aggregationName, $aggregation);
} }
} }
} }

View file

@ -83,8 +83,7 @@ abstract class AbstractIndexer implements IndexerInterface
protected function getRecordGenerator() protected function getRecordGenerator()
{ {
$offset = 0; $offset = 0;
// TODO: Make configurable. $limit = $this->getLimit();
$limit = 50;
while (($records = $this->getRecords($offset, $limit)) !== []) { while (($records = $this->getRecords($offset, $limit)) !== []) {
yield $records; yield $records;
@ -92,6 +91,17 @@ abstract class AbstractIndexer implements IndexerInterface
} }
} }
/**
 * Provides the number of records to fetch per call to getRecords().
 *
 * @return int
 */
protected function getLimit()
{
    // TODO: Make configurable.
    $recordsPerBatch = 50;

    return $recordsPerBatch;
}
/** /**
* @param int $offset * @param int $offset
* @param int $limit * @param int $limit

View file

@ -95,19 +95,20 @@ class QueryFactory
*/ */
protected function addSearch(SearchRequestInterface $searchRequest) protected function addSearch(SearchRequestInterface $searchRequest)
{ {
$this->query = ArrayUtility::arrayMergeRecursiveOverrule($this->query, [ $this->query = ArrayUtility::setValueByPath(
'query' => [ $this->query,
'bool' => [ 'query.bool.must.0.match._all.query',
'must' => [ $searchRequest->getSearchTerm()
[ );
'match' => [
'_all' => $searchRequest->getSearchTerm() $minimumShouldMatch = $this->configuration->getIfExists('searching.minimumShouldMatch');
], if ($minimumShouldMatch) {
], $this->query = ArrayUtility::setValueByPath(
], $this->query,
], 'query.bool.must.0.match._all.minimum_should_match',
], $minimumShouldMatch
]); );
}
} }
/** /**

View file

@ -184,6 +184,40 @@ options are available:
makes building a facet possible. makes building a facet possible.
.. _index:
``index``
"""""""""
Used by: Elasticsearch connection while indexing.
Defines the index settings used when the Elasticsearch index is created. See the official docs for available options: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/indices-create-index.html
Example::
plugin.tx_searchcore.settings.indexing.tt_content.index {
analysis {
analyzer {
ngram4 {
type = custom
tokenizer = ngram4
char_filter = html_strip
filter = lowercase, asciifolding
}
}
tokenizer {
ngram4 {
type = ngram
min_gram = 4
max_gram = 4
}
}
}
}
``char_filter`` and ``filter`` each accept a comma-separated list of options.
.. _configuration_options_search: .. _configuration_options_search:
Searching Searching
@ -210,6 +244,19 @@ Searching
The above example will provide a facet with options for all found ``CType`` results together The above example will provide a facet with options for all found ``CType`` results together
with a count. with a count.
.. _minimumShouldMatch:
``minimumShouldMatch``
""""""""""""""""""""""
Used by: Elasticsearch connection while building search query.
Defines the ``minimum_should_match`` value added to the Elasticsearch search query. See the official docs for possible values: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-minimum-should-match.html
Example::
plugin.tx_searchcore.settings.searching.minimumShouldMatch = 50%
.. _boost: .. _boost:
``boost`` ``boost``

View file

@ -78,16 +78,16 @@ class FilterTest extends AbstractFunctionalTestCase
$this->assertSame(1, count($result->getFacets()), 'Did not receive the single defined facet.'); $this->assertSame(1, count($result->getFacets()), 'Did not receive the single defined facet.');
$facet = $result->getFacets()[0]; $facet = current($result->getFacets());
$this->assertSame('contentTypes', $facet->getName(), 'Name of facet was not as expected.'); $this->assertSame('contentTypes', $facet->getName(), 'Name of facet was not as expected.');
$this->assertSame('CType', $facet->getField(), 'Field of facet was not expected.'); $this->assertSame('CType', $facet->getField(), 'Field of facet was not expected.');
$options = $facet->getOptions(); $options = $facet->getOptions();
$this->assertSame(2, count($options), 'Did not receive the expected number of possible options for facet.'); $this->assertSame(2, count($options), 'Did not receive the expected number of possible options for facet.');
$option = $options[0]; $option = $options['HTML'];
$this->assertSame('HTML', $option->getName(), 'Option did not have expected Name.'); $this->assertSame('HTML', $option->getName(), 'Option did not have expected Name.');
$this->assertSame(1, $option->getCount(), 'Option did not have expected count.'); $this->assertSame(1, $option->getCount(), 'Option did not have expected count.');
$option = $options[1]; $option = $options['Header'];
$this->assertSame('Header', $option->getName(), 'Option did not have expected Name.'); $this->assertSame('Header', $option->getName(), 'Option did not have expected Name.');
$this->assertSame(1, $option->getCount(), 'Option did not have expected count.'); $this->assertSame(1, $option->getCount(), 'Option did not have expected count.');
} }

View file

@ -0,0 +1,132 @@
<?php
namespace Codappix\SearchCore\Tests\Unit\Connection\Elasticsearch;
/*
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Connection\Elasticsearch\Connection;
use Codappix\SearchCore\Connection\Elasticsearch\IndexFactory;
use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase;
/**
 * Unit tests for IndexFactory: index creation and the configuration passed to it.
 */
class IndexFactoryTest extends AbstractUnitTestCase
{
    /**
     * @var IndexFactory
     */
    protected $subject;

    /**
     * Mock of the configuration container injected into the subject.
     *
     * Declared explicitly so setUp() does not create a dynamic property.
     *
     * @var ConfigurationContainerInterface
     */
    protected $configuration;

    public function setUp()
    {
        parent::setUp();

        $this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock();
        $this->subject = new IndexFactory($this->configuration);
    }

    /**
     * @test
     */
    public function indexIsNotCreatedIfAlreadyExisting()
    {
        $indexMock = $this->getIndexMock(true);
        $indexMock->expects($this->never())
            ->method('create');

        $connection = $this->getConnectionMockFor($indexMock);

        $this->subject->getIndex($connection, 'someIndex');
    }

    /**
     * @test
     */
    public function typoScriptConfigurationIsProvidedToIndex()
    {
        $configuration = [
            'analysis' => [
                'analyzer' => [
                    'ngram4' => [
                        'type' => 'custom',
                        'tokenizer' => 'ngram4',
                        'char_filter' => 'html_strip',
                        // The empty entry is intentional, it has to be removed by the subject.
                        'filter' => 'lowercase, , asciifolding',
                    ],
                ],
                'tokenizer' => [
                    'ngram4' => [
                        'type' => 'ngram',
                        'min_gram' => 4,
                        'max_gram' => 4,
                    ],
                ],
            ],
        ];

        // Comma separated options are expected to be converted to lists.
        $expectedConfiguration = $configuration;
        $expectedConfiguration['analysis']['analyzer']['ngram4']['char_filter'] = ['html_strip'];
        $expectedConfiguration['analysis']['analyzer']['ngram4']['filter'] = ['lowercase', 'asciifolding'];

        $indexMock = $this->getIndexMock(false);
        $indexMock->expects($this->once())
            ->method('create')
            ->with($expectedConfiguration);

        $connection = $this->getConnectionMockFor($indexMock);

        $this->configuration->expects($this->once())
            ->method('get')
            ->with('indexing.someIndex.index')
            ->willReturn($configuration);

        $this->subject->getIndex($connection, 'someIndex');
    }

    /**
     * Creates an index mock whose exists() has to be called once and returns the given state.
     *
     * @param bool $exists
     *
     * @return \Elastica\Index Mock of the index.
     */
    private function getIndexMock($exists)
    {
        $indexMock = $this->getMockBuilder(\Elastica\Index::class)
            ->disableOriginalConstructor()
            ->getMock();
        $indexMock->expects($this->once())
            ->method('exists')
            ->willReturn($exists);

        return $indexMock;
    }

    /**
     * Creates a connection mock whose client has to provide the given index once, as "typo3content".
     *
     * @param \Elastica\Index $indexMock
     *
     * @return Connection Mock of the connection.
     */
    private function getConnectionMockFor($indexMock)
    {
        $clientMock = $this->getMockBuilder(\Elastica\Client::class)
            ->disableOriginalConstructor()
            ->getMock();
        $clientMock->expects($this->once())
            ->method('getIndex')
            ->with('typo3content')
            ->willReturn($indexMock);

        $connection = $this->getMockBuilder(Connection::class)
            ->disableOriginalConstructor()
            ->getMock();
        $connection->expects($this->once())
            ->method('getClient')
            ->willReturn($clientMock);

        return $connection;
    }
}

View file

@ -0,0 +1,86 @@
<?php
namespace Codappix\SearchCore\Tests\Unit\Connection\Elasticsearch;
/*
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
use Codappix\SearchCore\Connection\Elasticsearch\MappingFactory;
use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase;
/**
 * Unit tests for MappingFactory: configuration is applied to the created mapping.
 */
class MappingFactoryTest extends AbstractUnitTestCase
{
    /**
     * @var MappingFactory
     */
    protected $subject;

    /**
     * Mock of the configuration container injected into the subject.
     *
     * Declared explicitly so setUp() does not create a dynamic property.
     *
     * @var ConfigurationContainerInterface
     */
    protected $configuration;

    public function setUp()
    {
        parent::setUp();

        $this->configuration = $this->getMockBuilder(ConfigurationContainerInterface::class)->getMock();
        $this->subject = new MappingFactory($this->configuration);
    }

    /**
     * @test
     */
    public function typoScriptConfigurationIsProvidedToIndex()
    {
        $indexName = 'someIndex';
        $configuration = [
            '_all' => [
                'type' => 'text',
                'analyzer' => 'ngram4',
            ],
            'channel' => [
                'type' => 'keyword',
            ],
        ];

        $type = $this->getMockBuilder(\Elastica\Type::class)
            ->disableOriginalConstructor()
            ->getMock();
        $type->expects($this->any())
            ->method('getName')
            ->willReturn($indexName);

        $this->configuration->expects($this->once())
            ->method('get')
            ->with('indexing.' . $indexName . '.mapping')
            ->willReturn($configuration);

        $mapping = $this->subject->getMapping($type)->toArray()[$indexName];

        // "_all" has to be part of the mapping itself ...
        $this->assertArraySubset(
            [
                '_all' => $configuration['_all'],
            ],
            $mapping,
            true,
            'Configuration of _all field was not set for mapping.'
        );
        // ... while all remaining fields have to be part of the properties.
        $this->assertArraySubset(
            [
                'channel' => $configuration['channel'],
            ],
            $mapping['properties'],
            true,
            'Configuration for properties was not set for mapping.'
        );
    }
}

View file

@ -117,31 +117,6 @@ class QueryFactoryTest extends AbstractUnitTestCase
); );
} }
/**
* @test
*/
public function userInputIsAlwaysString()
{
$this->configuration->expects($this->any())
->method('get')
->will($this->throwException(new InvalidArgumentException));
$searchRequest = new SearchRequest(10);
$searchRequest->setFilter(['field' => 20]);
$query = $this->subject->create($searchRequest);
$this->assertSame(
'10',
$query->toArray()['query']['bool']['must'][0]['match']['_all'],
'Search word was not escaped as expected.'
);
$this->assertSame(
'20',
$query->toArray()['query']['bool']['filter'][0]['term']['field'],
'Search word was not escaped as expected.'
);
}
/** /**
* @test * @test
*/ */
@ -173,6 +148,70 @@ class QueryFactoryTest extends AbstractUnitTestCase
); );
} }
/**
 * @test
 */
public function searchTermIsAddedToQuery()
{
    // Every lookup through get() fails with an InvalidArgumentException.
    $this->configuration->expects($this->any())
        ->method('get')
        ->will($this->throwException(new InvalidArgumentException));

    $expectedQuery = [
        'bool' => [
            'must' => [
                [
                    'match' => [
                        '_all' => [
                            'query' => 'SearchWord',
                        ],
                    ],
                ],
            ],
        ],
    ];

    $createdQuery = $this->subject->create(new SearchRequest('SearchWord'));

    $this->assertSame(
        $expectedQuery,
        $createdQuery->toArray()['query'],
        'Search term was not added to query as expected.'
    );
}
/**
 * @test
 */
public function minimumShouldMatchIsAddedToQuery()
{
    // Only the minimum should match option is configured, lookups through get() fail.
    $this->configuration->expects($this->once())
        ->method('getIfExists')
        ->with('searching.minimumShouldMatch')
        ->willReturn('50%');
    $this->configuration->expects($this->any())
        ->method('get')
        ->will($this->throwException(new InvalidArgumentException));

    $expectedSubset = [
        'bool' => [
            'must' => [
                [
                    'match' => [
                        '_all' => [
                            'minimum_should_match' => '50%',
                        ],
                    ],
                ],
            ],
        ],
    ];

    $createdQuery = $this->subject->create(new SearchRequest('SearchWord'));

    $this->assertArraySubset(
        $expectedSubset,
        $createdQuery->toArray()['query'],
        'minimum_should_match was not added to query as configured.'
    );
}
/** /**
* @test * @test
*/ */
@ -245,7 +284,9 @@ class QueryFactoryTest extends AbstractUnitTestCase
'must' => [ 'must' => [
[ [
'match' => [ 'match' => [
'_all' => 'SearchWord', '_all' => [
'query' => 'SearchWord',
],
], ],
], ],
], ],