mirror of
https://github.com/Codappix/search_core.git
synced 2024-11-22 12:36:11 +01:00
Merge pull request #90 from Codappix/feature/data-processing
FEATURE: Add data processing to extension
This commit is contained in:
commit
a507eb165d
14 changed files with 691 additions and 331 deletions
54
Classes/DataProcessing/CopyToProcessor.php
Normal file
54
Classes/DataProcessing/CopyToProcessor.php
Normal file
|
@ -0,0 +1,54 @@
|
|||
<?php
|
||||
namespace Codappix\SearchCore\DataProcessing;
|
||||
|
||||
/*
|
||||
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
 * Copies the values of all fields of a record into one target field.
 *
 * The name of the target field is taken from the configuration option "to".
 * Values of nested arrays are included, all values are joined by PHP_EOL.
 */
class CopyToProcessor implements ProcessorInterface
{
    /**
     * Adds a field to the record which contains the values of all fields.
     *
     * @param array $record
     * @param array $configuration Uses the key "to" as name of the target field.
     *
     * @return array The record including the target field. The record is
     *               returned unchanged if no usable target is configured.
     */
    public function processRecord(array $record, array $configuration)
    {
        // The processor may be called with an empty configuration. Without this
        // guard an undefined index would be read and the values would end up
        // in a field with an empty name.
        if (!isset($configuration['to'])
            || !is_scalar($configuration['to'])
            || (string) $configuration['to'] === ''
        ) {
            return $record;
        }

        $all = [];

        $this->addArray($all, $record);
        // Without a callback array_filter() removes every value that is
        // loosely equal to false, which for strings means '' and '0'.
        $all = array_filter($all);
        $record[$configuration['to']] = implode(PHP_EOL, $all);

        return $record;
    }

    /**
     * Appends all values of $from as strings to $target. Nested arrays are
     * resolved recursively, so $target stays a flat list.
     *
     * @param array &$target
     * @param array $from
     */
    protected function addArray(array &$target, array $from)
    {
        foreach ($from as $value) {
            if (is_array($value)) {
                $this->addArray($target, $value);
                continue;
            }

            $target[] = (string) $value;
        }
    }
}
|
39
Classes/DataProcessing/ProcessorInterface.php
Normal file
39
Classes/DataProcessing/ProcessorInterface.php
Normal file
|
@ -0,0 +1,39 @@
|
|||
<?php
|
||||
namespace Codappix\SearchCore\DataProcessing;
|
||||
|
||||
/*
|
||||
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
 * Contract for all DataProcessing processors.
 *
 * Processors that do not implement this interface are not executed.
 */
interface ProcessorInterface
{
    /**
     * Processes the given record and returns the processed record.
     *
     * The configuration for this processor instance is provided as second
     * argument.
     *
     * @param array $record
     * @param array $configuration
     *
     * @return array
     */
    public function processRecord(array $record, array $configuration);
}
|
|
@ -21,6 +21,8 @@ namespace Codappix\SearchCore\Domain\Index\TcaIndexer;
|
|||
*/
|
||||
|
||||
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
|
||||
use Codappix\SearchCore\Configuration\InvalidArgumentException as InvalidConfigurationArgumentException;
|
||||
use Codappix\SearchCore\DataProcessing\ProcessorInterface;
|
||||
use Codappix\SearchCore\Database\Doctrine\Join;
|
||||
use Codappix\SearchCore\Database\Doctrine\Where;
|
||||
use Codappix\SearchCore\Domain\Index\IndexingException;
|
||||
|
@ -146,6 +148,24 @@ class TcaTableService
|
|||
{
|
||||
$this->relationResolver->resolveRelationsForRecord($this, $record);
|
||||
|
||||
try {
|
||||
foreach ($this->configuration->get('indexing.' . $this->tableName . '.dataProcessing') as $configuration) {
|
||||
$className = '';
|
||||
if (is_string($configuration)) {
|
||||
$className = $configuration;
|
||||
$configuration = [];
|
||||
} else {
|
||||
$className = $configuration['_typoScriptNodeValue'];
|
||||
}
|
||||
$dataProcessor = GeneralUtility::makeInstance($className);
|
||||
if ($dataProcessor instanceof ProcessorInterface) {
|
||||
$record = $dataProcessor->processRecord($record, $configuration);
|
||||
}
|
||||
}
|
||||
} catch (InvalidConfigurationArgumentException $e) {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
if (isset($record['uid']) && !isset($record['search_identifier'])) {
|
||||
$record['search_identifier'] = $record['uid'];
|
||||
}
|
||||
|
|
|
@ -28,3 +28,13 @@ The indexing is done by one of the available indexer. For each identifier it's p
|
|||
the indexer to use. Also it's possible to write custom indexer to use.
|
||||
|
||||
Currently only the :ref:`TcaIndexer` is provided.
|
||||
|
||||
.. _concepts_indexing_dataprocessing:
|
||||
|
||||
DataProcessing
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
Before data is transferred to the search service, it can be processed by "DataProcessors" like already
|
||||
known by :ref:`t3tsref:cobj-fluidtemplate-properties-dataprocessing` of :ref:`t3tsref:cobj-fluidtemplate`.
|
||||
|
||||
Configuration is done through TypoScript, see :ref:`dataProcessing`.
|
||||
|
|
|
@ -304,6 +304,7 @@ texinfo_documents = [
|
|||
# Example configuration for intersphinx: refer to the Python standard library.
|
||||
intersphinx_mapping = {
|
||||
't3tcaref': ('https://docs.typo3.org/typo3cms/TCAReference/', None),
|
||||
't3tsref': ('https://docs.typo3.org/typo3cms/TyposcriptReference/', None),
|
||||
}
|
||||
extlinks = {
|
||||
'project': ('https://github.com/Codappix/search_core/projects/%s', 'Github project: '),
|
||||
|
|
|
@ -36,330 +36,14 @@ Here is the example default configuration that's provided through static include
|
|||
Options
|
||||
-------
|
||||
|
||||
The following section contains the different options, e.g. for :ref:`connections` and
|
||||
:ref:`indexer`: ``plugin.tx_searchcore.settings.connection`` or
|
||||
``plugin.tx_searchcore.settings.indexing``.
|
||||
The following sections contain the different options grouped by their applied area, e.g. for
|
||||
:ref:`connections` and :ref:`indexer`: ``plugin.tx_searchcore.settings.connection`` or
|
||||
``plugin.tx_searchcore.settings.indexing``:
|
||||
|
||||
.. _configuration_options_connection:
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
connections
|
||||
^^^^^^^^^^^
|
||||
|
||||
Holds settings regarding the different possible connections for search services like Elasticsearch
|
||||
or Solr.
|
||||
|
||||
Configured as::
|
||||
|
||||
plugin {
|
||||
tx_searchcore {
|
||||
settings {
|
||||
connections {
|
||||
connectionName {
|
||||
// the settings
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Where ``connectionName`` is one of the available :ref:`connections`.
|
||||
|
||||
The following settings are available. For each setting its documented which connection consumes it.
|
||||
|
||||
.. _host:
|
||||
|
||||
``host``
|
||||
""""""""
|
||||
|
||||
Used by: :ref:`Elasticsearch`.
|
||||
|
||||
The host, e.g. ``localhost`` or an IP where the search service is reachable from TYPO3
|
||||
installation.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.connections.elasticsearch.host = localhost
|
||||
|
||||
.. _port:
|
||||
|
||||
``port``
|
||||
""""""""
|
||||
|
||||
Used by: :ref:`Elasticsearch`.
|
||||
|
||||
The port where search service is reachable. E.g. default ``9200`` for Elasticsearch.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.connections.elasticsearch.port = 9200
|
||||
|
||||
|
||||
.. _configuration_options_index:
|
||||
|
||||
Indexing
|
||||
^^^^^^^^
|
||||
|
||||
Holds settings regarding the indexing, e.g. of TYPO3 records, to search services.
|
||||
|
||||
Configured as::
|
||||
|
||||
plugin {
|
||||
tx_searchcore {
|
||||
settings {
|
||||
indexing {
|
||||
identifier {
|
||||
indexer = FullyQualifiedClassname
|
||||
// the settings
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Where ``identifier`` is up to you, but should match table names to make :ref:`TcaIndexer` work.
|
||||
|
||||
The following settings are available. For each setting its documented which indexer consumes it.
|
||||
|
||||
.. _rootLineBlacklist:
|
||||
|
||||
``rootLineBlacklist``
|
||||
"""""""""""""""""""""
|
||||
|
||||
Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`.
|
||||
|
||||
Defines a blacklist of page uids. Records below any of these pages, or subpages, are not
|
||||
indexed. This allows you to define areas that should not be indexed.
|
||||
The page attribute *No Search* is also taken into account to prevent indexing records from only one
|
||||
page without recursion.
|
||||
|
||||
Contains a comma separated list of page uids. Spaces are trimmed.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.<identifier>.rootLineBlacklist = 3, 10, 100
|
||||
|
||||
Also it's possible to define some behaviour for the different document types. In context of TYPO3
|
||||
tables are used as document types 1:1. It's possible to configure different tables. The following
|
||||
options are available:
|
||||
|
||||
.. _additionalWhereClause:
|
||||
|
||||
``additionalWhereClause``
|
||||
"""""""""""""""""""""""""
|
||||
|
||||
Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`.
|
||||
|
||||
Add additional SQL to where clauses to determine indexable records from the table. This way you
|
||||
can exclude specific records like ``tt_content`` records with specific ``CType`` values or
|
||||
something else. E.g. you can add a new field to the table to exclude records from indexing.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.<identifier>.additionalWhereClause = tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu')
|
||||
|
||||
.. attention::
|
||||
|
||||
Make sure to prefix all fields with the corresponding table name. The selection from
|
||||
database will contain joins and can lead to SQL errors if a field exists in multiple tables.
|
||||
|
||||
.. _abstractFields:
|
||||
|
||||
``abstractFields``
|
||||
"""""""""""""""""""""""""
|
||||
|
||||
Used by: :ref:`PagesIndexer`.
|
||||
|
||||
Define which field should be used to provide the auto generated field "search_abstract".
|
||||
The fields have to exist in the record to be indexed. Therefore fields like ``content`` are also
|
||||
possible.
|
||||
|
||||
Example::
|
||||
|
||||
# As last fallback we use the content of the page
|
||||
plugin.tx_searchcore.settings.indexing.<identifier>.abstractFields := addToList(content)
|
||||
|
||||
Default::
|
||||
|
||||
abstract, description, bodytext
|
||||
|
||||
.. _mapping:
|
||||
|
||||
``mapping``
|
||||
"""""""""""
|
||||
|
||||
Used by: Elasticsearch connection while indexing.
|
||||
|
||||
Define mapping for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/mapping.html
|
||||
You are able to define the mapping for each property / columns.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.tt_content.mapping {
|
||||
CType {
|
||||
type = keyword
|
||||
}
|
||||
}
|
||||
|
||||
The above example will define the ``CType`` field of ``tt_content`` as ``type: keyword``. This
|
||||
makes building a facet possible.
|
||||
|
||||
|
||||
.. _index:
|
||||
|
||||
``index``
|
||||
"""""""""
|
||||
|
||||
Used by: Elasticsearch connection while indexing.
|
||||
|
||||
Define index for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/indices-create-index.html
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.tt_content.index {
|
||||
analysis {
|
||||
analyzer {
|
||||
ngram4 {
|
||||
type = custom
|
||||
tokenizer = ngram4
|
||||
char_filter = html_strip
|
||||
filter = lowercase, asciifolding
|
||||
}
|
||||
}
|
||||
|
||||
tokenizer {
|
||||
ngram4 {
|
||||
type = ngram
|
||||
min_gram = 4
|
||||
max_gram = 4
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
``char_filter`` and ``filter`` are a comma separated list of options.
|
||||
|
||||
.. _configuration_options_search:
|
||||
|
||||
Searching
|
||||
^^^^^^^^^
|
||||
|
||||
.. _size:
|
||||
|
||||
``size``
|
||||
""""""""
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Defined how many search results should be fetched to be available in search result.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.size = 50
|
||||
|
||||
Default if not configured is 10.
|
||||
|
||||
.. _facets:
|
||||
|
||||
``facets``
|
||||
"""""""""""
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Define aggregations for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-terms-aggregation.html
|
||||
Currently only the term facet is provided.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.facets {
|
||||
contentTypes {
|
||||
field = CType
|
||||
}
|
||||
}
|
||||
|
||||
The above example will provide a facet with options for all found ``CType`` results together
|
||||
with a count.
|
||||
|
||||
.. _filter:
|
||||
|
||||
``filter``
|
||||
"""""""""""
|
||||
|
||||
Used by: While building search request.
|
||||
|
||||
Define filter that should be set for all requests.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.filter {
|
||||
property = value
|
||||
}
|
||||
|
||||
For Elasticsearch the fields have to be filterable, e.g. need a mapping as ``keyword``.
|
||||
|
||||
.. _minimumShouldMatch:
|
||||
|
||||
``minimumShouldMatch``
|
||||
""""""""""""""""""""""
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Define the minimum match for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-minimum-should-match.html
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.minimumShouldMatch = 50%
|
||||
|
||||
.. _boost:
|
||||
|
||||
``boost``
|
||||
"""""""""
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Define fields that should boost the score for results.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.boost {
|
||||
search_title = 3
|
||||
search_abstract = 1.5
|
||||
}
|
||||
|
||||
For further information take a look at
|
||||
https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_boosting_query_clauses.html
|
||||
|
||||
.. _fieldValueFactor:
|
||||
|
||||
``fieldValueFactor``
|
||||
""""""""""""""""""""
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Define a field to use as a factor for scoring. The configuration is passed through to elastic
|
||||
search ``field_value_factor``, see: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-function-score-query.html#function-field-value-factor
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.field_value_factor {
|
||||
field = rootlineLevel
|
||||
modifier = reciprocal
|
||||
factor = 2
|
||||
missing = 1
|
||||
}
|
||||
|
||||
.. _mode:
|
||||
|
||||
``mode``
|
||||
""""""""
|
||||
|
||||
Used by: Controller while preparing action.
|
||||
|
||||
Define to switch from search to filter mode.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching {
|
||||
mode = filter
|
||||
}
|
||||
|
||||
Only ``filter`` is allowed as value. Will submit an empty query to switch to filter mode.
|
||||
configuration/connections
|
||||
configuration/indexing
|
||||
configuration/searching
|
||||
|
|
55
Documentation/source/configuration/connections.rst
Normal file
55
Documentation/source/configuration/connections.rst
Normal file
|
@ -0,0 +1,55 @@
|
|||
.. _configuration_options_connection:
|
||||
|
||||
Connections
|
||||
===========
|
||||
|
||||
Holds settings regarding the different possible connections for search services like Elasticsearch
|
||||
or Solr.
|
||||
|
||||
Configured as::
|
||||
|
||||
plugin {
|
||||
tx_searchcore {
|
||||
settings {
|
||||
connections {
|
||||
connectionName {
|
||||
// the settings
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Where ``connectionName`` is one of the available :ref:`connections`.
|
||||
|
||||
The following settings are available. For each setting it's documented which connection consumes it.
|
||||
|
||||
.. _host:
|
||||
|
||||
``host``
|
||||
--------
|
||||
|
||||
Used by: :ref:`Elasticsearch`.
|
||||
|
||||
The host, e.g. ``localhost`` or an IP where the search service is reachable from TYPO3
|
||||
installation.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.connections.elasticsearch.host = localhost
|
||||
|
||||
.. _port:
|
||||
|
||||
``port``
|
||||
--------
|
||||
|
||||
Used by: :ref:`Elasticsearch`.
|
||||
|
||||
The port where search service is reachable. E.g. default ``9200`` for Elasticsearch.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.connections.elasticsearch.port = 9200
|
||||
|
||||
|
||||
|
202
Documentation/source/configuration/indexing.rst
Normal file
202
Documentation/source/configuration/indexing.rst
Normal file
|
@ -0,0 +1,202 @@
|
|||
.. _configuration_options_index:
|
||||
|
||||
Indexing
|
||||
========
|
||||
|
||||
Holds settings regarding the indexing, e.g. of TYPO3 records, to search services.
|
||||
|
||||
Configured as::
|
||||
|
||||
plugin {
|
||||
tx_searchcore {
|
||||
settings {
|
||||
indexing {
|
||||
identifier {
|
||||
indexer = FullyQualifiedClassname
|
||||
// the settings
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Where ``identifier`` is up to you, but should match table names to make :ref:`TcaIndexer` work.
|
||||
|
||||
The following settings are available. For each setting it's documented which indexer consumes it.
|
||||
|
||||
.. _rootLineBlacklist:
|
||||
|
||||
rootLineBlacklist
|
||||
-----------------
|
||||
|
||||
Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`.
|
||||
|
||||
Defines a blacklist of page uids. Records below any of these pages, or subpages, are not
|
||||
indexed. This allows you to define areas that should not be indexed.
|
||||
The page attribute *No Search* is also taken into account to prevent indexing records from only one
|
||||
page without recursion.
|
||||
|
||||
Contains a comma separated list of page uids. Spaces are trimmed.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.<identifier>.rootLineBlacklist = 3, 10, 100
|
||||
|
||||
Also it's possible to define some behaviour for the different document types. In context of TYPO3
|
||||
tables are used as document types 1:1. It's possible to configure different tables. The following
|
||||
options are available:
|
||||
|
||||
.. _additionalWhereClause:
|
||||
|
||||
additionalWhereClause
|
||||
---------------------
|
||||
|
||||
Used by: :ref:`TcaIndexer`, :ref:`PagesIndexer`.
|
||||
|
||||
Add additional SQL to where clauses to determine indexable records from the table. This way you
|
||||
can exclude specific records like ``tt_content`` records with specific ``CType`` values or
|
||||
something else. E.g. you can add a new field to the table to exclude records from indexing.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.<identifier>.additionalWhereClause = tt_content.CType NOT IN ('gridelements_pi1', 'list', 'div', 'menu')
|
||||
|
||||
.. attention::
|
||||
|
||||
Make sure to prefix all fields with the corresponding table name. The selection from
|
||||
database will contain joins and can lead to SQL errors if a field exists in multiple tables.
|
||||
|
||||
.. _abstractFields:
|
||||
|
||||
abstractFields
|
||||
--------------
|
||||
|
||||
Used by: :ref:`PagesIndexer`.
|
||||
|
||||
Define which field should be used to provide the auto generated field "search_abstract".
|
||||
The fields have to exist in the record to be indexed. Therefore fields like ``content`` are also
|
||||
possible.
|
||||
|
||||
Example::
|
||||
|
||||
# As last fallback we use the content of the page
|
||||
plugin.tx_searchcore.settings.indexing.<identifier>.abstractFields := addToList(content)
|
||||
|
||||
Default::
|
||||
|
||||
abstract, description, bodytext
|
||||
|
||||
.. _mapping:
|
||||
|
||||
mapping
|
||||
-------
|
||||
|
||||
Used by: Elasticsearch connection while indexing.
|
||||
|
||||
Define mapping for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/mapping.html
|
||||
You are able to define the mapping for each property / columns.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.tt_content.mapping {
|
||||
CType {
|
||||
type = keyword
|
||||
}
|
||||
}
|
||||
|
||||
The above example will define the ``CType`` field of ``tt_content`` as ``type: keyword``. This
|
||||
makes building a facet possible.
|
||||
|
||||
.. _index:
|
||||
|
||||
index
|
||||
-----
|
||||
|
||||
Used by: Elasticsearch connection while indexing.
|
||||
|
||||
Define index for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/indices-create-index.html
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.tt_content.index {
|
||||
analysis {
|
||||
analyzer {
|
||||
ngram4 {
|
||||
type = custom
|
||||
tokenizer = ngram4
|
||||
char_filter = html_strip
|
||||
filter = lowercase, asciifolding
|
||||
}
|
||||
}
|
||||
|
||||
tokenizer {
|
||||
ngram4 {
|
||||
type = ngram
|
||||
min_gram = 4
|
||||
max_gram = 4
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
``char_filter`` and ``filter`` are a comma separated list of options.
|
||||
|
||||
.. _dataProcessing:
|
||||
|
||||
dataProcessing
|
||||
--------------
|
||||
|
||||
Used by: All connections while indexing.
|
||||
|
||||
Configure modifications on each document before sending it to the configured connection. Same as
|
||||
provided by TYPO3 for :ref:`t3tsref:cobj-fluidtemplate` through
|
||||
:ref:`t3tsref:cobj-fluidtemplate-properties-dataprocessing`.
|
||||
|
||||
All processors are applied in the configured order, which allows later processors to work with already processed data.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.indexing.tt_content.dataProcessing {
|
||||
1 = Codappix\SearchCore\DataProcessing\CopyToProcessor
|
||||
1 {
|
||||
to = search_spellcheck
|
||||
}
|
||||
|
||||
2 = Codappix\SearchCore\DataProcessing\CopyToProcessor
|
||||
2 {
|
||||
to = search_all
|
||||
}
|
||||
}
|
||||
|
||||
The above example will copy all existing fields to the field ``search_spellcheck``. Afterwards
|
||||
all fields, including ``search_spellcheck`` will be copied to ``search_all``.
|
||||
E.g. used to index all information into a field for :ref:`spellchecking` or searching with
|
||||
different :ref:`mapping`.
|
||||
|
||||
The following processors are available:
|
||||
|
||||
``Codappix\SearchCore\DataProcessing\CopyToProcessor``
|
||||
Will copy the contents of all fields into the configured target field.
|
||||
|
||||
The following processors are planned:
|
||||
|
||||
``Codappix\SearchCore\DataProcessing\ReplaceProcessor``
|
||||
Will execute a search and replace on configured fields.
|
||||
|
||||
``Codappix\SearchCore\DataProcessing\RootLevelProcessor``
|
||||
Will attach the root level to the record.
|
||||
|
||||
``Codappix\SearchCore\DataProcessing\ChannelProcessor``
|
||||
Will add a configurable channel to the record, e.g. if you have different areas in your
|
||||
website like "products" and "infos".
|
||||
|
||||
``Codappix\SearchCore\DataProcessing\RelationResolverProcessor``
|
||||
Resolves all relations using the TCA.
|
||||
|
||||
Of course you are able to provide further processors. Just implement
|
||||
``Codappix\SearchCore\DataProcessing\ProcessorInterface`` and use the FQCN (=Fully qualified
|
||||
class name) as done in the examples above.
|
||||
|
||||
By implementing also the same interface as necessary for TYPO3
|
||||
:ref:`t3tsref:cobj-fluidtemplate-properties-dataprocessing`, you are able to reuse the same code
|
||||
also for Fluid to prepare the same record fetched from DB for your fluid.
|
125
Documentation/source/configuration/searching.rst
Normal file
125
Documentation/source/configuration/searching.rst
Normal file
|
@ -0,0 +1,125 @@
|
|||
.. _configuration_options_search:
|
||||
|
||||
Searching
|
||||
=========
|
||||
|
||||
.. _size:
|
||||
|
||||
size
|
||||
----
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Defines how many search results should be fetched to be available in the search result.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.size = 50
|
||||
|
||||
Default if not configured is 10.
|
||||
|
||||
.. _facets:
|
||||
|
||||
facets
|
||||
------
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Define aggregations for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-aggregations-bucket-terms-aggregation.html
|
||||
Currently only the term facet is provided.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.facets {
|
||||
contentTypes {
|
||||
field = CType
|
||||
}
|
||||
}
|
||||
|
||||
The above example will provide a facet with options for all found ``CType`` results together
|
||||
with a count.
|
||||
|
||||
.. _filter:
|
||||
|
||||
``filter``
|
||||
----------
|
||||
|
||||
Used by: While building search request.
|
||||
|
||||
Define filter that should be set for all requests.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.filter {
|
||||
property = value
|
||||
}
|
||||
|
||||
For Elasticsearch the fields have to be filterable, e.g. need a mapping as ``keyword``.
|
||||
|
||||
.. _minimumShouldMatch:
|
||||
|
||||
minimumShouldMatch
|
||||
------------------
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Define the minimum match for Elasticsearch, have a look at the official docs: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-minimum-should-match.html
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.minimumShouldMatch = 50%
|
||||
|
||||
.. _boost:
|
||||
|
||||
boost
|
||||
-----
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Define fields that should boost the score for results.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.boost {
|
||||
search_title = 3
|
||||
search_abstract = 1.5
|
||||
}
|
||||
|
||||
For further information take a look at
|
||||
https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_boosting_query_clauses.html
|
||||
|
||||
.. _fieldValueFactor:
|
||||
|
||||
fieldValueFactor
|
||||
----------------
|
||||
|
||||
Used by: Elasticsearch connection while building search query.
|
||||
|
||||
Define a field to use as a factor for scoring. The configuration is passed through to elastic
|
||||
search ``field_value_factor``, see: https://www.elastic.co/guide/en/elasticsearch/reference/5.2/query-dsl-function-score-query.html#function-field-value-factor
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching.field_value_factor {
|
||||
field = rootlineLevel
|
||||
modifier = reciprocal
|
||||
factor = 2
|
||||
missing = 1
|
||||
}
|
||||
|
||||
.. _mode:
|
||||
|
||||
``mode``
|
||||
--------
|
||||
|
||||
Used by: Controller while preparing action.
|
||||
|
||||
Define to switch from search to filter mode.
|
||||
|
||||
Example::
|
||||
|
||||
plugin.tx_searchcore.settings.searching {
|
||||
mode = filter
|
||||
}
|
||||
|
||||
Only ``filter`` is allowed as value. Will submit an empty query to switch to filter mode.
|
|
@ -24,7 +24,7 @@ Currently all fields are searched for a single search input.
|
|||
|
||||
Also multiple filter are supported. Filtering results by fields for string contents.
|
||||
|
||||
Even facets / aggregates are now possible. Therefore a mapping has to be defined in TypoScript for
|
||||
Facets / aggregates are also possible. Therefore a mapping has to be defined in TypoScript for
|
||||
indexing, and the facets itself while searching.
|
||||
|
||||
.. _features_planned:
|
||||
|
|
|
@ -21,12 +21,18 @@ further stuff.
|
|||
|
||||
The indexer is configurable through the following options:
|
||||
|
||||
* :ref:`allowedTables`
|
||||
|
||||
* :ref:`rootLineBlacklist`
|
||||
|
||||
* :ref:`additionalWhereClause`
|
||||
|
||||
* :ref:`abstractFields`
|
||||
|
||||
* :ref:`mapping`
|
||||
|
||||
* :ref:`index`
|
||||
|
||||
* :ref:`dataProcessing`
|
||||
|
||||
.. _PagesIndexer:
|
||||
|
||||
PagesIndexer
|
||||
|
@ -42,14 +48,18 @@ improve search.
|
|||
|
||||
The indexer is configurable through the following options:
|
||||
|
||||
* :ref:`allowedTables`
|
||||
|
||||
* :ref:`rootLineBlacklist`
|
||||
|
||||
* :ref:`additionalWhereClause`
|
||||
|
||||
* :ref:`abstractFields`
|
||||
|
||||
* :ref:`mapping`
|
||||
|
||||
* :ref:`index`
|
||||
|
||||
* :ref:`dataProcessing`
|
||||
|
||||
.. note::
|
||||
|
||||
Not all relations are resolved yet, see :issue:`17` and :pr:`20`.
|
||||
|
|
|
@ -100,7 +100,7 @@ class ProcessesAllowedTablesTest extends AbstractDataHandlerTest
|
|||
->with(
|
||||
$this->equalTo('tt_content'),
|
||||
$this->callback(function ($record) {
|
||||
return isset($record['uid']) && $record['uid'] === '2'
|
||||
return isset($record['uid']) && $record['uid'] === 2
|
||||
&& isset($record['pid']) && $record['pid'] === 1
|
||||
&& isset($record['header']) && $record['header'] === 'a new record'
|
||||
;
|
||||
|
|
85
Tests/Unit/DataProcessing/CopyToProcessorTest.php
Normal file
85
Tests/Unit/DataProcessing/CopyToProcessorTest.php
Normal file
|
@ -0,0 +1,85 @@
|
|||
<?php
|
||||
namespace Codappix\SearchCore\Tests\Unit\DataProcessing;
|
||||
|
||||
/*
|
||||
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
use Codappix\SearchCore\DataProcessing\CopyToProcessor;
|
||||
use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase;
|
||||
|
||||
/**
 * Unit tests for the CopyToProcessor data processor.
 *
 * Each data set hands a record and a configuration to the processor and
 * compares the complete returned record, including key order.
 */
class CopyToProcessorTest extends AbstractUnitTestCase
{
    /**
     * The processor has to return the record extended by the configured
     * target field, which holds all values joined by PHP_EOL.
     *
     * @test
     * @dataProvider getPossibleRecordConfigurationCombinations
     *
     * @param array $record Record handed to the processor.
     * @param array $configuration Processor configuration; "to" names the target field.
     * @param array $expectedRecord Record the processor is expected to return.
     */
    public function fieldsAreCopiedAsConfigured(array $record, array $configuration, array $expectedRecord)
    {
        $processor = new CopyToProcessor();

        $this->assertSame(
            $expectedRecord,
            $processor->processRecord($record, $configuration),
            'The processor did not return the expected processed record.'
        );
    }

    /**
     * Provides one flat record and one record containing a sub array.
     *
     * @return array
     */
    public function getPossibleRecordConfigurationCombinations()
    {
        $copyToNewField = ['to' => 'new_field'];

        $flatRecord = [
            'field 1' => 'Some content like lorem',
            'field 2' => 'Some more content like ipsum',
        ];
        $recordWithSubArray = [
            'field 1' => 'Some content like lorem',
            'field with sub2' => [
                'Tag 1',
                'Tag 2',
            ],
        ];

        // The array union appends "new_field" after the existing keys, which
        // matters as assertSame() also compares the order of array keys.
        return [
            'Copy all fields to new field' => [
                'record' => $flatRecord,
                'configuration' => $copyToNewField,
                'expectedRecord' => $flatRecord + [
                    'new_field' => implode(PHP_EOL, [
                        'Some content like lorem',
                        'Some more content like ipsum',
                    ]),
                ],
            ],
            'Copy all fields with sub array to new field' => [
                'record' => $recordWithSubArray,
                'configuration' => $copyToNewField,
                'expectedRecord' => $recordWithSubArray + [
                    'new_field' => implode(PHP_EOL, [
                        'Some content like lorem',
                        'Tag 1',
                        'Tag 2',
                    ]),
                ],
            ],
        ];
    }
}
|
|
@ -21,6 +21,8 @@ namespace Codappix\SearchCore\Tests\Unit\Domain\Index\TcaIndexer;
|
|||
*/
|
||||
|
||||
use Codappix\SearchCore\Configuration\ConfigurationContainerInterface;
|
||||
use Codappix\SearchCore\DataProcessing\CopyToProcessor;
|
||||
use Codappix\SearchCore\Domain\Index\TcaIndexer\RelationResolver;
|
||||
use Codappix\SearchCore\Domain\Index\TcaIndexer\TcaTableService;
|
||||
use Codappix\SearchCore\Tests\Unit\AbstractUnitTestCase;
|
||||
|
||||
|
@ -98,4 +100,77 @@ class TcaTableServiceTest extends AbstractUnitTestCase
|
|||
$whereClause->getParameters()
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Two processors, each with its own "to" option, are configured for the
 * table. The second target field is expected to contain the value twice,
 * as the record already holds the first target field at that point.
 *
 * @test
 */
public function executesConfiguredDataProcessingWithConfiguration()
{
    $dataProcessing = [
        '1' => [
            '_typoScriptNodeValue' => CopyToProcessor::class,
            'to' => 'new_test_field',
        ],
        '2' => [
            '_typoScriptNodeValue' => CopyToProcessor::class,
            'to' => 'new_test_field2',
        ],
    ];
    $this->configuration->expects($this->once())
        ->method('get')
        ->with('indexing.testTable.dataProcessing')
        ->willReturn($dataProcessing);

    // Only prepareRecord() keeps its real implementation, everything else is mocked.
    $relationResolver = $this->getMockBuilder(RelationResolver::class)->getMock();
    $subject = $this->getMockBuilder(TcaTableService::class)
        ->disableOriginalConstructor()
        ->setMethodsExcept(['prepareRecord'])
        ->getMock();
    $this->inject($subject, 'configuration', $this->configuration);
    $this->inject($subject, 'tableName', 'testTable');
    $this->inject($subject, 'relationResolver', $relationResolver);

    $record = ['field 1' => 'test'];
    $expectedRecord = [
        'field 1' => 'test',
        'new_test_field' => 'test',
        'new_test_field2' => 'test' . PHP_EOL . 'test',
    ];

    // The result is asserted on $record itself, not on a return value.
    $subject->prepareRecord($record);

    $this->assertSame(
        $expectedRecord,
        $record,
        'Dataprocessing is not executed by TcaTableService as expected.'
    );
}
|
||||
|
||||
/**
 * The processor is configured by its class name only, without any options.
 *
 * The expected record then contains the copied value under an empty key
 * and an additional "search_title" entry.
 *
 * @test
 */
public function executesConfiguredDataProcessingWithoutConfiguration()
{
    $dataProcessing = [CopyToProcessor::class];
    $this->configuration->expects($this->once())
        ->method('get')
        ->with('indexing.testTable.dataProcessing')
        ->willReturn($dataProcessing);

    // Only prepareRecord() keeps its real implementation, everything else is mocked.
    $relationResolver = $this->getMockBuilder(RelationResolver::class)->getMock();
    $subject = $this->getMockBuilder(TcaTableService::class)
        ->disableOriginalConstructor()
        ->setMethodsExcept(['prepareRecord'])
        ->getMock();
    $this->inject($subject, 'configuration', $this->configuration);
    $this->inject($subject, 'tableName', 'testTable');
    $this->inject($subject, 'relationResolver', $relationResolver);

    $record = ['field 1' => 'test'];
    $expectedRecord = [
        'field 1' => 'test',
        '' => 'test',
        'search_title' => 'test',
    ];

    // The result is asserted on $record itself, not on a return value.
    $subject->prepareRecord($record);

    $this->assertSame(
        $expectedRecord,
        $record,
        'Dataprocessing is not executed by TcaTableService as expected.'
    );
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue