Skip to content

Commit

Permalink
Merge branch '2.10.x' into 2.11.x
Browse files Browse the repository at this point in the history
  • Loading branch information
romainruaud committed Jul 4, 2023
2 parents 776a483 + 5a4b7a2 commit ad46f87
Show file tree
Hide file tree
Showing 19 changed files with 410 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ public function getType();
*/
public function isSearchable();

/**
 * Is the field searchable and does it contain reference (sku) data.
 *
 * @return bool
 */
public function isSearchableReference();

/**
* Is the field filterable in navigation.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ interface MappingInterface
const DEFAULT_SEARCH_FIELD = 'search';
const DEFAULT_SPELLING_FIELD = 'spelling';
const DEFAULT_AUTOCOMPLETE_FIELD = 'autocomplete';
const DEFAULT_REFERENCE_FIELD = 'reference';

/**
* List of the properties of the mapping.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,33 @@ public function getSpanSize();
* @return false|int
*/
public function getMinScore();

/**
* Check if the reference collector field should be used instead of the simple 'sku' field
* when building the exact match filter query.
*
* @return bool
*/
public function isUsingReferenceInExactMatchFilter();

/**
* Check if all tokens of the term vectors response should be used.
*
* @return bool
*/
public function isUsingAllTokens();

/**
* Check if the term vectors request should also include the reference analyzer collector field.
*
* @return bool
*/
public function isUsingReferenceAnalyzer();

/**
 * Check if the default analyzer of each field should be used when building the exact match filter query.
 *
 * @return bool
 */
public function isUsingDefaultAnalyzerInExactMatchFilter();
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,18 @@ public function getQueryText();
* @return float
*/
public function getCutoffFrequency();

/**
* Is the spellcheck request using all tokens returned by the term vectors.
*
* @return boolean
*/
public function isUsingAllTokens();

/**
* Should the spellcheck request target the 'reference' collector field.
*
* @return boolean
*/
public function isUsingReference();
}
6 changes: 6 additions & 0 deletions src/module-elasticsuite-core/Index/Mapping.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ class Mapping implements MappingInterface
FieldInterface::ANALYZER_WHITESPACE,
FieldInterface::ANALYZER_SHINGLE,
],
self::DEFAULT_REFERENCE_FIELD => [
FieldInterface::ANALYZER_REFERENCE,
FieldInterface::ANALYZER_WHITESPACE,
FieldInterface::ANALYZER_SHINGLE,
],
];

/**
Expand All @@ -72,6 +77,7 @@ class Mapping implements MappingInterface
private $copyFieldMap = [
'isSearchable' => self::DEFAULT_SEARCH_FIELD,
'isUsedInSpellcheck' => self::DEFAULT_SPELLING_FIELD,
'isSearchableReference' => self::DEFAULT_REFERENCE_FIELD,
];

/**
Expand Down
8 changes: 8 additions & 0 deletions src/module-elasticsuite-core/Index/Mapping/Field.php
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ public function isSearchable(): bool
return (bool) $this->config['is_searchable'];
}

/**
 * Tells whether the field is searchable and has the reference analyzer
 * configured as its default search analyzer.
 *
 * {@inheritdoc}
 */
public function isSearchableReference(): bool
{
    // A field that is not searchable can never be a searchable reference.
    if (!$this->isSearchable()) {
        return false;
    }

    $defaultSearchAnalyzer = $this->config['default_search_analyzer'];

    return FieldInterface::ANALYZER_REFERENCE === $defaultSearchAnalyzer;
}

/**
* {@inheritdoc}
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use Smile\ElasticsuiteCore\Api\Index\MappingInterface;
use Smile\ElasticsuiteCore\Api\Index\Mapping\FieldInterface;
use Smile\ElasticsuiteCore\Helper\Cache as CacheHelper;
use Smile\ElasticsuiteCore\Search\Request\RelevanceConfig\App\Config\ScopePool;

/**
* Spellchecker Elasticsearch implementation.
Expand Down Expand Up @@ -69,7 +70,7 @@ public function getSpellingType(RequestInterface $request)

if ($spellingType === false) {
$spellingType = $this->loadSpellingType($request);
$this->cacheHelper->saveCache($cacheKey, $spellingType, [$request->getIndex()]);
$this->cacheHelper->saveCache($cacheKey, $spellingType, [$request->getIndex(), ScopePool::CACHE_TAG]);
}

return $spellingType;
Expand All @@ -89,7 +90,7 @@ private function loadSpellingType(RequestInterface $request)
try {
$cutoffFrequencyLimit = $this->getCutoffrequencyLimit($request);
$termVectors = $this->getTermVectors($request);
$queryTermStats = $this->parseTermVectors($termVectors, $cutoffFrequencyLimit);
$queryTermStats = $this->parseTermVectors($termVectors, $cutoffFrequencyLimit, $request->isUsingAllTokens());

if ($queryTermStats['total'] == $queryTermStats['stop']) {
$spellingType = self::SPELLING_TYPE_PURE_STOPWORDS;
Expand Down Expand Up @@ -163,6 +164,11 @@ private function getTermVectors(RequestInterface $request)
],
];

if ($request->isUsingReference()) {
$doc['fields'][] = MappingInterface::DEFAULT_REFERENCE_FIELD . "." . FieldInterface::ANALYZER_REFERENCE;
$doc['doc'][MappingInterface::DEFAULT_REFERENCE_FIELD] = $request->getQueryText();
}

$docs = [];

// Compute the mtermvector query on all shards to ensure exhaustive results.
Expand All @@ -185,15 +191,18 @@ private function getTermVectors(RequestInterface $request)
* - missing : number of terms of the query not found into the index
* - standard : number of terms of the query found using the standard analyzer.
*
* @param array $termVectors The term vector query response.
* @param int $cutoffFrequencyLimit Cutoff freq (max absolute number of docs to consider term as a stopword).
* @SuppressWarnings(PHPMD.BooleanArgumentFlag)
*
* @param array $termVectors The term vector query response.
* @param int $cutoffFrequencyLimit Cutoff freq (max absolute number of docs to consider term as a stopword).
* @param boolean $useAllTokens Whether to use all tokens or not
*
* @return array
*/
private function parseTermVectors($termVectors, $cutoffFrequencyLimit)
private function parseTermVectors($termVectors, $cutoffFrequencyLimit, $useAllTokens = false)
{
$queryTermStats = ['stop' => 0, 'exact' => 0, 'standard' => 0, 'missing' => 0];
$statByPosition = $this->extractTermStatsByPosition($termVectors);
$statByPosition = $this->extractTermStatsByPosition($termVectors, $useAllTokens);

foreach ($statByPosition as $positionStat) {
$type = 'missing';
Expand All @@ -203,6 +212,8 @@ private function parseTermVectors($termVectors, $cutoffFrequencyLimit)
$type = 'stop';
} elseif (in_array(FieldInterface::ANALYZER_WHITESPACE, $positionStat['analyzers'])) {
$type = 'exact';
} elseif (in_array(FieldInterface::ANALYZER_REFERENCE, $positionStat['analyzers'])) {
$type = 'exact';
}
}
$queryTermStats[$type]++;
Expand All @@ -215,18 +226,20 @@ private function parseTermVectors($termVectors, $cutoffFrequencyLimit)

/**
* Extract term stats by position from a term vectors query response.
* Wil return an array of doc_freq, analayzers and term by position.
* Will return an array of doc_freq, analyzers and term by position.
*
* @SuppressWarnings(PHPMD.CyclomaticComplexity)
* @SuppressWarnings(PHPMD.BooleanArgumentFlag)
*
* @param array $termVectors The term vector query response.
* @param array $termVectors The term vector query response.
* @param boolean $useAllTokens Whether to use all tokens returned in the term vector response.
*
* @return array
*/
private function extractTermStatsByPosition($termVectors)
private function extractTermStatsByPosition($termVectors, $useAllTokens = false)
{
$statByPosition = [];
$analyzers = [FieldInterface::ANALYZER_STANDARD, FieldInterface::ANALYZER_WHITESPACE];
$analyzers = [FieldInterface::ANALYZER_STANDARD, FieldInterface::ANALYZER_WHITESPACE, FieldInterface::ANALYZER_REFERENCE];

if (is_array($termVectors) && isset($termVectors['docs'])) {
foreach ($termVectors['docs'] as $termVector) {
Expand All @@ -236,6 +249,9 @@ private function extractTermStatsByPosition($termVectors)
foreach ($fieldData['terms'] as $term => $termStats) {
foreach ($termStats['tokens'] as $token) {
$positionKey = $token['position'];
if ($useAllTokens) {
$positionKey = "{$token['position']}_{$token['start_offset']}_{$token['end_offset']}";
}

if (!isset($termStats['doc_freq'])) {
$termStats['doc_freq'] = 0;
Expand Down Expand Up @@ -265,7 +281,7 @@ private function extractTermStatsByPosition($termVectors)
}

/**
* Extract analayser from a mapping property name.
* Extract analyser from a mapping property name.
*
* @param string $propertyName Property name (eg. : search.whitespace)
*
Expand Down
2 changes: 2 additions & 0 deletions src/module-elasticsuite-core/Search/Request/Builder.php
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ private function getSpellingType(ContainerConfigurationInterface $containerConfi
'index' => $containerConfig->getIndexName(),
'queryText' => $queryText,
'cutoffFrequency' => $containerConfig->getRelevanceConfig()->getCutOffFrequency(),
'isUsingAllTokens' => $containerConfig->getRelevanceConfig()->isUsingAllTokens(),
'isUsingReference' => $containerConfig->getRelevanceConfig()->isUsingReferenceAnalyzer(),
];

$spellcheckRequest = $this->spellcheckRequestFactory->create($spellcheckRequestParams);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,50 @@ class RelevanceConfig implements RelevanceConfigurationInterface
*/
private $minScore;

/**
* @var boolean
*/
private $useReferenceInExactMatchFilter;

/**
* @var boolean
*/
private $useDefaultAnalyzerInExactMatchFilter;

/**
* @var boolean
*/
private $useAllTokens;

/**
* @var boolean
*/
private $useReferenceAnalyzer;

/**
* RelevanceConfiguration constructor.
*
* @SuppressWarnings(PHPMD.BooleanArgumentFlag)
* @SuppressWarnings(PHPMD.ExcessiveParameterList)
*
* @param string $minimumShouldMatch Minimum should match clause of the text query.
* @param float $tieBreaker Tie breaker for multimatch queries.
* @param int|null $phraseMatchBoost The Phrase match boost value, or null if not
* enabled
* @param float $cutOffFrequency The cutoff Frequency value
* @param FuzzinessConfigurationInterface|null $fuzziness The fuzziness Configuration, or null
* @param boolean $enablePhoneticSearch The phonetic Configuration, or null
* @param int|null $spanMatchBoost The Span match boost value, or null if not
* enabled
* @param int|null $spanSize The number of terms to match in span queries
* @param int|null $minScore The Min Score value, or null if not enabled
* @param string $minimumShouldMatch Minimum should match clause of the text query.
* @param float $tieBreaker Tie breaker for multimatch queries.
* @param int|null $phraseMatchBoost The Phrase match boost value, or null if not
* enabled
* @param float $cutOffFrequency The cutoff Frequency value
* @param FuzzinessConfigurationInterface|null $fuzziness The fuzziness Configuration, or null
* @param boolean $enablePhoneticSearch Whether phonetic search is enabled
* @param int|null $spanMatchBoost The Span match boost value, or null if not
* enabled
* @param int|null $spanSize The number of terms to match in span queries
* @param int|null $minScore The Min Score value, or null if not enabled
* @param boolean $useReferenceInExactMatchFilter Whether to use the reference collector field
* instead of 'sku' field in the exact match filter
* @param boolean $useDefaultAnalyzerInExactMatchFilter Whether to use 'field' or 'field.default_analyzer'
* in the exact match filter query
* @param boolean $useAllTokens Whether to take into account all term vector tokens
* @param boolean $useReferenceAnalyzer Whether to include the collector field associated
* with the reference analyzer in term vectors request
*/
public function __construct(
$minimumShouldMatch,
Expand All @@ -96,7 +124,11 @@ public function __construct(
$enablePhoneticSearch = false,
$spanMatchBoost = null,
$spanSize = null,
$minScore = null
$minScore = null,
$useReferenceInExactMatchFilter = false,
$useDefaultAnalyzerInExactMatchFilter = false,
$useAllTokens = false,
$useReferenceAnalyzer = false
) {
$this->minimumShouldMatch = $minimumShouldMatch;
$this->tieBreaker = $tieBreaker;
Expand All @@ -107,6 +139,10 @@ public function __construct(
$this->spanMatchBoost = $spanMatchBoost;
$this->spanSize = $spanSize;
$this->minScore = $minScore;
$this->useReferenceInExactMatchFilter = $useReferenceInExactMatchFilter;
$this->useAllTokens = $useAllTokens;
$this->useReferenceAnalyzer = $useReferenceAnalyzer;
$this->useDefaultAnalyzerInExactMatchFilter = $useDefaultAnalyzerInExactMatchFilter;
}

/**
Expand Down Expand Up @@ -194,4 +230,36 @@ public function getMinScore()
{
return (int) $this->minScore;
}

/**
 * Returns the "use reference in exact match filter" flag as a strict boolean.
 *
 * {@inheritDoc}
 */
public function isUsingReferenceInExactMatchFilter()
{
    return boolval($this->useReferenceInExactMatchFilter);
}

/**
 * Returns the "use default analyzer in exact match filter" flag as a strict boolean.
 *
 * {@inheritDoc}
 */
public function isUsingDefaultAnalyzerInExactMatchFilter()
{
    return boolval($this->useDefaultAnalyzerInExactMatchFilter);
}

/**
 * Returns the "use all tokens" flag as a strict boolean.
 *
 * {@inheritDoc}
 */
public function isUsingAllTokens()
{
    return boolval($this->useAllTokens);
}

/**
 * Returns the "use reference analyzer" flag as a strict boolean.
 *
 * {@inheritDoc}
 */
public function isUsingReferenceAnalyzer()
{
    return boolval($this->useReferenceAnalyzer);
}
}
Loading

0 comments on commit ad46f87

Please sign in to comment.