Skip to content

Commit

Permalink
NGSTACK-834 page indexing implementation from fina + FieldMapper impl…
Browse files Browse the repository at this point in the history
…ementation for elasticsearch
  • Loading branch information
Katarina Miočić committed Apr 15, 2024
1 parent 9af5bf6 commit bee2cf8
Show file tree
Hide file tree
Showing 31 changed files with 1,491 additions and 4 deletions.
45 changes: 45 additions & 0 deletions bundle/DependencyInjection/Configuration.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace Netgen\Bundle\IbexaSearchExtraBundle\DependencyInjection;

use Ibexa\Contracts\Core\Repository\LanguageService;
use Symfony\Component\Config\Definition\Builder\ArrayNodeDefinition;
use Symfony\Component\Config\Definition\Builder\TreeBuilder;
use Symfony\Component\Config\Definition\ConfigurationInterface;
Expand All @@ -25,6 +26,8 @@ public function getConfigTreeBuilder(): TreeBuilder
$this->addIndexableFieldTypeSection($rootNode);
$this->addSearchResultExtractorSection($rootNode);
$this->addAsynchronousIndexingSection($rootNode);
$this->addUsePageIndexingSection($rootNode);
$this->addPageIndexingSection($rootNode);

return $treeBuilder;
}
Expand Down Expand Up @@ -73,4 +76,46 @@ private function addAsynchronousIndexingSection(ArrayNodeDefinition $nodeDefinit
->end()
->end();
}

/**
 * Registers the boolean "use_page_indexing" option on the given node.
 *
 * The option is disabled unless explicitly switched on in configuration.
 */
private function addUsePageIndexingSection(ArrayNodeDefinition $nodeDefinition): void
{
    $children = $nodeDefinition->children();

    $children
        ->booleanNode('use_page_indexing')
            ->defaultFalse()
            ->info('Use layouts page text indexing')
        ->end();

    $children->end();
}

/**
 * Registers the "page_indexing" section on the given node.
 *
 * The section consists of:
 *  - site_roots: list of scalars (location ids of site roots)
 *  - languages_siteaccess_map: map of scalar lists
 *  - host: single scalar value
 *  - config: map of scalar lists
 *  - allowed_content_types: list of scalars
 */
private function addPageIndexingSection(ArrayNodeDefinition $nodeDefinition): void
{
    $pageIndexing = $nodeDefinition
        ->children()
            ->arrayNode('page_indexing')
                ->info('Page indexing configuration');

    $pageIndexing
        ->children()
            ->arrayNode('site_roots')
                ->info('Location ids of site roots')
                ->scalarPrototype()->end()
            ->end()
            ->arrayNode('languages_siteaccess_map')
                ->arrayPrototype()
                    ->scalarPrototype()->end()
                ->end()
            ->end()
            ->scalarNode('host')->end()
            ->arrayNode('config')
                ->arrayPrototype()
                    ->scalarPrototype()->end()
                ->end()
            ->end()
            ->arrayNode('allowed_content_types')
                ->scalarPrototype()->end()
            ->end()
        ->end();
}

}
36 changes: 34 additions & 2 deletions bundle/DependencyInjection/NetgenIbexaSearchExtraExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ private function loadBundleSolrEngine(ContainerBuilder $container): void
/**
 * Resolves the bundle configuration and hands the processed values to the
 * individual section processors, which expose them to the container.
 */
private function processExtensionConfiguration(array $configs, ContainerBuilder $container): void
{
    $processedConfig = $this->processConfiguration(
        $this->getConfiguration($configs, $container),
        $configs,
    );

    $this->processIndexableFieldTypeConfiguration($processedConfig, $container);
    $this->processSearchResultExtractorConfiguration($processedConfig, $container);
    $this->processAsynchronousIndexingConfiguration($processedConfig, $container);
    $this->processUsePageIndexingConfiguration($processedConfig, $container);
    $this->processPageIndexingConfiguration($processedConfig, $container);
}

private function processSearchResultExtractorConfiguration(array $configuration, ContainerBuilder $container): void
Expand Down Expand Up @@ -117,4 +117,36 @@ private function processAsynchronousIndexingConfiguration(array $configuration,
$configuration['use_asynchronous_indexing'],
);
}

/**
 * Publishes the "use_page_indexing" switch as the
 * "netgen_ibexa_search_extra.use_page_indexing" container parameter.
 */
private function processUsePageIndexingConfiguration(array $configuration, ContainerBuilder $container): void
{
    $usePageIndexing = $configuration['use_page_indexing'];

    $container->setParameter('netgen_ibexa_search_extra.use_page_indexing', $usePageIndexing);
}

/**
 * Publishes every "page_indexing" option as a
 * "netgen_ibexa_search_extra.page_indexing.<option>" container parameter.
 *
 * Options missing from the processed configuration fall back to a default
 * matching the declared node type: an empty array for the array nodes and
 * null for the scalar "host" node (previously "host" fell back to an empty
 * array, which does not match a scalar option).
 */
private function processPageIndexingConfiguration(array $configuration, ContainerBuilder $container): void
{
    // The whole section may be absent when page indexing is not configured.
    $pageIndexing = $configuration['page_indexing'] ?? [];

    $defaults = [
        'site_roots' => [],
        'languages_siteaccess_map' => [],
        'host' => null,
        'config' => [],
        'allowed_content_types' => [],
    ];

    foreach ($defaults as $option => $default) {
        $container->setParameter(
            'netgen_ibexa_search_extra.page_indexing.' . $option,
            $pageIndexing[$option] ?? $default,
        );
    }
}
}
2 changes: 2 additions & 0 deletions bundle/NetgenIbexaSearchExtraBundle.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,7 @@ public function build(ContainerBuilder $container): void
$container->addCompilerPass(new Compiler\FieldType\RichTextIndexablePass());
$container->addCompilerPass(new Compiler\SearchResultExtractorPass());
$container->addCompilerPass(new Compiler\RawFacetBuilderDomainVisitorPass());
$container->addCompilerPass(new Compiler\LayoutsPageIndexingPass());
$container->addCompilerPass(new Compiler\AggregateElasticsearchContentFieldMapperMapperPass());
}
}
12 changes: 10 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
"ext-dom": "*",
"ibexa/core": "^4.6",
"symfony/messenger": "^5.4",
"symfony/proxy-manager-bridge": "^5.4"
"symfony/proxy-manager-bridge": "^5.4",
"ext-libxml": "*",
"ext-curl": "*"
},
"require-dev": {
"ibexa/fieldtype-richtext": "^4.5",
Expand All @@ -30,7 +32,8 @@
},
"suggest": {
"netgen/ibexa-site-api": "Boost your site-building productivity with Ibexa CMS",
"ibexa/solr": "Supports advanced capabilities with Ibexa search API"
"ibexa/solr": "Supports advanced capabilities with Ibexa search API",
"ibexa/elasticsearch": "Needed for layouts indexer"
},
"autoload": {
"psr-4": {
Expand All @@ -51,5 +54,10 @@
},
"scripts": {
"test": "@php vendor/bin/phpunit --colors=always"
},
"config": {
"allow-plugins": {
"php-http/discovery": false
}
}
}
170 changes: 170 additions & 0 deletions lib/Command/IndexPageContentCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
<?php

declare(strict_types=1);

namespace Netgen\IbexaSearchExtra\Command;

use Ibexa\Contracts\Core\Persistence\Handler as PersistenceHandler;
use Ibexa\Contracts\Core\Repository\ContentService;
use Ibexa\Contracts\Core\Repository\Exceptions\InvalidArgumentException;
use Ibexa\Contracts\Core\Repository\Exceptions\NotFoundException;
use Ibexa\Contracts\Core\Repository\Exceptions\UnauthorizedException;
use Ibexa\Contracts\Core\Repository\Values\Content\Content;
use Ibexa\Contracts\Core\Repository\Values\Content\Query;
use Ibexa\Contracts\Core\Repository\Values\Content\Query\Criterion;
use Ibexa\Contracts\Core\Repository\Values\Filter\Filter;
use Ibexa\Contracts\Core\Search\Handler as SearchHandler;
use Netgen\IbexaSearchExtra\Exception\IndexPageUnavailableException;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Helper\ProgressBar;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Ibexa\Contracts\Core\Repository\Values\Content\ContentList;

use function count;
use function explode;

/**
 * Console command that indexes content together with all of its locations.
 *
 * Either indexes the content given through the "content-ids" option, or, when
 * the option is omitted, every content item of the allowed content types.
 */
class IndexPageContentCommand extends Command
{
    protected static $defaultName = 'netgen-search-extra:index-page-content';

    /**
     * Number of content items requested per filtering call when indexing all allowed content.
     */
    private const CHUNK_SIZE = 50;

    /**
     * @param ContentService $contentService Used to load and filter content
     * @param SearchHandler $searchHandler Search handler receiving content and locations to index
     * @param PersistenceHandler $persistenceHandler Used to load persistence content and locations
     * @param array<string> $allowedContentTypes Content type identifiers indexed when no ids are given
     */
    public function __construct(
        private readonly ContentService $contentService,
        private readonly SearchHandler $searchHandler,
        private readonly PersistenceHandler $persistenceHandler,
        private readonly array $allowedContentTypes,
    ) {
        parent::__construct(self::$defaultName);
    }

    protected function configure(): void
    {
        $this
            ->setDescription('Index content related through layouts')
            ->addOption(
                'content-ids',
                null,
                InputOption::VALUE_REQUIRED,
                'Comma separated list of content id\'s of content to index.',
            );
    }

    /**
     * Indexes the requested content ids, or all content of the allowed types.
     *
     * @throws NotFoundException
     * @throws InvalidArgumentException
     * @throws UnauthorizedException
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $contentIds = $input->getOption('content-ids');

        if ($contentIds !== null) {
            $this->indexContentByIds(explode(',', $contentIds), $output);

            return Command::SUCCESS;
        }

        $this->indexAllowedContent($output);

        return Command::SUCCESS;
    }

    /**
     * Indexes the content items identified by the given raw id strings.
     *
     * A failure to load or index one of the items is not caught here and aborts the command.
     *
     * @param array<string> $rawContentIds Id strings as split from the "content-ids" option
     *
     * @throws NotFoundException
     * @throws UnauthorizedException
     */
    private function indexContentByIds(array $rawContentIds, OutputInterface $output): void
    {
        $contentIds = [];
        foreach ($rawContentIds as $rawContentId) {
            // Skip empty segments (e.g. a trailing comma), which would otherwise be cast to id 0.
            if ($rawContentId !== '') {
                $contentIds[] = (int) $rawContentId;
            }
        }

        $totalCount = count($contentIds);
        $output->writeln("Number of objects to index: {$totalCount}");

        $progressBar = new ProgressBar($output, $totalCount);
        $progressBar->start();

        foreach ($contentIds as $contentId) {
            $content = $this->contentService->loadContent($contentId);
            $this->indexContentWithLocations($content);
            $progressBar->advance();
        }

        $progressBar->finish();
        $output->writeln('');
    }

    /**
     * Indexes all content of the allowed content types, chunk by chunk.
     *
     * @throws InvalidArgumentException
     */
    private function indexAllowedContent(OutputInterface $output): void
    {
        $totalCount = $this->getTotalCount();

        if ($totalCount <= 0) {
            $output->writeln('No content found to index, exiting.');

            return;
        }

        $output->writeln('Found ' . $totalCount . ' content objects...');
        $output->writeln('');

        $progressBar = new ProgressBar($output, $totalCount);
        $progressBar->start();

        for ($offset = 0; $offset < $totalCount; $offset += self::CHUNK_SIZE) {
            $chunk = $this->getChunk(self::CHUNK_SIZE, $offset);

            $this->processChunk($chunk, $output, $progressBar);
        }

        $progressBar->finish();

        $output->writeln('');
        $output->writeln('');
        $output->writeln('Finished.');
    }

    /**
     * Returns the total number of content items of the allowed content types.
     *
     * @throws InvalidArgumentException
     */
    private function getTotalCount(): int
    {
        // NOTE(review): confirm that a limit of 0 does not make find() load every matching item.
        return $this->contentService->find($this->createFilter(0, 0))->getTotalCount() ?? 0;
    }

    /**
     * Returns one slice of the content items of the allowed content types.
     *
     * @throws InvalidArgumentException
     */
    private function getChunk(int $limit, int $offset): ContentList
    {
        return $this->contentService->find($this->createFilter($limit, $offset));
    }

    /**
     * Builds the filter shared by counting and chunk loading, so that both always
     * operate on the same set of content (the allowed content types).
     */
    private function createFilter(int $limit, int $offset): Filter
    {
        $filter = new Filter();
        $filter
            ->withCriterion(
                new Criterion\ContentTypeIdentifier($this->allowedContentTypes),
            )
            ->withLimit($limit)
            ->withOffset($offset)
        ;

        return $filter;
    }

    /**
     * Indexes every content item of the given list.
     *
     * When IndexPageUnavailableException is thrown for an item, its message is
     * written to the output and processing continues with the next item; the
     * progress bar is advanced only for successfully indexed items.
     */
    private function processChunk(ContentList $contentList, OutputInterface $output, ProgressBar $progressBar): void
    {
        foreach ($contentList->getIterator() as $content) {
            try {
                $this->indexContentWithLocations($content);
                $progressBar->advance();
            } catch (IndexPageUnavailableException $exception) {
                $output->writeln($exception->getMessage());
            }
        }
    }

    /**
     * Sends the given content (in the version carried by its version info) and
     * all of its locations to the search handler.
     */
    private function indexContentWithLocations(Content $content): void
    {
        $this->searchHandler->indexContent(
            $this->persistenceHandler->contentHandler()->load($content->id, $content->versionInfo->versionNo),
        );

        $locations = $this->persistenceHandler->locationHandler()->loadLocationsByContent($content->id);
        foreach ($locations as $location) {
            $this->searchHandler->indexLocation($location);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?php

declare(strict_types=1);

namespace Netgen\IbexaSearchExtra\Container\Compiler;

use Symfony\Component\DependencyInjection\Compiler\CompilerPassInterface;
use Symfony\Component\DependencyInjection\ContainerBuilder;
use Symfony\Component\DependencyInjection\Definition;
use Symfony\Component\DependencyInjection\Reference;
use function array_keys;

/**
 * This compiler pass registers tagged Elasticsearch field mappers with their aggregate services.
 *
 * For every handled mapper type, services tagged with
 * "netgen.ibexa_search_extra.elasticsearch.field_mapper.<type>" are passed through
 * addMapper() to the "netgen.ibexa_search_extra.elasticsearch.field_mapper.<type>.aggregate"
 * definition. Types whose aggregate definition does not exist are skipped.
 */
final class AggregateElasticsearchContentFieldMapperMapperPass implements CompilerPassInterface
{
    public function process(ContainerBuilder $container): void
    {
        $mapperTypes = [
            'block_translation',
            'block',
            'content',
            'content_translation',
            'location',
            'location_translation',
        ];

        foreach ($mapperTypes as $mapperType) {
            $this->processVisitors($container, $mapperType);
        }
    }

    /**
     * Wires the mappers of a single type into the aggregate of that type, if it is defined.
     */
    private function processVisitors(ContainerBuilder $container, string $name): void
    {
        // The tag name doubles as the prefix of the aggregate service id.
        $tag = sprintf('netgen.ibexa_search_extra.elasticsearch.field_mapper.%s', $name);
        $aggregateId = $tag . '.aggregate';

        if ($container->hasDefinition($aggregateId)) {
            $this->registerMappers(
                $container->getDefinition($aggregateId),
                $container->findTaggedServiceIds($tag),
            );
        }
    }

    /**
     * Adds one addMapper() call per service id to the given definition.
     *
     * @param array $mapperIds Tagged services, keyed by service id
     */
    private function registerMappers(Definition $definition, array $mapperIds): void
    {
        foreach ($mapperIds as $serviceId => $tagAttributes) {
            $definition->addMethodCall('addMapper', [new Reference($serviceId)]);
        }
    }
}
Loading

0 comments on commit bee2cf8

Please sign in to comment.