Skip to content

Commit

Permalink
NGSTACK configuration modifications
Browse files Browse the repository at this point in the history
  • Loading branch information
Katarina Miočić committed May 9, 2024
1 parent 6dc18e4 commit 1d89850
Show file tree
Hide file tree
Showing 10 changed files with 357 additions and 283 deletions.
115 changes: 55 additions & 60 deletions bundle/DependencyInjection/Configuration.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,82 +89,77 @@ private function addPageIndexingSection(ArrayNodeDefinition $nodeDefinition): vo
$nodeDefinition
->children()
->arrayNode('page_indexing')
->addDefaultsIfNotSet()
->info('Page indexing configuration')
->children()
->booleanNode('enabled')
->info('Use layouts page text indexing')
->defaultFalse()
->end()
->arrayNode('site_roots')
->info('Site root ids')
->arrayNode('sites')
->useAttributeAsKey('name')
->normalizeKeys(false)
->validate()
->ifTrue($keyValidator)
->thenInvalid('Site root name must be of string type.')
->end()
->integerPrototype()
->beforeNormalization()->always(static fn ($v) => is_string($v) ? (int)$v : $v)->end()
->info('Content ID')
->end()
->end()
->arrayNode('languages_siteaccess_map')
->info('Language key mapped to page siteaccess')
->useAttributeAsKey('name')
->normalizeKeys(false)
->validate()
->ifTrue($keyValidator)
->thenInvalid('Page name must be of string type.')
->end()
->arrayPrototype()
->useAttributeAsKey('name')
->normalizeKeys(false)
->validate()
->ifTrue($keyValidator)
->thenInvalid('Language code must be of string type.')
->end()
->scalarPrototype()
->validate()
->ifTrue(static fn ($v) => !is_string($v))
->thenInvalid('Siteaccess name must be of string type.')
->children()
->integerNode('tree_root_location_id')
->info('Site root id')
->beforeNormalization()->always(static fn ($v) => is_string($v) ? (int)$v : $v)->end()
->end()
->end()
->end()
->end()
->scalarNode('host')
->info('Host to index page from, defined in .env files')
->validate()
->ifTrue(static fn ($v) => !is_string($v))
->thenInvalid('Host must be of string type.')
->end()
->end()
->arrayNode('config')
->info('Config for separating page text by importance of the html tags and classes, used to index content to separate solr fields')
->validate()
->ifTrue($keyValidator)
->thenInvalid('Array key (level of field importance) must be of string type.')
->end()
->arrayPrototype()
->useAttributeAsKey('name')
->normalizeKeys(false)
->scalarPrototype()
->validate()
->ifTrue(static fn ($v) => !is_string($v))
->thenInvalid('HTML selector must be of string type.')
->arrayNode('languages_siteaccess_map')
->info('Language key mapped to page siteaccess')
->useAttributeAsKey('name')
->normalizeKeys(false)
->validate()
->ifTrue($keyValidator)
->thenInvalid('Page name must be of string type.')
->end()
->scalarPrototype()
->validate()
->ifTrue(static fn ($v) => !is_string($v))
->thenInvalid('Siteaccess name must be of string type.')
->end()
->end()
->end()
->arrayNode('fields')
->info('Config for separating page text by importance of the html tags and classes, used to index content to separate solr fields')
->validate()
->ifTrue($keyValidator)
->thenInvalid('Array key (level of field importance) must be of string type.')
->end()
->arrayPrototype()
->useAttributeAsKey('name')
->normalizeKeys(false)
->scalarPrototype()
->validate()
->ifTrue(static fn ($v) => !is_string($v))
->thenInvalid('HTML selector must be of string type.')
->end()
->end()
->end()
->end()
->arrayNode('allowed_content_types')
->info('Content types to index')
->useAttributeAsKey('name')
->normalizeKeys(false)
->scalarPrototype()
->validate()
->ifTrue(static fn ($v) => !is_string($v))
->thenInvalid('Content type identifier must be of string type.')
->end()
->end()
->end()
->scalarNode('host')
->info('Host to index page from, defined in .env files')
->validate()
->ifTrue(static fn ($v) => !is_string($v))
->thenInvalid('Host must be of string type.')
->end()
->end()
->end()
->end()
->end()
->arrayNode('allowed_content_types')
->info('Content types to index')
->useAttributeAsKey('name')
->normalizeKeys(false)
->scalarPrototype()
->validate()
->ifTrue(static fn ($v) => !is_string($v))
->thenInvalid('Content type identifier must be of string type.')
->end()
->end()
->end()
->end()
->end();
Expand Down
77 changes: 58 additions & 19 deletions bundle/DependencyInjection/NetgenIbexaSearchExtraExtension.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@

class NetgenIbexaSearchExtraExtension extends Extension implements PrependExtensionInterface
{
private static array $defaultConfiguration = [
'tree_root_location_id' => null,
'languages_siteaccess_map' => [],
'host' => null,
'fields' => [],
'allowed_content_types' => []
];
public function getAlias(): string
{
return 'netgen_ibexa_search_extra';
Expand Down Expand Up @@ -119,30 +126,62 @@ private function processAsynchronousIndexingConfiguration(array $configuration,

private function processPageIndexingConfiguration(array $configuration, ContainerBuilder $container): void
{

$container->setParameter(
'netgen_ibexa_search_extra.page_indexing.site_roots',
$configuration['page_indexing']['site_roots'] ?? [],
);
$container->setParameter(
'netgen_ibexa_search_extra.page_indexing.languages_siteaccess_map',
$configuration['page_indexing']['languages_siteaccess_map'] ?? [],
);
$container->setParameter(
'netgen_ibexa_search_extra.page_indexing.host',
$configuration['page_indexing']['host'] ?? null,
);
$container->setParameter(
'netgen_ibexa_search_extra.page_indexing.config',
$configuration['page_indexing']['config'] ?? [],
);
$container->setParameter(
'netgen_ibexa_search_extra.page_indexing.allowed_content_types',
$configuration['page_indexing']['allowed_content_types'] ?? [],
'netgen_ibexa_search_extra.page_indexing.sites',
$configuration['page_indexing']['sites'] ?? [],
);

$container->setParameter(
'netgen_ibexa_search_extra.page_indexing.enabled',
$configuration['page_indexing']['enabled'] ?? false,
);

if (!$configuration['page_indexing']['enabled']) {
return;
}

if ($configuration['page_indexing']['sites'] === []) {
$container->setParameter(
'netgen_ibexa_search_extra.page_indexing.sites',
[
'default' => self::$defaultConfiguration
]
);
return;
}
foreach ($container->getParameter('netgen_ibexa_search_extra.page_indexing.sites') as $siteName => $config) {
$this->setPageIndexingSitesParameters($configuration, $container, $siteName);
}
}

/**
 * Fills in any missing per-site page indexing keys for one site and writes
 * the complete sites map back to the container.
 *
 * Keys already present for the site are left untouched; only absent keys
 * receive the value from self::$defaultConfiguration.
 *
 * @param array $configuration Processed bundle configuration (not read by this method)
 * @param ContainerBuilder $container Container holding the `page_indexing.sites` parameter
 * @param string $siteName Key of the site entry to complete
 */
private function setPageIndexingSitesParameters(array $configuration, ContainerBuilder $container, string $siteName): void
{
    $parameterName = 'netgen_ibexa_search_extra.page_indexing.sites';

    /** @var array $sites */
    $sites = $container->getParameter($parameterName);

    // Walk the default table in declaration order so that missing keys are
    // appended in a stable sequence.
    foreach (self::$defaultConfiguration as $key => $fallback) {
        if (array_key_exists($key, $sites[$siteName])) {
            continue;
        }

        $sites[$siteName][$key] = $fallback;
    }

    $container->setParameter($parameterName, $sites);
}
}
67 changes: 26 additions & 41 deletions lib/Command/IndexPageContentCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ class IndexPageContentCommand extends Command
protected static $defaultName = 'netgen-search-extra:index-page-content';

/**
* @param array<string> $allowedContentTypes
* @param array<string, mixed> $sitesConfig
*/
public function __construct(
private readonly ContentService $contentService,
private readonly SearchHandler $searchHandler,
private readonly PersistenceHandler $persistenceHandler,
private readonly array $allowedContentTypes,
private readonly array $sitesConfig,
) {
parent::__construct($this::$defaultName);
}
Expand All @@ -57,44 +57,21 @@ protected function configure(): void
*/
protected function execute(InputInterface $input, OutputInterface $output): int
{
$contentIds = $input->getOption('content-ids');

if ($contentIds !== null) {
$this->indexByContentIds($contentIds, $output);
} else {
$this->indexAllContent($output);
foreach ($this->sitesConfig as $siteConfig) {
$this->indexContent($output, $input, $siteConfig);
}

return Command::SUCCESS;
}

/**
* @throws \Ibexa\Contracts\Core\Repository\Exceptions\NotFoundException
* @throws \Ibexa\Contracts\Core\Repository\Exceptions\UnauthorizedException
*/
private function indexByContentIds(mixed $contentIds, OutputInterface $output): int
private function indexContent(OutputInterface $output, InputInterface $input, array $siteConfig): int
{
$contentIds = explode(',', $contentIds);

$totalCount = count($contentIds);
$output->writeln("Number of objects to index: {$totalCount}");

$progressBar = new ProgressBar($output, $totalCount);
$progressBar->start();
foreach ($contentIds as $contentId) {
$content = $this->contentService->loadContent((int)$contentId);
$this->indexContentWithLocations($content);
$progressBar->advance();
}

return Command::SUCCESS;
}
$contentIds = explode(',', $input->getOption('content-ids'));

private function indexAllContent(OutputInterface $output): int
{
$allowedContentTypes = $siteConfig['allowed_content_types'];
$offset = 0;
$limit = 50;
$totalCount = $this->getTotalCount();
$totalCount = $this->getTotalCount($allowedContentTypes, $contentIds);
$progressBar = new ProgressBar($output, $totalCount);

if ($totalCount <= 0) {
Expand All @@ -109,7 +86,7 @@ private function indexAllContent(OutputInterface $output): int
$progressBar->start($totalCount);

while ($offset < $totalCount) {
$chunk = $this->getChunk($limit, $offset);
$chunk = $this->getChunk($limit, $offset, $allowedContentTypes, $contentIds);

$this->processChunk($chunk, $output, $progressBar);

Expand All @@ -128,33 +105,41 @@ private function indexAllContent(OutputInterface $output): int
/**
* @throws \Ibexa\Contracts\Core\Repository\Exceptions\InvalidArgumentException
*/
private function getTotalCount(): int
private function getTotalCount(array $allowedContentTypes, array $contentIds): int
{
$filter = new Filter();
$filter = $this->getFilter($allowedContentTypes, $contentIds);

$filter
->withCriterion(
new Query\Criterion\ContentTypeIdentifier($this->allowedContentTypes)
)
->withLimit(0)
->withOffset(0)
;
->withOffset(0);

return $this->contentService->find($filter)->getTotalCount() ?? 0;
}

/**
* @throws \Ibexa\Contracts\Core\Repository\Exceptions\InvalidArgumentException
*/
private function getChunk(int $limit, int $offset): ContentList
private function getChunk(int $limit, int $offset, array $allowedContentTypes, array $contentIds): ContentList
{
$filter = new Filter();
$filter = $this->getFilter($allowedContentTypes, $contentIds);
$filter
->withLimit($limit)
->withOffset($offset)
;
return $this->contentService->find($filter);
}

/**
 * Builds the content filter shared by the count and chunk queries.
 *
 * The filter always carries a content type identifier criterion; a content
 * id criterion is added on top only when at least one id is supplied.
 *
 * @param array $allowedContentTypes Content type identifiers passed to the type criterion
 * @param array $contentIds Optional content ids passed to the id criterion
 */
private function getFilter(array $allowedContentTypes, array $contentIds = []): Filter
{
    $contentFilter = new Filter();
    $contentFilter->withCriterion(
        new Query\Criterion\ContentTypeIdentifier($allowedContentTypes),
    );

    // No explicit ids requested: the type restriction alone applies.
    if ($contentIds === []) {
        return $contentFilter;
    }

    $contentFilter->andWithCriterion(
        new Query\Criterion\ContentId($contentIds),
    );

    return $contentFilter;
}

private function processChunk(ContentList $contentList, OutputInterface $output, ProgressBar $progressBar): void
{
foreach ($contentList->getIterator() as $content) {
Expand Down
4 changes: 2 additions & 2 deletions lib/Container/Compiler/PageIndexingPass.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ public function process(ContainerBuilder $container)
->register(ContentPageTextFieldMapper::class, ContentPageTextFieldMapper::class)
->setArguments([
new Reference('netgen.ibexa_search_extra.page_indexing.page_text_extractor'),
'%netgen_ibexa_search_extra.page_indexing.allowed_content_types%',
'%netgen_ibexa_search_extra.page_indexing.sites%',
])
->addTag('ibexa.search.solr.field.mapper.content.translation');
}
}
}
Loading

0 comments on commit 1d89850

Please sign in to comment.