diff --git a/README.rst b/README.rst
index d0f6a8d..56ae5fd 100644
--- a/README.rst
+++ b/README.rst
@@ -118,4 +118,26 @@ Core changelog is treated as a "sub manual" of the core manual. To index it, jus
To avoid duplicates search is indexing Core changelog only from "main" version/branch of the core documentation.
E.g. when you run ``./bin/console docsearch:import c/typo3/cms-core/main/`` then the changelog for all versions will be indexed,
-but if you run `./bin/console docsearch:import c/typo3/cms-core/12.4/` the changelog will NOT be indexed.
\ No newline at end of file
+but if you run ``./bin/console docsearch:import c/typo3/cms-core/12.4/`` the changelog will NOT be indexed.
+
+Excluded and ignored files and folders
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+There are several files and folders that are excluded from indexing by default.
+You can find them in the ``services.yml`` file in the ``docsearch`` section.
+
+If you want to exclude more files or folders, you can add them to the ``excluded_directories`` section.
+
+There are also specific places in the code where files or folders are ignored.
+
+Inside the ``Manual::getFilesWithSections()`` method, the Finder is configured to ignore several files and folders.
+In the same place, if the indexed package is ``typo3/cms-core``, the ``Changelog`` folder is excluded from indexing,
+as it will be indexed as a part of the TYPO3 core manual (see ``Manual::getSubManuals()`` for more details).
+
+Since ``typo3/cms-core`` is a special package for core manuals, only the manuals from the ``main`` version should be indexed.
+To achieve this, the ``DirectoryFinderService::getFolderFilter() ... isNotIgnoredPath()`` method is used.
+It will check if the processed directory is ``/c/typo3/cms-core/`` and, if the version is not ``main``, the whole directory (other version) will be ignored.
+
+The ``ImportManualHTMLService::importSectionsFromManual()`` method will check if the file contains the
+``<meta name="x-typo3-indexer" content="noindex">`` meta tag. If such a tag exists inside the file, the file will be ignored.
+
diff --git a/src/Dto/Manual.php b/src/Dto/Manual.php
index 5b6e63c..2200582 100644
--- a/src/Dto/Manual.php
+++ b/src/Dto/Manual.php
@@ -67,7 +67,7 @@ public function getFilesWithSections(): Finder
->in($this->getAbsolutePath())
->name('*.html')
->notName(['search.html', 'genindex.html', 'Targets.html', 'Quicklinks.html'])
- ->notPath(['_buildinfo', '_images', '_panels_static', '_sources', '_static', 'singlehtml', 'Sitemap']);
+ ->notPath(['_buildinfo', '_images', '_panels_static', '_sources', '_static', 'singlehtml']);
if ($this->getTitle() === 'typo3/cms-core') {
$finder->notPath('Changelog');
diff --git a/src/Service/ImportManualHTMLService.php b/src/Service/ImportManualHTMLService.php
index 6dcf1db..618fcbc 100644
--- a/src/Service/ImportManualHTMLService.php
+++ b/src/Service/ImportManualHTMLService.php
@@ -36,6 +36,9 @@ private function importSectionsFromManual(Manual $manual): void
$this->dispatcher->dispatch(new ManualStart($files), ManualStart::NAME);
foreach ($files as $file) {
+ if ($this->parser->checkIfMetaTagExistsInFile($file, 'x-typo3-indexer', 'noindex')) {
+ continue;
+ }
$this->importSectionsFromFile($file, $manual);
$this->dispatcher->dispatch(new ManualAdvance(), ManualAdvance::NAME);
}
diff --git a/src/Service/ParseDocumentationHTMLService.php b/src/Service/ParseDocumentationHTMLService.php
index c6563b5..8cce6de 100644
--- a/src/Service/ParseDocumentationHTMLService.php
+++ b/src/Service/ParseDocumentationHTMLService.php
@@ -9,6 +9,22 @@ class ParseDocumentationHTMLService
{
private bool $newRendering = true;
+    /**
+     * Tells whether the HTML of the given file contains a matching meta tag.
+     *
+     * The tag is looked up with a CSS attribute selector built from the given
+     * name and, when provided, the given content.
+     *
+     * NOTE: both values are interpolated into the selector unescaped, so they
+     * must not contain double quotes.
+     *
+     * @param SplFileInfo $file    File whose contents are parsed
+     * @param string      $name    Expected value of the meta tag's "name" attribute
+     * @param string|null $content Expected value of the "content" attribute; null skips the content check
+     *
+     * @return bool True when at least one matching meta tag is found
+     */
+    public function checkIfMetaTagExistsInFile(SplFileInfo $file, string $name, ?string $content = null): bool
+    {
+        $selector = sprintf('meta[name="%s"]', $name);
+
+        // Narrow the selector only when the caller asked for a specific content value.
+        if ($content !== null) {
+            $selector .= sprintf('[content="%s"]', $content);
+        }
+
+        $crawler = new Crawler($file->getContents());
+
+        return $crawler->filter($selector)->count() > 0;
+    }
+
public function getSectionsFromFile(SplFileInfo $file): array
{
$fileContents = $file->getContents();
diff --git a/tests/Unit/Service/ParseDocumentationHTMLServiceTest.php b/tests/Unit/Service/ParseDocumentationHTMLServiceTest.php
index 9f47a0c..8636dd3 100644
--- a/tests/Unit/Service/ParseDocumentationHTMLServiceTest.php
+++ b/tests/Unit/Service/ParseDocumentationHTMLServiceTest.php
@@ -12,6 +12,54 @@ class ParseDocumentationHTMLServiceTest extends TestCase
{
use ProphecyTrait;
+    /**
+     * A meta tag is reported as present when it is looked up by name only.
+     */
+    public function testMetaTagExistsByNameOnly(): void
+    {
+        // The fixture has to contain the meta tag, otherwise the lookup has nothing to find.
+        $fileContent = '<html><head><meta name="x-typo3-indexer" content="noindex"></head><body></body></html>';
+        $file = $this->prophesize(SplFileInfo::class);
+        $file->getContents()->willReturn($fileContent);
+
+        $subject = new ParseDocumentationHTMLService();
+        $result = $subject->checkIfMetaTagExistsInFile($file->reveal(), 'x-typo3-indexer');
+
+        $this->assertTrue($result);
+    }
+
+    /**
+     * A meta tag is reported as present when both its name and its content match.
+     */
+    public function testMetaTagExistsByNameAndContent(): void
+    {
+        // The fixture carries the exact name/content pair that is looked up below.
+        $fileContent = '<html><head><meta name="x-typo3-indexer" content="noindex"></head><body></body></html>';
+        $file = $this->prophesize(SplFileInfo::class);
+        $file->getContents()->willReturn($fileContent);
+
+        $subject = new ParseDocumentationHTMLService();
+        $result = $subject->checkIfMetaTagExistsInFile($file->reveal(), 'x-typo3-indexer', 'noindex');
+
+        $this->assertTrue($result);
+    }
+
+    /**
+     * A meta tag with a different name must not be reported as a match.
+     */
+    public function testMetaTagDoesNotExistByName(): void
+    {
+        // The fixture contains a meta tag, but under another name than the one looked up,
+        // so the negative result really comes from the name comparison.
+        $fileContent = '<html><head><meta name="x-typo3-indexer" content="noindex"></head><body></body></html>';
+        $file = $this->prophesize(SplFileInfo::class);
+        $file->getContents()->willReturn($fileContent);
+
+        $subject = new ParseDocumentationHTMLService();
+        $result = $subject->checkIfMetaTagExistsInFile($file->reveal(), 'x-typo3-version');
+
+        $this->assertFalse($result);
+    }
+
+    /**
+     * A meta tag with the right name but a different content must not be reported as a match.
+     */
+    public function testMetaTagDoesNotExistByContent(): void
+    {
+        // Same name as the one looked up, different content: only the content check can reject it.
+        $fileContent = '<html><head><meta name="x-typo3-indexer" content="index"></head><body></body></html>';
+        $file = $this->prophesize(SplFileInfo::class);
+        $file->getContents()->willReturn($fileContent);
+
+        $subject = new ParseDocumentationHTMLService();
+        $result = $subject->checkIfMetaTagExistsInFile($file->reveal(), 'x-typo3-indexer', 'noindex');
+
+        $this->assertFalse($result);
+    }
+
/**
* @test
* @throws Exception