diff --git a/README.rst b/README.rst index d0f6a8d..56ae5fd 100644 --- a/README.rst +++ b/README.rst @@ -118,4 +118,26 @@ Core changelog is treated as a "sub manual" of the core manual. To index it, jus To avoid duplicates search is indexing Core changelog only from "main" version/branch of the core documentation. E.g. when you run ``./bin/console docsearch:import c/typo3/cms-core/main/`` then the changelog for all versions will be indexed, -but if you run `./bin/console docsearch:import c/typo3/cms-core/12.4/` the changelog will NOT be indexed. \ No newline at end of file +but if you run `./bin/console docsearch:import c/typo3/cms-core/12.4/` the changelog will NOT be indexed. + +Excluded and ignored files and folders +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There are several files and folders that are excluded from indexing by default. +You can find them in the ``services.yml`` file in the ``docsearch`` section. + +If you want to exclude more files or folders, you can add them to the ``excluded_directories`` section. + +There are also specific places in the code where files or folders are ignored. + +Inside the ``Manual::getFilesWithSections()`` method, the Finder is configured to ignore several files and folders. +In the same place, if the indexed package is ``typo3/cms-core``, the ``Changelog`` folder is excluded from indexing, +as it will be indexed as a part of the TYPO3 core manual (see ``Manual::getSubManuals()`` for more details). + +Since the ``typo3/cms-core`` is a special package for core manuals, only the manuals from the ``main`` versions should be indexed. +To achieve this, the ``DirectoryFinderService::getFolderFilter() ... isNotIgnoredPath()`` method is used. +It will check if the processed directory is ``/c/typo3/cms-core/`` and, if the version is not ``main``, the whole directory (other version) will be ignored. + +The ``ImportManualHTMLService::importSectionsFromManual()`` method will check if the file contains the +``<meta name="x-typo3-indexer" content="noindex">`` meta tag. 
If such tag exists inside the file, such file will be ignored. + diff --git a/src/Dto/Manual.php b/src/Dto/Manual.php index 5b6e63c..2200582 100644 --- a/src/Dto/Manual.php +++ b/src/Dto/Manual.php @@ -67,7 +67,7 @@ public function getFilesWithSections(): Finder ->in($this->getAbsolutePath()) ->name('*.html') ->notName(['search.html', 'genindex.html', 'Targets.html', 'Quicklinks.html']) - ->notPath(['_buildinfo', '_images', '_panels_static', '_sources', '_static', 'singlehtml', 'Sitemap']); + ->notPath(['_buildinfo', '_images', '_panels_static', '_sources', '_static', 'singlehtml']); if ($this->getTitle() === 'typo3/cms-core') { $finder->notPath('Changelog'); diff --git a/src/Service/ImportManualHTMLService.php b/src/Service/ImportManualHTMLService.php index 6dcf1db..618fcbc 100644 --- a/src/Service/ImportManualHTMLService.php +++ b/src/Service/ImportManualHTMLService.php @@ -36,6 +36,9 @@ private function importSectionsFromManual(Manual $manual): void $this->dispatcher->dispatch(new ManualStart($files), ManualStart::NAME); foreach ($files as $file) { + if ($this->parser->checkIfMetaTagExistsInFile($file, 'x-typo3-indexer', 'noindex')) { + continue; + } $this->importSectionsFromFile($file, $manual); $this->dispatcher->dispatch(new ManualAdvance(), ManualAdvance::NAME); } diff --git a/src/Service/ParseDocumentationHTMLService.php b/src/Service/ParseDocumentationHTMLService.php index c6563b5..8cce6de 100644 --- a/src/Service/ParseDocumentationHTMLService.php +++ b/src/Service/ParseDocumentationHTMLService.php @@ -9,6 +9,30 @@ class ParseDocumentationHTMLService { private bool $newRendering = true; + /** + * Tells whether the file's markup contains a meta tag with the given name. + * + * When $content is not null, the tag must also carry exactly that value in its "content" attribute. + * + * @param SplFileInfo $file File whose contents are loaded into a DOM crawler + * @param string $name Expected value of the "name" attribute + * @param string|null $content Expected value of the "content" attribute; null matches any content + * @return bool True if at least one matching meta tag is found + */ + public function checkIfMetaTagExistsInFile(SplFileInfo $file, string $name, ?string $content = null): bool + { + // Both values are inserted into the CSS selector verbatim, so they must not contain double quotes. + $selector = sprintf('meta[name="%s"]', $name); + + if ($content !== null) { + $selector .= sprintf('[content="%s"]', $content); + } + + $crawler = new Crawler($file->getContents()); + 
+ return $crawler->filter($selector)->count() > 0; + } + public function getSectionsFromFile(SplFileInfo $file): array { $fileContents = $file->getContents(); diff --git a/tests/Unit/Service/ParseDocumentationHTMLServiceTest.php b/tests/Unit/Service/ParseDocumentationHTMLServiceTest.php index 9f47a0c..8636dd3 100644 --- a/tests/Unit/Service/ParseDocumentationHTMLServiceTest.php +++ b/tests/Unit/Service/ParseDocumentationHTMLServiceTest.php @@ -12,6 +12,54 @@ class ParseDocumentationHTMLServiceTest extends TestCase { use ProphecyTrait; + public function testMetaTagExistsByNameOnly(): void + { + $fileContent = '<html><head><meta name="x-typo3-indexer" content="noindex"></head><body></body></html>'; + $file = $this->prophesize(SplFileInfo::class); + + $file->getContents()->willReturn($fileContent); + $subject = new ParseDocumentationHTMLService(); + $result = $subject->checkIfMetaTagExistsInFile($file->reveal(), 'x-typo3-indexer'); + + $this->assertTrue($result); + } + + public function testMetaTagExistsByNameAndContent(): void + { + $fileContent = '<html><head><meta name="x-typo3-indexer" content="noindex"></head><body></body></html>'; + $file = $this->prophesize(SplFileInfo::class); + + $file->getContents()->willReturn($fileContent); + $subject = new ParseDocumentationHTMLService(); + $result = $subject->checkIfMetaTagExistsInFile($file->reveal(), 'x-typo3-indexer', 'noindex'); + + $this->assertTrue($result); + } + + public function testMetaTagDoesNotExistByName(): void + { + $fileContent = '<html><head><meta name="x-typo3-indexer" content="noindex"></head><body></body></html>'; + $file = $this->prophesize(SplFileInfo::class); + $file->getContents()->willReturn($fileContent); + + $subject = new ParseDocumentationHTMLService(); + $result = $subject->checkIfMetaTagExistsInFile($file->reveal(), 'x-typo3-version'); + + $this->assertFalse($result); + } + + public function testMetaTagDoesNotExistByContent(): void + { + $fileContent = '<html><head><meta name="x-typo3-indexer" content="index"></head><body></body></html>'; + $file = $this->prophesize(SplFileInfo::class); + $file->getContents()->willReturn($fileContent); + + $subject = new ParseDocumentationHTMLService(); + $result = $subject->checkIfMetaTagExistsInFile($file->reveal(), 'x-typo3-indexer', 'noindex'); + + $this->assertFalse($result); + } + /** * @test * @throws 
Exception