Skip to content

Commit

Permalink
Improve db check for duplicates
Browse files Browse the repository at this point in the history
Resolves: #13
  • Loading branch information
sypets committed Jan 21, 2025
1 parent 191e8c6 commit 876fb39
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 2 deletions.
11 changes: 9 additions & 2 deletions Classes/Command/UnduplicateCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,7 @@ public function findDuplicates(mixed $onlyThisIdentifier, int $onlyThisStorage):
{
$queryBuilder = $this->connectionPool->getQueryBuilderForTable('sys_file');
$queryBuilder->count('*')
->addSelect('identifier', 'storage')
->from('sys_file')
->groupBy('identifier', 'storage')
->having('COUNT(*) > 1');
$whereExpressions = [];
if ($onlyThisIdentifier) {
Expand All @@ -223,6 +221,15 @@ public function findDuplicates(mixed $onlyThisIdentifier, int $onlyThisStorage):
if ($whereExpressions) {
$queryBuilder->where(...$whereExpressions);
}

$concreteQueryBuilder = $queryBuilder->getConcreteQueryBuilder();

$concreteQueryBuilder->groupBy('BINARY identifier');
$concreteQueryBuilder->addGroupBy('storage');
$concreteQueryBuilder->addSelect('MAX(identifier) AS identifier', 'storage');

$this->output->writeln('sql=' . $queryBuilder->getSQL(), OutputInterface::VERBOSITY_VERBOSE);

$statement = $queryBuilder
->executeQuery();
return $statement;
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,23 @@ Finds and fixes duplicates of sys_file entries pointing to the same file. Merges
Tested successfully with TYPO3 v12.

## Warning

Older versions for TYPO3 v8 may not consider identifiers with mixed case or sys_file
entries on several storages (sys_file.storage) correctly, see issue https://github.com/ElementareTeilchen/unduplicator/issues/2

## Portability (database)

In order to test for duplicates, a database command like this is used:

```sql
SELECT COUNT(*), MAX(identifier) AS identifier, storage FROM `sys_file` GROUP BY BINARY identifier, storage HAVING COUNT(*) > 1;
```

Therefore, the underlying database engine must support `MAX` and `BINARY`. This command was tested with the following:

* MariaDB
* MySQL

## Usage
We strongly recommend running the **reference index update** (before and after):
If not run before or the references are out of date, some references may be overlooked and a sys_file entry deleted which has references.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"sys_file",,,
,"uid","identifier","storage"
,1,"/test/abc.jpg",1
,2,"/test/ABC.jpg",1
,3,"/test/ABC.jpg",1
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"sys_file",,,
,"uid","identifier","storage"
,1,"/test/abc.jpg",1
,2,"/test/ABC.jpg",1
15 changes: 15 additions & 0 deletions Tests/Functional/Command/UnduplicateCommandTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,21 @@ class UnduplicateCommandTest extends FunctionalTestCase
self::assertEquals(0, $result['status']);
}

/**
 * Runs the base command on a data set whose sys_file identifiers differ
 * only in letter case and compares the database with the expected result.
 *
 * Identifiers in the imported data set:
 *  - abc.jpg
 *  - ABC.jpg
 *  - ABC.jpg
 *
 * The command must also finish with exit status 0.
 */
#[Test]
public function unduplicateCommandFixesDuplicatesWithMixCaseSensitives(): void
{
    // Fixture that is loaded before the command runs, and the state expected afterwards.
    $inputDataSet = __DIR__ . '/DataSet/sys_file_duplicates_mix_casesensitive.csv';
    $expectedDataSet = __DIR__ . '/DataSet/sys_file_duplicates_mix_casesensitive_RESULT.csv';

    $this->importCSVDataSet($inputDataSet);

    $commandResult = $this->executeConsoleCommand(self::BASE_COMMAND);

    // Check the resulting rows first, then the exit status of the command.
    $this->assertCSVDataSet($expectedDataSet);
    self::assertEquals(0, $commandResult['status']);
}

#[Test] public function unduplicateCommandFixesDuplicatesWithReferences(): void
{
$this->importCSVDataSet(__DIR__ . '/DataSet/sys_file_duplicates_with_references.csv');
Expand Down

0 comments on commit 876fb39

Please sign in to comment.