Fix removal of still used files (#27)

Simplify SQL queries and move logic to PHP.
This commit is contained in:
Daniel Siepmann 2023-06-20 11:56:59 +02:00 committed by GitHub
parent c56a10b748
commit 0f7323eac3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 178 additions and 42 deletions

View file

@ -135,44 +135,13 @@ class Files
private function deleteFilesWithoutProperReference(): void
{
$queryBuilder = $this->connectionPool->getQueryBuilderForTable('sys_file');
$queryBuilder->getRestrictions()->removeAll();
$queryBuilder
->select('file.identifier', 'file.storage', 'file.uid')
->addSelectLiteral('SUM(' . $queryBuilder->expr()->eq('reference.deleted', 1) . ') AS deleted_sum')
->from('sys_file', 'file')
->leftJoin(
'file',
'sys_file_reference',
'reference',
'reference.uid_local = file.uid'
)
->where($queryBuilder->expr()->like(
'reference.tablenames',
$queryBuilder->createNamedParameter('tx_events_domain_model_%')
))
->orWhere($queryBuilder->expr()->eq(
'reference.tablenames',
$queryBuilder->createNamedParameter('')
))
->groupBy('file.uid')
->having(
$queryBuilder->expr()->eq(
'deleted_sum',
$queryBuilder->expr()->count('*')
)
)
;
/** @var array{int: array{storage: int, identifier: string, uid: int}} $filesToDelete */
$filesToDelete = $queryBuilder->execute()->fetchAll();
$filesToDelete = $this->filterPotentialFilesToDelete($this->getPotentialFilesToDelete());
$uidsToRemove = [];
foreach ($filesToDelete as $fileToDelete) {
$this->deleteFromFal((int)$fileToDelete['storage'], (string)$fileToDelete['identifier']);
$uidsToRemove[] = (int)$fileToDelete['uid'];
foreach ($filesToDelete as $file) {
$this->deleteFromFal((int)$file['storage'], (string)$file['identifier']);
}
$this->deleteFromDb(...$uidsToRemove);
$this->deleteFromDb(...array_keys($filesToDelete));
}
private function deleteFromFal(int $storageUid, string $filePath): void
@ -228,4 +197,79 @@ class Files
;
$queryBuilder->execute();
}
/**
* @return array<int, array{storage: int, identifier: string}> Index is file uid.
*/
private function getPotentialFilesToDelete(): array
{
$queryBuilder = $this->connectionPool->getQueryBuilderForTable('sys_file');
$queryBuilder->getRestrictions()->removeAll();
$queryBuilder
->select('file.uid', 'file.storage', 'file.identifier')
->from('sys_file', 'file')
->leftJoin(
'file',
'sys_file_reference',
'reference',
'reference.uid_local = file.uid'
)
->where($queryBuilder->expr()->like(
'reference.tablenames',
$queryBuilder->createNamedParameter('tx_events_domain_model_%')
))
->orWhere($queryBuilder->expr()->eq(
'reference.tablenames',
$queryBuilder->createNamedParameter('')
))
->groupBy('file.uid')
;
return $queryBuilder->execute()->fetchAllAssociativeIndexed();
}
/**
* @param array<int, array{storage: int, identifier: string}> $files
* @return array<int, array{storage: int, identifier: string}> Index is file uid.
*/
private function filterPotentialFilesToDelete(array $files): array
{
$filesToDelete = [];
$filesToKeep = [];
$queryBuilder = $this->connectionPool->getQueryBuilderForTable('sys_file');
$queryBuilder->getRestrictions()->removeAll();
$queryBuilder
->select('*')
->from('sys_file_reference', 'reference')
->where($queryBuilder->expr()->in(
'uid_local',
$queryBuilder->createNamedParameter(array_keys($files), Connection::PARAM_INT_ARRAY)
))
;
foreach ($queryBuilder->execute() as $reference) {
$file = [];
$fileUid = (int) $reference['uid_local'];
if (
(
str_starts_with($reference['tablenames'], 'tx_events_domain_model_')
|| $reference['tablenames'] === ''
) && $reference['deleted'] == 1
) {
$file = $files[$fileUid] ?? [];
} else {
$filesToKeep[$fileUid] = $fileUid;
}
if ($file === []) {
continue;
}
$filesToDelete[$fileUid] = $file;
}
return array_diff_key($filesToDelete, $filesToKeep);
}
}

View file

@ -64,6 +64,11 @@ Fixes
The time range where this can happen is now reduced as slugs for each event and
date is generated right after saving each of them.
* Improve deletion of files and their relations.
The used database query didn't work as expected and could result in data loss.
There are now two database queries and the logic is moved to PHP.
Furthermore, the test cases were extended with another situation.
Tasks
-----

View file

@ -7,6 +7,7 @@ return [
'uid' => '1',
'title' => 'Root page',
'slug' => '1',
'media' => '1',
],
[
'pid' => '1',
@ -184,6 +185,26 @@ return [
'identifier_hash' => '475768e491580fb8b74ed36c2b1aaf619ca5e11d',
'folder_hash' => 'b4ab666a114d9905a50606d1837b74d952dfd90f',
],
[
'uid' => '6',
'pid' => '0',
'tstamp' => '1371467047',
'type' => '2',
'storage' => '1',
'identifier' => '/user_uploads/example-events-image-used-somewhere-else.gif',
'extension' => 'gif',
'mime_type' => 'image/gif',
'name' => 'example-events-image-used-somewhere-else.gif',
'sha1' => '359ae0fb420fe8afe1a8b8bc5e46d75090a826b9',
'size' => '637',
'creation_date' => '1370877201',
'modification_date' => '1369407629',
'last_indexed' => '0',
'missing' => '0',
'metadata' => '0',
'identifier_hash' => '475768e491580fb8b74ed36c2b1aaf619ca5e11d',
'folder_hash' => 'b4ab666a114d9905a50606d1837b74d952dfd90f',
],
],
'sys_file_metadata' => [
[
@ -226,6 +247,14 @@ return [
'cruser_id' => '1',
'file' => '5',
],
[
'uid' => '6',
'pid' => '0',
'tstamp' => '1371467047',
'crdate' => '1371467047',
'cruser_id' => '1',
'file' => '6',
],
],
'sys_file_reference' => [
[
@ -324,6 +353,38 @@ return [
'sorting_foreign' => '0',
'table_local' => 'sys_file',
],
[
'uid' => '7',
'pid' => '0',
'tstamp' => '1373537480',
'crdate' => '1371484347',
'cruser_id' => '1',
'deleted' => '0',
'hidden' => '0',
'sys_language_uid' => '0',
'uid_local' => '6',
'uid_foreign' => '0',
'tablenames' => 'pages',
'fieldname' => 'media',
'sorting_foreign' => '0',
'table_local' => 'sys_file',
],
[
'uid' => '8',
'pid' => '0',
'tstamp' => '1373537480',
'crdate' => '1371484347',
'cruser_id' => '1',
'deleted' => '0',
'hidden' => '0',
'sys_language_uid' => '0',
'uid_local' => '6',
'uid_foreign' => '1',
'tablenames' => 'tx_events_domain_model_event',
'fieldname' => 'images',
'sorting_foreign' => '2',
'table_local' => 'sys_file',
],
],
'tx_events_domain_model_region' => [
[

View file

@ -70,26 +70,27 @@ class RemovePastTest extends AbstractFunctionalTestCase
);
self::assertCount(
3,
4,
$this->getAllRecords('sys_file'),
'Unexpected number of sys_file records.'
);
self::assertCount(
3,
4,
$this->getAllRecords('sys_file_reference'),
'Unexpected number of sys_file_reference records.'
);
self::assertCount(
3,
4,
$this->getAllRecords('sys_file_metadata'),
'Unexpected number of sys_file_metadata records.'
);
$files = GeneralUtility::getFilesInDir('fileadmin/user_uploads');
self::assertIsArray($files, 'Failed to retrieve files from filesystem.');
self::assertCount(3, $files, 'Unexpected number of files in filesystem.');
self::assertSame('example-for-future-event.gif', array_values($files)[0], 'Unexpected file in filesystem.');
self::assertSame('example-for-partner.gif', array_values($files)[1], 'Unexpected file in filesystem.');
self::assertSame('example-to-keep.gif', array_values($files)[2], 'Unexpected file in filesystem.');
self::assertCount(4, $files, 'Unexpected number of files in filesystem.');
self::assertSame('example-events-image-used-somewhere-else.gif', array_values($files)[0], 'Unexpected file in filesystem.');
self::assertSame('example-for-future-event.gif', array_values($files)[1], 'Unexpected file in filesystem.');
self::assertSame('example-for-partner.gif', array_values($files)[2], 'Unexpected file in filesystem.');
self::assertSame('example-to-keep.gif', array_values($files)[3], 'Unexpected file in filesystem.');
}
}

View file

@ -45,6 +45,26 @@ parameters:
count: 1
path: Classes/Service/Cleanup/Files.php
-
message: "#^Argument of an invalid type Doctrine\\\\DBAL\\\\Result\\|int supplied for foreach, only iterables are supported\\.$#"
count: 1
path: Classes/Service/Cleanup/Files.php
-
message: "#^Argument of an invalid type Doctrine\\\\DBAL\\\\Driver\\\\ResultStatement\\|int supplied for foreach, only iterables are supported\\.$#"
count: 1
path: Classes/Service/Cleanup/Files.php
-
message: "#^Cannot call method fetchAllAssociativeIndexed\\(\\) on Doctrine\\\\DBAL\\\\Result\\|int\\.$#"
count: 1
path: Classes/Service/Cleanup/Files.php
-
message: "#^Method Wrm\\\\Events\\\\Service\\\\Cleanup\\\\Files\\:\\:getPotentialFilesToDelete\\(\\) should return array\\<int, array\\{storage\\: int, identifier\\: string\\}\\> but returns array\\<int\\|string, array\\<string, mixed\\>\\>\\.$#"
count: 1
path: Classes/Service/Cleanup/Files.php
-
message: "#^Method Wrm\\\\Events\\\\Service\\\\DestinationDataImportService\\\\CategoriesAssignment\\:\\:getCategories\\(\\) should return TYPO3\\\\CMS\\\\Extbase\\\\Persistence\\\\ObjectStorage\\<Wrm\\\\Events\\\\Domain\\\\Model\\\\Category\\> but returns TYPO3\\\\CMS\\\\Extbase\\\\Persistence\\\\ObjectStorage\\<mixed\\>\\.$#"
count: 2
@ -65,6 +85,11 @@ parameters:
count: 1
path: Classes/Service/DestinationDataImportService/FilesAssignment.php
-
message: "#^Cannot call method fetchAllAssociativeIndexed\\(\\) on Doctrine\\\\DBAL\\\\Driver\\\\ResultStatement\\|int\\.$#"
count: 1
path: Classes/Service/Cleanup/Files.php
-
message: "#^Argument of an invalid type Doctrine\\\\DBAL\\\\Result\\|int supplied for foreach, only iterables are supported\\.$#"
count: 1