Skip to content

Commit

Permalink
tagging: move mimetype to metadata, add orphaned tag
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewhilton committed Sep 2, 2024
1 parent 87327fc commit c34bbbd
Show file tree
Hide file tree
Showing 10 changed files with 85 additions and 81 deletions.
15 changes: 12 additions & 3 deletions TAGGING.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ The following sources are implemented currently:
### Environment
What environment the file was uploaded in. Configure the environment using `$CFG->objectfs_environment_name`

### Mimetype
What mimetype the file is stored as under the `mdl_files` table.
If a file becomes orphaned (e.g. deleted in Moodle), the object will be updated with the environment value `orphan`.

## Multiple environments pointing to single bucket
It is possible you are using objectfs with multiple environments (e.g. prod, staging) that both point to the same bucket. Since files are referenced by contenthash, it generally does not matter where they come from, so this isn't a problem. However, to ensure the tags remain accurate, you should turn off `overwriteobjecttags` in the plugin settings for every environment except production.
Expand Down Expand Up @@ -83,4 +82,14 @@ To add a new source:
- Implement `tag_source`
- Add to the `tag_manager` class
- As part of an upgrade step, mark all objects `tagsyncstatus` to needing sync (using `tag_manager` class, or manually in the DB)
- As part of an upgrade step, queue an `update_object_tags` adhoc task to process the tag migration.
- As part of an upgrade step, queue an `update_object_tags` adhoc task to process the tag migration.

## Use cases

### Changing storage tier of orphaned files.
Instead of deleting files, you may wish to simply move them to a cheaper storage tier (for example, to reduce the risk of accidental data deletion). With the tagging functionality, you can do this:

1. New objects will have `metadata` added that contains their mimetype. You can use this to target, for example, only course backups. Note that metadata is not applied retroactively.
2. Objects in use will have their `environment` tag set to, for example, `prod`. This lets you know the file is being used and should not be touched.
3. When a file is deleted in Moodle, its location in objectfs will be changed to `OBJECT_LOCATION_ORPHANED`. If tagging is enabled, this will also change the `environment` tag in S3 to `orphan`, letting you know the file is ready for deletion/storage tier changing.
4. You can set up a lifecycle policy to target the tags + metadata as you wish based on your use case.
24 changes: 18 additions & 6 deletions classes/local/manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

use stdClass;
use tool_objectfs\local\store\object_file_system;
use tool_objectfs\local\tag\tag_manager;

/**
* [Description manager]
Expand Down Expand Up @@ -160,7 +161,7 @@ public static function update_object_by_hash($contenthash, $newlocation, $filesi
$newobject->filesize = isset($oldobject->filesize) ? $oldobject->filesize :
$DB->get_field('files', 'filesize', ['contenthash' => $contenthash], IGNORE_MULTIPLE);

return self::update_object($newobject, $newlocation);
return self::upsert_object($newobject, $newlocation);
}
$newobject->location = $newlocation;

Expand All @@ -173,9 +174,7 @@ public static function update_object_by_hash($contenthash, $newlocation, $filesi
$newobject->filesize = $filesize;
$newobject->timeduplicated = time();
}
$DB->insert_record('tool_objectfs_objects', $newobject);

return $newobject;
return self::upsert_object($newobject, $newlocation);
}

/**
Expand All @@ -185,16 +184,29 @@ public static function update_object_by_hash($contenthash, $newlocation, $filesi
* @return stdClass
* @throws \dml_exception
*/
public static function update_object(stdClass $object, $newlocation) {
public static function upsert_object(stdClass $object, $newlocation) {
global $DB;

// If location change is 'duplicated' we update timeduplicated.
if ($newlocation === OBJECT_LOCATION_DUPLICATED) {
$object->timeduplicated = time();
}

$locationchanged = !isset($object->location) || $object->location != $newlocation;
$object->location = $newlocation;
$DB->update_record('tool_objectfs_objects', $object);

// If id is set, update, else insert new.
if (empty($object->id)) {
$object->id = $DB->insert_record('tool_objectfs_objects', $object);
} else {
$DB->update_record('tool_objectfs_objects', $object);
}

// Post update, notify tag manager since the location tag likely needs changing.
if ($locationchanged && tag_manager::is_tagging_enabled_and_supported()) {
$fs = get_file_storage()->get_file_system();
$fs->push_object_tags($object->contenthash);
}

return $object;
}
Expand Down
2 changes: 1 addition & 1 deletion classes/local/object_manipulator/manipulator.php
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public function execute(array $objectrecords) {

$newlocation = $this->manipulate_object($objectrecord);
if (!empty($objectrecord->id)) {
manager::update_object($objectrecord, $newlocation);
manager::upsert_object($objectrecord, $newlocation);
} else {
manager::update_object_by_hash($objectrecord->contenthash, $newlocation);
}
Expand Down
16 changes: 16 additions & 0 deletions classes/local/store/object_file_system.php
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,22 @@ protected function get_local_path_from_hash($contenthash, $fetchifnotfound = fal
return $path;
}

/**
 * Returns mimetype for a given hash
 *
 * Multiple rows in the files table can share a contenthash, so only the
 * first matching row is read.
 *
 * @param string $contenthash
 * @return string mimetype as stored in mdl_files, or 'application/octet-stream'
 *                when no mimetype is recorded for the hash
 */
protected function get_mimetype_from_hash(string $contenthash): string {
    global $DB;

    // IGNORE_MULTIPLE lets the DML layer pick a single row in a database-agnostic way.
    // A raw "LIMIT 1" clause is not valid SQL on every database engine Moodle supports.
    $mimetype = $DB->get_field('files', 'mimetype', ['contenthash' => $contenthash], IGNORE_MULTIPLE);

    // The lookup yields false when no row matches and may yield null for an empty column.
    // Neither satisfies the declared string return type, so fall back to a generic type.
    if (empty($mimetype)) {
        return 'application/octet-stream';
    }

    return (string) $mimetype;
}

/**
* get_remote_path_from_storedfile
* @param \stored_file $file
Expand Down
11 changes: 9 additions & 2 deletions classes/local/store/s3/client.php
Original file line number Diff line number Diff line change
Expand Up @@ -496,10 +496,11 @@ public function define_client_section($settings, $config) {
*
* @param string $localpath Path to a local file.
* @param string $contenthash Content hash of the file.
* @param string $mimetype the mimetype of the file being uploaded
*
* @throws \Exception if fails.
*/
public function upload_to_s3($localpath, $contenthash) {
public function upload_to_s3($localpath, $contenthash, string $mimetype) {
$filehandle = fopen($localpath, 'rb');

if (!$filehandle) {
Expand All @@ -511,7 +512,13 @@ public function upload_to_s3($localpath, $contenthash) {
$uploader = new \Aws\S3\ObjectUploader(
$this->client, $this->bucket,
$this->bucketkeyprefix . $externalpath,
$filehandle
$filehandle,
'private',
[
'params' => [
'ContentType' => $mimetype,
],
]
);
$uploader->upload();
fclose($filehandle);
Expand Down
3 changes: 2 additions & 1 deletion classes/local/store/s3/file_system.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ public function readfile(\stored_file $file) {
*/
public function copy_from_local_to_external($contenthash) {
$localpath = $this->get_local_path_from_hash($contenthash);
$mime = $this->get_mimetype_from_hash($contenthash);

try {
$this->get_external_client()->upload_to_s3($localpath, $contenthash);
$this->get_external_client()->upload_to_s3($localpath, $contenthash, $mime);
return true;
} catch (\Exception $e) {
$this->get_logger()->error_log(
Expand Down
10 changes: 10 additions & 0 deletions classes/local/tag/environment_source.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ private static function get_env(): ?string {
* @return string|null 'orphan' if the object is orphaned, otherwise the configured environment name.
*/
public function get_value_for_contenthash(string $contenthash): ?string {
    global $DB;

    // An object row at the orphaned location takes precedence over the environment name.
    $orphanconditions = [
        'contenthash' => $contenthash,
        'location' => OBJECT_LOCATION_ORPHANED,
    ];

    return $DB->record_exists('tool_objectfs_objects', $orphanconditions)
        ? 'orphan'
        : self::get_env();
}
}
65 changes: 0 additions & 65 deletions classes/local/tag/mime_type_source.php

This file was deleted.

1 change: 0 additions & 1 deletion classes/local/tag/tag_manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ public static function get_defined_tag_sources(): array {
// All possible tag sources should be defined here.
// Note this should be a maximum of 10 sources, as this is an AWS limit.
return [
new mime_type_source(),
new environment_source(),
];
}
Expand Down
19 changes: 17 additions & 2 deletions tests/local/tagging_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,27 @@ public function test_gather_object_tags_for_upload() {
$object = $this->create_duplicated_object('gather tags for upload test');
$tags = tag_manager::gather_object_tags_for_upload($object->contenthash);

$this->assertArrayHasKey('mimetype', $tags);
$this->assertArrayHasKey('environment', $tags);
$this->assertEquals('text', $tags['mimetype']);
$this->assertEquals('test', $tags['environment']);
}

/**
 * Tests gather_object_tags_for_upload when the object record is orphaned.
 *
 * The environment tag is expected to read 'orphan' rather than the environment name.
 *
 * @covers \tool_objectfs\local\tag\tag_manager::gather_object_tags_for_upload
 */
public function test_gather_object_tags_for_upload_orphaned() {
    global $DB;
    $object = $this->create_duplicated_object('gather tags for upload test');

    // Change the object record to be orphaned.
    $DB->update_record('tool_objectfs_objects', ['id' => $object->id, 'location' => OBJECT_LOCATION_ORPHANED]);

    // Tags are gathered after the location change so the environment source sees the orphaned row.
    $tags = tag_manager::gather_object_tags_for_upload($object->contenthash);

    $this->assertArrayHasKey('environment', $tags);
    $this->assertEquals('orphan', $tags['environment']);
}

/**
* Tests store_tags_locally
* @covers \tool_objectfs\local\tag_manager::store_tags_locally
Expand Down

0 comments on commit c34bbbd

Please sign in to comment.