From b5461b0085d40c5a8e96dae67392fb3e0efa6146 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 30 Aug 2023 16:11:20 -0700 Subject: [PATCH 01/33] Add a `supports?` --- lib/valkyrie/specs/shared_specs/storage_adapter.rb | 1 + lib/valkyrie/storage/disk.rb | 6 ++++++ lib/valkyrie/storage/fedora.rb | 6 ++++++ lib/valkyrie/storage/memory.rb | 6 ++++++ 4 files changed, 19 insertions(+) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index ab67655ed..065377e5f 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -16,6 +16,7 @@ class Valkyrie::Specs::CustomResource < Valkyrie::Resource it { is_expected.to respond_to(:find_by).with_keywords(:id) } it { is_expected.to respond_to(:delete).with_keywords(:id) } it { is_expected.to respond_to(:upload).with_keywords(:file, :resource, :original_filename) } + it { is_expected.to respond_to(:supports?) } it "can upload a file which is just an IO" do io_file = Tempfile.new('temp_io') diff --git a/lib/valkyrie/storage/disk.rb b/lib/valkyrie/storage/disk.rb index 87975ea76..92922ed02 100644 --- a/lib/valkyrie/storage/disk.rb +++ b/lib/valkyrie/storage/disk.rb @@ -27,6 +27,12 @@ def handles?(id:) id.to_s.start_with?("disk://#{base_path}") end + # @param feature [Symbol] Feature to test for. + # @return [Boolean] true if the adapter supports the given feature + def supports?(_feature) + false + end + def file_path(id) id.to_s.gsub(/^disk:\/\//, '') end diff --git a/lib/valkyrie/storage/fedora.rb b/lib/valkyrie/storage/fedora.rb index 6957942cb..de9c8f0d6 100644 --- a/lib/valkyrie/storage/fedora.rb +++ b/lib/valkyrie/storage/fedora.rb @@ -19,6 +19,12 @@ def handles?(id:) id.to_s.start_with?(PROTOCOL) end + # @param feature [Symbol] Feature to test for. + # @return [Boolean] true if the adapter supports the given feature + def supports?(_feature) + false + end + # Return the file associated with the given identifier # @param id [Valkyrie::ID] # @return [Valkyrie::StorageAdapter::StreamFile] diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index 6f102b16f..7e659f5d7 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -35,6 +35,12 @@ def handles?(id:) id.to_s.start_with?("memory://") end + # @param feature [Symbol] Feature to test for. + # @return [Boolean] true if the adapter supports the given feature + def supports?(_feature) + false + end + # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] def delete(id:) From eaa5d741c6abd1d26daddc653c21d3e7f8cc0956 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 30 Aug 2023 16:11:28 -0700 Subject: [PATCH 02/33] Start speccing out versioning looking at Hyrax --- lib/valkyrie/specs/shared_specs/storage_adapter.rb | 12 ++++++++++++ lib/valkyrie/storage/memory.rb | 14 ++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 065377e5f..f5fa51edd 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -78,4 +78,16 @@ def open_files expect { storage_adapter.find_by(id: uploaded_file.id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound expect { storage_adapter.find_by(id: Valkyrie::ID.new("noexist")) }.to raise_error Valkyrie::StorageAdapter::FileNotFound end + + it "can upload and find new versions" do + resource = Valkyrie::Specs::CustomResource.new(id: "test") + uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true) + + new_version = storage_adapter.upload_version(file: file, original_filename: 'foo_final.jpg', previous_version_id: uploaded_file.id) + expect(uploaded_file.id).to eq new_version.id + + versions = storage_adapter.find_versions(id: new_version.id) + expect(versions.length).to eq 2 + expect(versions.first.id).to eq new_version.id + end end diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index 7e659f5d7..e41ba63e9 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -20,6 +20,20 @@ def upload(file:, original_filename:, resource: nil, **_extra_arguments) cache[identifier] = Valkyrie::StorageAdapter::StreamFile.new(id: identifier, io: file) end + # @param file [IO] + # @param original_filename [String] + # @param previous_version_id [Valkyrie::ID] + # @param _extra_arguments [Hash] additional arguments which may be passed to + # other adapters. + # @return [Valkyrie::StorageAdapter::StreamFile] + def upload_version(file:, original_filename:, previous_version_id:) + previous_file = find_by(id: previous_version_id) + previous_file.id = Valkyrie::ID.new("#{previous_version_id}##{SecureRandom.uuid}") + cache[previous_file.id] = previous_file + cache["#{id}_versions"] ||= [] + cache["#{id}_versions"] = [previous_file] + cache["#{id}_versions"] + end + # Return the file associated with the given identifier # @param id [Valkyrie::ID] # @return [Valkyrie::StorageAdapter::StreamFile] From c90e1609a33b9ae08593c87bbc5568e17fad5938 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 30 Aug 2023 17:18:36 -0700 Subject: [PATCH 03/33] Initial storage adapter versioning API for memory adapter. --- .../specs/shared_specs/storage_adapter.rb | 11 ++++++++- lib/valkyrie/storage/memory.rb | 23 +++++++++++++++---- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index f5fa51edd..5ec2f37cc 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -80,14 +80,23 @@ def open_files end it "can upload and find new versions" do + pending "Versioning not supported" unless storage_adapter.supports?(:versions) + size = file.size resource = Valkyrie::Specs::CustomResource.new(id: "test") uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true) - new_version = storage_adapter.upload_version(file: file, original_filename: 'foo_final.jpg', previous_version_id: uploaded_file.id) + f = Tempfile.new + f.puts "Test File" + f.rewind + + new_version = storage_adapter.upload_version(file: f, original_filename: 'foo_final.jpg', previous_version_id: uploaded_file.id) expect(uploaded_file.id).to eq new_version.id versions = storage_adapter.find_versions(id: new_version.id) expect(versions.length).to eq 2 expect(versions.first.id).to eq new_version.id + expect(versions.first.size).not_to eq size + ensure + f.close end end diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index e41ba63e9..3c7afac0f 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -27,11 +27,19 @@ def upload(file:, original_filename:, resource: nil, **_extra_arguments) # other adapters. # @return [Valkyrie::StorageAdapter::StreamFile] def upload_version(file:, original_filename:, previous_version_id:) + # Get previous file and add a UUID to the end of it. previous_file = find_by(id: previous_version_id) - previous_file.id = Valkyrie::ID.new("#{previous_version_id}##{SecureRandom.uuid}") + previous_file = previous_file.new(id: Valkyrie::ID.new("#{previous_version_id}##{SecureRandom.uuid}")) cache[previous_file.id] = previous_file - cache["#{id}_versions"] ||= [] - cache["#{id}_versions"] = [previous_file] + cache["#{id}_versions"] + cache["#{previous_version_id}_versions"] ||= [] + cache["#{previous_version_id}_versions"] = [previous_file] + cache["#{previous_version_id}_versions"] + cache[previous_version_id] = Valkyrie::StorageAdapter::StreamFile.new(id: previous_version_id, io: file) + end + + # @param id [Valkyrie::ID] + # @return [Array] + def find_versions(id:) + [find_by(id: id)] + cache.fetch("#{id}_versions", []) end # Return the file associated with the given identifier @@ -51,8 +59,13 @@ def handles?(id:) # @param feature [Symbol] Feature to test for. # @return [Boolean] true if the adapter supports the given feature - def supports?(_feature) - false + def supports?(feature) + case feature + when :versions + true + else + false + end end # Delete the file on disk associated with the given identifier. From 95f7133f60d98beea4355d208c88619b64973b01 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 30 Aug 2023 17:21:54 -0700 Subject: [PATCH 04/33] Add versioning questions. --- lib/valkyrie/specs/shared_specs/storage_adapter.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 5ec2f37cc..6e3446d26 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -96,6 +96,10 @@ def open_files expect(versions.length).to eq 2 expect(versions.first.id).to eq new_version.id expect(versions.first.size).not_to eq size + # TODO + # 1. How do I delete a version + # 2. Is there a way to delete all versions. + # 3. If I delete the root version, can I still query for its versions. ensure f.close end From 5379c7be6f233e64c1794d31dac93022c1620abe Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 31 Aug 2023 12:21:41 -0700 Subject: [PATCH 05/33] Add notes. --- lib/valkyrie/specs/shared_specs/storage_adapter.rb | 9 +++++++++ spec/valkyrie/storage/disk_spec.rb | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 6e3446d26..6c57c0ff5 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -100,6 +100,15 @@ def open_files # 1. How do I delete a version # 2. Is there a way to delete all versions. # 3. If I delete the root version, can I still query for its versions. + # + # Feedback: previous_version_id implies that I can upload a version of a + # version, not just the root. + # + # I need a current ID (get the most recent version) and every version + # including the current one needs a stable ID. + # + # Deleting: when I delete a root node, find_versions should continue to + # work, but find_by with the root node ID and no version ID should NotFound. ensure f.close end diff --git a/spec/valkyrie/storage/disk_spec.rb b/spec/valkyrie/storage/disk_spec.rb index 545bb3c5b..dbafe52d4 100644 --- a/spec/valkyrie/storage/disk_spec.rb +++ b/spec/valkyrie/storage/disk_spec.rb @@ -17,4 +17,8 @@ expect(storage_adapter.handles?(id: "disk://#{ROOT_PATH.join('tmp', 'wrong')}")).to eq false end end + + # TODO: Add a toggle to enable versioning. + # Think more about what happens if someone turns versioning on for an existing + # repository. end From 3bdcaeed8eddbaa9fae05b6b0efd3a477344aecf Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 31 Aug 2023 13:21:34 -0700 Subject: [PATCH 06/33] New implementation + notes Co-authored-by: Anna Headley --- .../specs/shared_specs/storage_adapter.rb | 34 ++++++++++++------- lib/valkyrie/storage/memory.rb | 31 +++++++++++------ lib/valkyrie/storage_adapter.rb | 1 + 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 6c57c0ff5..733b28610 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -81,35 +81,43 @@ def open_files it "can upload and find new versions" do pending "Versioning not supported" unless storage_adapter.supports?(:versions) - size = file.size resource = Valkyrie::Specs::CustomResource.new(id: "test") uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true) + expect(uploaded_file.version_id).not_to be_blank f = Tempfile.new f.puts "Test File" f.rewind - new_version = storage_adapter.upload_version(file: f, original_filename: 'foo_final.jpg', previous_version_id: uploaded_file.id) + # upload_version + new_version = storage_adapter.upload_version(id: uploaded_file.id, file: f) expect(uploaded_file.id).to eq new_version.id + expect(uploaded_file.version_id).not_to eq new_version.version_id + # find_versions + # Two versions of the same file have the same id, but different version_ids, + # use case: I want to store metadata about a file when it's uploaded as a + # version and refer to it consistently. versions = storage_adapter.find_versions(id: new_version.id) expect(versions.length).to eq 2 expect(versions.first.id).to eq new_version.id - expect(versions.first.size).not_to eq size + expect(versions.first.version_id).to eq new_version.version_id + + expect(versions.last.id).to eq uploaded_file.id + expect(versions.last.version_id).to eq uploaded_file.version_id + + expect(versions.first.size).not_to eq versions.last.size + + expect(storage_adapter.find_by(id: uploaded_file.version_id).version_id).to eq uploaded_file.version_id # TODO - # 1. How do I delete a version - # 2. Is there a way to delete all versions. - # 3. If I delete the root version, can I still query for its versions. - # - # Feedback: previous_version_id implies that I can upload a version of a - # version, not just the root. - # - # I need a current ID (get the most recent version) and every version - # including the current one needs a stable ID. + # 1. How do I delete a version: storage_adapter.delete(id: version_id) + # 2. Is there a way to delete all versions: storage_adapter.delete(id: id, purge_versions: true) + # 3. If I delete the root version, can I still query for its versions. Yes. + # 4. I can find_by a version ID and get it. # # Deleting: when I delete a root node, find_versions should continue to # work, but find_by with the root node ID and no version ID should NotFound. ensure - f.close + f&.close end end diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index 3c7afac0f..e5955e531 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -17,7 +17,9 @@ def initialize # @return [Valkyrie::StorageAdapter::StreamFile] def upload(file:, original_filename:, resource: nil, **_extra_arguments) identifier = Valkyrie::ID.new("memory://#{resource.id}") - cache[identifier] = Valkyrie::StorageAdapter::StreamFile.new(id: identifier, io: file) + version_id = Valkyrie::ID.new("#{identifier}##{SecureRandom.uuid}") + cache[identifier] ||= {} + cache[identifier][:current] = Valkyrie::StorageAdapter::StreamFile.new(id: identifier, io: file, version_id: version_id) end # @param file [IO] @@ -26,20 +28,20 @@ def upload(file:, original_filename:, resource: nil, **_extra_arguments) # @param _extra_arguments [Hash] additional arguments which may be passed to # other adapters. # @return [Valkyrie::StorageAdapter::StreamFile] - def upload_version(file:, original_filename:, previous_version_id:) + def upload_version(id:, file:) # Get previous file and add a UUID to the end of it. - previous_file = find_by(id: previous_version_id) - previous_file = previous_file.new(id: Valkyrie::ID.new("#{previous_version_id}##{SecureRandom.uuid}")) - cache[previous_file.id] = previous_file - cache["#{previous_version_id}_versions"] ||= [] - cache["#{previous_version_id}_versions"] = [previous_file] + cache["#{previous_version_id}_versions"] - cache[previous_version_id] = Valkyrie::StorageAdapter::StreamFile.new(id: previous_version_id, io: file) + current_file = find_by(id: id) + new_file = current_file.new(io: file, version_id: Valkyrie::ID.new("#{id}##{SecureRandom.uuid}")) + cache[current_file.id][:current] = new_file + cache[current_file.id][:versions] ||= [] + cache[current_file.id][:versions] = [current_file] + cache[current_file.id][:versions] + new_file end # @param id [Valkyrie::ID] # @return [Array] def find_versions(id:) - [find_by(id: id)] + cache.fetch("#{id}_versions", []) + [cache[id][:current]] + cache[id].fetch(:versions, []) end # Return the file associated with the given identifier @@ -47,8 +49,15 @@ def find_versions(id:) # @return [Valkyrie::StorageAdapter::StreamFile] # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found def find_by(id:) - raise Valkyrie::StorageAdapter::FileNotFound unless cache[id] - cache[id] + no_version_id = Valkyrie::ID.new(id.to_s.split("#").first) + raise Valkyrie::StorageAdapter::FileNotFound unless cache[no_version_id] + if id == no_version_id + cache[id][:current] + else + find_versions(id: no_version_id).find do |file| + file.version_id == id + end + end end # @param id [Valkyrie::ID] diff --git a/lib/valkyrie/storage_adapter.rb b/lib/valkyrie/storage_adapter.rb index 8eed573af..1ed05bbd6 100644 --- a/lib/valkyrie/storage_adapter.rb +++ b/lib/valkyrie/storage_adapter.rb @@ -67,6 +67,7 @@ def adapter_for(id:) class File < Dry::Struct attribute :id, Valkyrie::Types::Any attribute :io, Valkyrie::Types::Any + attribute :version_id, Valkyrie::Types::Any.optional.default(nil) delegate :size, :read, :rewind, :close, to: :io def stream io From 7014327c24a3a5f68313d2214bca676b12943ebb Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 31 Aug 2023 17:29:17 -0700 Subject: [PATCH 07/33] Add restoring a version and deleting a version. --- .../specs/shared_specs/storage_adapter.rb | 24 ++++++++++ lib/valkyrie/storage/memory.rb | 45 +++++++++++++------ 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 733b28610..729993948 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -109,6 +109,30 @@ def open_files expect(versions.first.size).not_to eq versions.last.size expect(storage_adapter.find_by(id: uploaded_file.version_id).version_id).to eq uploaded_file.version_id + + # Delete versions + + # Deleting a version should leave the current version + storage_adapter.delete(id: uploaded_file.version_id) + expect(storage_adapter.find_versions(id: uploaded_file.id).length).to eq 1 + expect { storage_adapter.find_by(id: uploaded_file.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound + + # Deleting the current record should push it into the versions history. + storage_adapter.delete(id: new_version.id) + expect { storage_adapter.find_by(id: new_version.id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound + expect(storage_adapter.find_versions(id: new_version.id).length).to eq 1 + + # Restoring a previous version is just pumping its file into upload_version + newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id) + expect(newest_version.version_id).not_to eq new_version.id + expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id + + # I can restore a version twice + newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id) + expect(newest_version.version_id).not_to eq new_version.id + expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id + expect(storage_adapter.find_versions(id: newest_version.id).length).to eq 3 + # TODO # 1. How do I delete a version: storage_adapter.delete(id: version_id) # 2. Is there a way to delete all versions: storage_adapter.delete(id: id, purge_versions: true) diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index e5955e531..1aef8b34b 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -30,18 +30,18 @@ def upload(file:, original_filename:, resource: nil, **_extra_arguments) # @return [Valkyrie::StorageAdapter::StreamFile] def upload_version(id:, file:) # Get previous file and add a UUID to the end of it. - current_file = find_by(id: id) - new_file = current_file.new(io: file, version_id: Valkyrie::ID.new("#{id}##{SecureRandom.uuid}")) - cache[current_file.id][:current] = new_file - cache[current_file.id][:versions] ||= [] - cache[current_file.id][:versions] = [current_file] + cache[current_file.id][:versions] + new_file = Valkyrie::StorageAdapter::StreamFile.new(id: id, io: file, version_id: Valkyrie::ID.new("#{id}##{SecureRandom.uuid}")) + current_file = cache[id][:current] + cache[id][:current] = new_file + cache[id][:versions] ||= [] + cache[id][:versions].prepend(current_file) if current_file new_file end # @param id [Valkyrie::ID] # @return [Array] def find_versions(id:) - [cache[id][:current]] + cache[id].fetch(:versions, []) + [cache[id][:current] || nil].compact + cache[id].fetch(:versions, []) end # Return the file associated with the given identifier @@ -49,15 +49,18 @@ def find_versions(id:) # @return [Valkyrie::StorageAdapter::StreamFile] # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found def find_by(id:) - no_version_id = Valkyrie::ID.new(id.to_s.split("#").first) + no_version_id, _version = id_and_version(id) raise Valkyrie::StorageAdapter::FileNotFound unless cache[no_version_id] - if id == no_version_id - cache[id][:current] - else - find_versions(id: no_version_id).find do |file| - file.version_id == id + version = + if id == no_version_id + cache[id][:current] + else + find_versions(id: no_version_id).find do |file| + file.version_id == id + end end - end + raise Valkyrie::StorageAdapter::FileNotFound unless version + version end # @param id [Valkyrie::ID] @@ -77,10 +80,24 @@ def supports?(feature) end end + def id_and_version(id) + id, version = id.to_s.split("#") + [Valkyrie::ID.new(id), version] + end + # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] def delete(id:) - cache.delete(id) + base_id, version = id_and_version(id) + if version && cache[base_id][:current]&.version_id != id + cache[base_id][:versions].reject! do |file| + file.version_id == id + end + else + cache[base_id][:versions] ||= [] + cache[base_id][:versions].prepend(cache[base_id][:current]) + cache[base_id][:current] = nil + end nil end end From 62c731946c0e357c2a4fbd1e926b694ef5fb4a64 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Tue, 5 Sep 2023 09:40:43 -0700 Subject: [PATCH 08/33] Add purge_versions: true --- lib/valkyrie/specs/shared_specs/storage_adapter.rb | 7 +++++++ lib/valkyrie/storage/memory.rb | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 729993948..ad8a99074 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -133,6 +133,13 @@ def open_files expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id expect(storage_adapter.find_versions(id: newest_version.id).length).to eq 3 + # I can delete all versions. + + storage_adapter.delete(id: newest_version.id, purge_versions: true) + + expect(storage_adapter.find_versions(id: new_version.id)).to eq [] + expect { storage_adapter.find_by(id: newest_version.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound + # TODO # 1. How do I delete a version: storage_adapter.delete(id: version_id) # 2. Is there a way to delete all versions: storage_adapter.delete(id: id, purge_versions: true) diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index 1aef8b34b..bc8d2bed6 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -41,6 +41,7 @@ def upload_version(id:, file:) # @param id [Valkyrie::ID] # @return [Array] def find_versions(id:) + return [] if cache[id].nil? [cache[id][:current] || nil].compact + cache[id].fetch(:versions, []) end @@ -87,7 +88,7 @@ def id_and_version(id) # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] - def delete(id:) + def delete(id:, purge_versions: false) base_id, version = id_and_version(id) if version && cache[base_id][:current]&.version_id != id cache[base_id][:versions].reject! do |file| @@ -98,6 +99,7 @@ def delete(id:) cache[base_id][:versions].prepend(cache[base_id][:current]) cache[base_id][:current] = nil end + cache.delete(base_id) if purge_versions nil end end From e8300c1e4b41554bd922c27c7b73a9a5ef5eec2c Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Tue, 5 Sep 2023 09:41:01 -0700 Subject: [PATCH 09/33] Clear TODOs --- lib/valkyrie/specs/shared_specs/storage_adapter.rb | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index ad8a99074..15ffca709 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -140,14 +140,6 @@ def open_files expect(storage_adapter.find_versions(id: new_version.id)).to eq [] expect { storage_adapter.find_by(id: newest_version.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound - # TODO - # 1. How do I delete a version: storage_adapter.delete(id: version_id) - # 2. Is there a way to delete all versions: storage_adapter.delete(id: id, purge_versions: true) - # 3. If I delete the root version, can I still query for its versions. Yes. - # 4. I can find_by a version ID and get it. - # - # Deleting: when I delete a root node, find_versions should continue to - # work, but find_by with the root node ID and no version ID should NotFound. ensure f&.close end From 54b3cc173474607410d6cff376cf3edeb883d291 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Tue, 5 Sep 2023 11:39:42 -0700 Subject: [PATCH 10/33] WIP --- lib/valkyrie/storage.rb | 1 + lib/valkyrie/storage/versioned_disk.rb | 99 ++++++++++++++++++++ spec/valkyrie/storage/versioned_disk_spec.rb | 20 ++++ 3 files changed, 120 insertions(+) create mode 100644 lib/valkyrie/storage/versioned_disk.rb create mode 100644 spec/valkyrie/storage/versioned_disk_spec.rb diff --git a/lib/valkyrie/storage.rb b/lib/valkyrie/storage.rb index 289e92a7f..c5b009be1 100644 --- a/lib/valkyrie/storage.rb +++ b/lib/valkyrie/storage.rb @@ -31,6 +31,7 @@ module Valkyrie # @see lib/valkyrie/specs/shared_specs/storage_adapter.rb module Storage require 'valkyrie/storage/disk' + require 'valkyrie/storage/versioned_disk' require 'valkyrie/storage/fedora' require 'valkyrie/storage/memory' end diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb new file mode 100644 index 000000000..9435eb436 --- /dev/null +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true +module Valkyrie::Storage + # Implements the DataMapper Pattern to store binary data on disk + class VersionedDisk + attr_reader :base_path, :path_generator, :file_mover + def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedStorage, file_mover: FileUtils.method(:mv)) + @base_path = Pathname.new(base_path.to_s) + @path_generator = path_generator.new(base_path: base_path) + @file_mover = file_mover + end + + # @param file [IO] + # @param original_filename [String] + # @param resource [Valkyrie::Resource] + # @param _extra_arguments [Hash] additional arguments which may be passed to other adapters + # @return [Valkyrie::StorageAdapter::File] + def upload(file:, original_filename:, resource: nil, **_extra_arguments) + version_timestamp = Time.now.to_i + new_path = path_generator.generate(resource: resource, file: file, original_filename: "v-#{version_timestamp}-#{original_filename}") + FileUtils.mkdir_p(new_path.parent) + file_mover.call(file.path, new_path) + find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) + end + + # @param id [Valkyrie::ID] + # @return [Boolean] true if this adapter can handle this type of identifer + def handles?(id:) + id.to_s.start_with?("versiondisk://#{base_path}") + end + + # @param feature [Symbol] Feature to test for. + # @return [Boolean] true if the adapter supports the given feature + def supports?(feature) + return true if feature == :versions + false + end + + def file_path(id, version) + version_id(id, version).to_s.gsub(/^versiondisk:\/\//, '') + end + + # Return the file associated with the given identifier + # @param id [Valkyrie::ID] + # @return [Valkyrie::StorageAdapter::File] + # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found + def find_by(id:) + id, version = id_and_version(id) + version = get_current_version(id) if version == "current" + Valkyrie::StorageAdapter::File.new(id: Valkyrie::ID.new(id.to_s), io: LazyFile.open(file_path(id, version), 'rb'), version_id: version_id(id, version)) + rescue Errno::ENOENT + raise Valkyrie::StorageAdapter::FileNotFound + end + + def get_current_version(id) + root = Pathname.new(file_path(id, "current")) + original_name = root.basename.to_s.split("v-").last.split("-", 2).last + end + + def id_and_version(id) + pre_version, post_version = id.to_s.split("v-") + version, post_version = post_version.split("-", 2) + [Valkyrie::ID.new("#{pre_version}v-current-#{post_version}"), version] + end + + def version_id(id, version) + Valkyrie::ID.new(id.to_s.gsub("v-current", "v-#{version}")) + end + + ## LazyFile takes File.open parameters but doesn't leave a file handle open on + # instantiation. This way StorageAdapter#find_by doesn't open a handle + # silently and never clean up after itself. + class LazyFile + def self.open(path, mode) + # Open the file regularly and close it, so it can error if it doesn't + # exist. + File.open(path, mode).close + new(path, mode) + end + + delegate(*(File.instance_methods - Object.instance_methods), to: :_inner_file) + + def initialize(path, mode) + @__path = path + @__mode = mode + end + + def _inner_file + @_inner_file ||= File.open(@__path, @__mode) + end + end + + # Delete the file on disk associated with the given identifier. + # @param id [Valkyrie::ID] + def delete(id:) + path = file_path(id) + FileUtils.rm_rf(path) if File.exist?(path) + end + end +end diff --git a/spec/valkyrie/storage/versioned_disk_spec.rb b/spec/valkyrie/storage/versioned_disk_spec.rb new file mode 100644 index 000000000..778ead93c --- /dev/null +++ b/spec/valkyrie/storage/versioned_disk_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true +require 'spec_helper' +require 'valkyrie/specs/shared_specs' +include ActionDispatch::TestProcess + +RSpec.describe Valkyrie::Storage::VersionedDisk do + it_behaves_like "a Valkyrie::StorageAdapter" + let(:storage_adapter) { described_class.new(base_path: ROOT_PATH.join("tmp", "files_test")) } + let(:file) { fixture_file_upload('files/example.tif', 'image/tiff') } + + describe ".handles?" do + it "matches on base_path" do + expect(storage_adapter.handles?(id: "versiondisk://#{ROOT_PATH.join('tmp', 'files_test')}")).to eq true + end + + it "does not match when base_path differs" do + expect(storage_adapter.handles?(id: "versiondisk://#{ROOT_PATH.join('tmp', 'wrong')}")).to eq false + end + end +end From e92e903591c6d26a1df07e82aea2fa331d1b3444 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Tue, 5 Sep 2023 14:27:36 -0700 Subject: [PATCH 11/33] Back to failing only versioning. --- lib/valkyrie/storage/versioned_disk.rb | 42 +++++++++++++------- spec/valkyrie/storage/versioned_disk_spec.rb | 3 ++ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 9435eb436..426e619a4 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -35,8 +35,8 @@ def supports?(feature) false end - def file_path(id, version) - version_id(id, version).to_s.gsub(/^versiondisk:\/\//, '') + def file_path(version_id) + version_id.to_s.gsub(/^versiondisk:\/\//, '') end # Return the file associated with the given identifier @@ -44,26 +44,40 @@ def file_path(id, version) # @return [Valkyrie::StorageAdapter::File] # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found def find_by(id:) - id, version = id_and_version(id) - version = get_current_version(id) if version == "current" - Valkyrie::StorageAdapter::File.new(id: Valkyrie::ID.new(id.to_s), io: LazyFile.open(file_path(id, version), 'rb'), version_id: version_id(id, version)) + version_id = version_id(id) + current_version = current_version_id(id) + Valkyrie::StorageAdapter::File.new(id: Valkyrie::ID.new(current_version.to_s), io: LazyFile.open(file_path(version_id), 'rb'), version_id: version_id) rescue Errno::ENOENT raise Valkyrie::StorageAdapter::FileNotFound end + def version_id(id) + return id unless id.to_s.include?("v-") + pre_version, version, post_version = split_version(id) + version = get_current_version(id) if version == "current" + Valkyrie::ID.new("#{pre_version}v-#{version}-#{post_version}") + end + + def current_version_id(id) + return id unless id.to_s.include?("v-") + pre_version, version, post_version = split_version(id) + return id if version == "current" + Valkyrie::ID.new("#{pre_version}v-current-#{post_version}") + end + def get_current_version(id) - root = Pathname.new(file_path(id, "current")) - original_name = root.basename.to_s.split("v-").last.split("-", 2).last + root = Pathname.new(file_path(id)) + _, _version, original_name = split_version(id) + files = root.parent.children.select { |file| file.basename.to_s.end_with?(original_name) } + return nil if files.blank? + _, version, _post_version = split_version(files.first) + version end - def id_and_version(id) + def split_version(id) pre_version, post_version = id.to_s.split("v-") version, post_version = post_version.split("-", 2) - [Valkyrie::ID.new("#{pre_version}v-current-#{post_version}"), version] - end - - def version_id(id, version) - Valkyrie::ID.new(id.to_s.gsub("v-current", "v-#{version}")) + [pre_version, version, post_version] end ## LazyFile takes File.open parameters but doesn't leave a file handle open on @@ -92,7 +106,7 @@ def _inner_file # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] def delete(id:) - path = file_path(id) + path = file_path(version_id(id)) FileUtils.rm_rf(path) if File.exist?(path) end end diff --git a/spec/valkyrie/storage/versioned_disk_spec.rb b/spec/valkyrie/storage/versioned_disk_spec.rb index 778ead93c..f1c8b3237 100644 --- a/spec/valkyrie/storage/versioned_disk_spec.rb +++ b/spec/valkyrie/storage/versioned_disk_spec.rb @@ -7,6 +7,9 @@ it_behaves_like "a Valkyrie::StorageAdapter" let(:storage_adapter) { described_class.new(base_path: ROOT_PATH.join("tmp", "files_test")) } let(:file) { fixture_file_upload('files/example.tif', 'image/tiff') } + before do + FileUtils.rm_rf(ROOT_PATH.join("tmp", "files_test")) + end describe ".handles?" do it "matches on base_path" do From 8ebbb33b3f0cc227f53d7d84dc042f762c51ccae Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Tue, 5 Sep 2023 15:12:51 -0700 Subject: [PATCH 12/33] Implement up until deletion markers. --- lib/valkyrie/storage/versioned_disk.rb | 74 +++++++++++++++----------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 426e619a4..78ce88ae1 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -15,13 +15,37 @@ def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedSt # @param _extra_arguments [Hash] additional arguments which may be passed to other adapters # @return [Valkyrie::StorageAdapter::File] def upload(file:, original_filename:, resource: nil, **_extra_arguments) - version_timestamp = Time.now.to_i + version_timestamp = current_timestamp new_path = path_generator.generate(resource: resource, file: file, original_filename: "v-#{version_timestamp}-#{original_filename}") FileUtils.mkdir_p(new_path.parent) - file_mover.call(file.path, new_path) + file_mover.call(file.try(:path) || file.try(:disk_path), new_path) find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) end + def current_timestamp + Time.now.strftime("%s%N") + end + + def upload_version(id:, file:) + version_timestamp = current_timestamp + version_id = version_id(id) + existing_path = file_path(version_id) + _, version, _post = split_version(version_id) + new_path = Pathname.new(existing_path.gsub(version, version_timestamp.to_s)) + FileUtils.mkdir_p(new_path.parent) + file_mover.call(file.try(:path) || file.try(:disk_path), new_path) + find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) + end + + def find_versions(id:) + root = Pathname.new(file_path(id)) + _, _version, original_name = split_version(id) + files = root.parent.children.select { |file| file.basename.to_s.end_with?(original_name) && !file.basename.to_s.include?("deletionmarker") }.sort.reverse + files.map do |file| + find_by(id: Valkyrie::ID.new("versiondisk://#{file}")) + end + end + # @param id [Valkyrie::ID] # @return [Boolean] true if this adapter can handle this type of identifer def handles?(id:) @@ -46,7 +70,7 @@ def file_path(version_id) def find_by(id:) version_id = version_id(id) current_version = current_version_id(id) - Valkyrie::StorageAdapter::File.new(id: Valkyrie::ID.new(current_version.to_s), io: LazyFile.open(file_path(version_id), 'rb'), version_id: version_id) + Valkyrie::StorageAdapter::File.new(id: Valkyrie::ID.new(current_version.to_s), io: ::Valkyrie::Storage::Disk::LazyFile.open(file_path(version_id), 'rb'), version_id: version_id) rescue Errno::ENOENT raise Valkyrie::StorageAdapter::FileNotFound end @@ -66,11 +90,9 @@ def current_version_id(id) end def get_current_version(id) - root = Pathname.new(file_path(id)) - _, _version, original_name = split_version(id) - files = root.parent.children.select { |file| file.basename.to_s.end_with?(original_name) } - return nil if files.blank? - _, version, _post_version = split_version(files.first) + current_version_id = find_versions(id: id).first&.version_id + return nil if current_version_id.nil? + _, version, _post_version = split_version(current_version_id) version end @@ -80,34 +102,22 @@ def split_version(id) [pre_version, version, post_version] end - ## LazyFile takes File.open parameters but doesn't leave a file handle open on - # instantiation. This way StorageAdapter#find_by doesn't open a handle - # silently and never clean up after itself. - class LazyFile - def self.open(path, mode) - # Open the file regularly and close it, so it can error if it doesn't - # exist. - File.open(path, mode).close - new(path, mode) - end - - delegate(*(File.instance_methods - Object.instance_methods), to: :_inner_file) - - def initialize(path, mode) - @__path = path - @__mode = mode - end - - def _inner_file - @_inner_file ||= File.open(@__path, @__mode) - end - end - # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] def delete(id:) path = file_path(version_id(id)) - FileUtils.rm_rf(path) if File.exist?(path) + if version_id(id).to_s.include?(get_current_version(id)) + # Leave a deletion marker behind. + version_timestamp = current_timestamp + version_id = version_id(id) + existing_path = file_path(version_id) + _, version, _post = split_version(version_id) + new_path = Pathname.new(existing_path.gsub(version, "#{version_timestamp}-deletionmarker")) + FileUtils.mkdir_p(new_path.parent) + File.open(new_path, 'w') { |f| f.puts "Deleted" } + elsif File.exist?(path) + FileUtils.rm_rf(path) + end end end end From 7490fd4ad40dedc352dd8ad766c306e961ed13f9 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 6 Sep 2023 09:15:40 -0700 Subject: [PATCH 13/33] Passing tests. --- lib/valkyrie/storage/versioned_disk.rb | 26 ++++++++++++++++++-------- spec/spec_helper.rb | 1 + 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 78ce88ae1..1cc6838f2 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -3,7 +3,7 @@ module Valkyrie::Storage # Implements the DataMapper Pattern to store binary data on disk class VersionedDisk attr_reader :base_path, :path_generator, :file_mover - def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedStorage, file_mover: FileUtils.method(:mv)) + def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedStorage, file_mover: FileUtils.method(:cp)) @base_path = Pathname.new(base_path.to_s) @path_generator = path_generator.new(base_path: base_path) @file_mover = file_mover @@ -29,6 +29,8 @@ def current_timestamp def upload_version(id:, file:) version_timestamp = current_timestamp version_id = version_id(id) + # It's a deletion marker.. + version_id = version_files(id: id)[1] if version_id.to_s.include?("v--") existing_path = file_path(version_id) _, version, _post = split_version(version_id) new_path = Pathname.new(existing_path.gsub(version, version_timestamp.to_s)) @@ -38,14 +40,17 @@ def upload_version(id:, file:) end def find_versions(id:) - root = Pathname.new(file_path(id)) - _, _version, original_name = split_version(id) - files = root.parent.children.select { |file| file.basename.to_s.end_with?(original_name) && !file.basename.to_s.include?("deletionmarker") }.sort.reverse - files.map do |file| + version_files(id: id).select { |x| !x.to_s.include?("deletionmarker") }.map do |file| find_by(id: Valkyrie::ID.new("versiondisk://#{file}")) end end + def version_files(id:) + root = Pathname.new(file_path(id)) + _, _version, original_name = split_version(id) + root.parent.children.select { |file| file.basename.to_s.end_with?(original_name) }.sort.reverse + end + # @param id [Valkyrie::ID] # @return [Boolean] true if this adapter can handle this type of identifer def handles?(id:) @@ -90,8 +95,9 @@ def current_version_id(id) end def get_current_version(id) - current_version_id = find_versions(id: id).first&.version_id + current_version_id = version_files(id: id)&.first return nil if current_version_id.nil? + return nil if current_version_id.to_s.include?("deletionmarker") _, version, _post_version = split_version(current_version_id) version end @@ -104,9 +110,13 @@ def split_version(id) # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] - def delete(id:) + def delete(id:, purge_versions: false) path = file_path(version_id(id)) - if version_id(id).to_s.include?(get_current_version(id)) + if purge_versions + version_files(id: id).each do |file| + FileUtils.rm_rf(file) + end + elsif version_id(id).to_s.include?(get_current_version(id)) # Leave a deletion marker behind. version_timestamp = current_timestamp version_id = version_id(id) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index aa1f13696..82c26c5ea 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -12,6 +12,7 @@ $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__) require "valkyrie" require 'pry' +require 'pry-byebug' require 'action_dispatch' require 'webmock/rspec' require 'timecop' From e9e349ae4879b8c72fa45889527e538d92d0d9c7 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 6 Sep 2023 10:00:41 -0700 Subject: [PATCH 14/33] Versioned Disk Adapter. --- lib/valkyrie/storage/versioned_disk.rb | 119 ++++++++++++++++--------- 1 file changed, 75 insertions(+), 44 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 1cc6838f2..2953bebf0 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -2,6 +2,64 @@ module Valkyrie::Storage # Implements the DataMapper Pattern to store binary data on disk class VersionedDisk + class VersionId + attr_reader :id + def initialize(id) + @id = id + end + + def current_reference_id + self.class.new(Valkyrie::ID.new(string_id.gsub(version, "current"))) + end + + def resolve_current + return self unless reference? + version_files.first + end + + def file_path + @file_path ||= string_id.gsub(/^versiondisk:\/\//, '') + end + + def version_files + root = Pathname.new(file_path) + root.parent.children.select { |file| file.basename.to_s.end_with?(filename) }.sort.reverse.map do |file| + VersionId.new(Valkyrie::ID.new("versiondisk://#{file}")) + end + end + + def deletion_marker? + string_id.include?("deletionmarker") + end + + def current? + version_files.first.id == id + end + + def reference? + version == "current" + end + + def versioned? + string_id.include?("v-") + end + + def file_root + string_id.split("v-").first + end + + def version + string_id.split("v-").last.split("-", 2).first + end + + def filename + string_id.split("v-").last.split("-", 2).last.gsub("deletionmarker-", "") + end + + def string_id + id.to_s + end + end attr_reader :base_path, :path_generator, :file_mover def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedStorage, file_mover: FileUtils.method(:cp)) @base_path = Pathname.new(base_path.to_s) @@ -29,11 +87,9 @@ def current_timestamp def upload_version(id:, file:) version_timestamp = current_timestamp version_id = version_id(id) - # It's a deletion marker.. - version_id = version_files(id: id)[1] if version_id.to_s.include?("v--") - existing_path = file_path(version_id) - _, version, _post = split_version(version_id) - new_path = Pathname.new(existing_path.gsub(version, version_timestamp.to_s)) + version_id = version_id.version_files[1] if version_id.deletion_marker? + existing_path = version_id.file_path + new_path = Pathname.new(existing_path.gsub(version_id.version, version_timestamp.to_s)) FileUtils.mkdir_p(new_path.parent) file_mover.call(file.try(:path) || file.try(:disk_path), new_path) find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) @@ -47,8 +103,8 @@ def find_versions(id:) def version_files(id:) root = Pathname.new(file_path(id)) - _, _version, original_name = split_version(id) - root.parent.children.select { |file| file.basename.to_s.end_with?(original_name) }.sort.reverse + id = VersionId.new(id) + root.parent.children.select { |file| file.basename.to_s.end_with?(id.filename) }.sort.reverse end # @param id [Valkyrie::ID] @@ -74,59 +130,34 @@ def file_path(version_id) # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found def find_by(id:) version_id = version_id(id) - current_version = current_version_id(id) - Valkyrie::StorageAdapter::File.new(id: Valkyrie::ID.new(current_version.to_s), io: ::Valkyrie::Storage::Disk::LazyFile.open(file_path(version_id), 'rb'), version_id: version_id) + raise Valkyrie::StorageAdapter::FileNotFound if version_id.deletion_marker? + Valkyrie::StorageAdapter::File.new(id: version_id.current_reference_id.id, io: ::Valkyrie::Storage::Disk::LazyFile.open(version_id.file_path, 'rb'), version_id: version_id.id) rescue Errno::ENOENT raise Valkyrie::StorageAdapter::FileNotFound end def version_id(id) - return id unless id.to_s.include?("v-") - pre_version, version, post_version = split_version(id) - version = get_current_version(id) if version == "current" - Valkyrie::ID.new("#{pre_version}v-#{version}-#{post_version}") - end - - def current_version_id(id) - return id unless id.to_s.include?("v-") - pre_version, version, post_version = split_version(id) - return id if version == "current" - Valkyrie::ID.new("#{pre_version}v-current-#{post_version}") - end - - def get_current_version(id) - current_version_id = version_files(id: id)&.first - return nil if current_version_id.nil? - return nil if current_version_id.to_s.include?("deletionmarker") - _, version, _post_version = split_version(current_version_id) - version - end - - def split_version(id) - pre_version, post_version = id.to_s.split("v-") - version, post_version = post_version.split("-", 2) - [pre_version, version, post_version] + id = VersionId.new(id) + return id unless id.versioned? + id.resolve_current end # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] def delete(id:, purge_versions: false) - path = file_path(version_id(id)) + id = version_id(id).resolve_current if purge_versions - version_files(id: id).each do |file| - FileUtils.rm_rf(file) + id.version_files.each do |version_id| + FileUtils.rm_rf(version_id.file_path) end - elsif version_id(id).to_s.include?(get_current_version(id)) + elsif id.current? # Leave a deletion marker behind. version_timestamp = current_timestamp - version_id = version_id(id) - existing_path = file_path(version_id) - _, version, _post = split_version(version_id) - new_path = Pathname.new(existing_path.gsub(version, "#{version_timestamp}-deletionmarker")) + new_path = Pathname.new(id.file_path.gsub(id.version, "#{version_timestamp}-deletionmarker")) FileUtils.mkdir_p(new_path.parent) File.open(new_path, 'w') { |f| f.puts "Deleted" } - elsif File.exist?(path) - FileUtils.rm_rf(path) + elsif File.exist?(id.file_path) + FileUtils.rm_rf(id.file_path) end end end From 25583fa1fa3dd2fa0df23342f287974cefed4a5e Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 6 Sep 2023 10:02:29 -0700 Subject: [PATCH 15/33] Add comment. --- lib/valkyrie/storage/versioned_disk.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 2953bebf0..ba800ab15 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -1,6 +1,10 @@ # frozen_string_literal: true module Valkyrie::Storage - # Implements the DataMapper Pattern to store binary data on disk + # The VersionedDisk adapter implements versioned storage on disk by storing + # the timestamp of the file's creation as part of the file name + # (v-timestamp-filename.jpg). If the + # current file is deleted it creates a DeletionMarker, which is an empty file + # with "deletionmarker" in the name of the file. class VersionedDisk class VersionId attr_reader :id From 97f979fe1ffd4f79733cc84e8bbaa90de0da7fa1 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 6 Sep 2023 10:05:51 -0700 Subject: [PATCH 16/33] Coverage --- lib/valkyrie/specs/shared_specs/storage_adapter.rb | 4 ++++ lib/valkyrie/storage/versioned_disk.rb | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 15ffca709..6bff757f8 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -18,6 +18,10 @@ class Valkyrie::Specs::CustomResource < Valkyrie::Resource it { is_expected.to respond_to(:upload).with_keywords(:file, :resource, :original_filename) } it { is_expected.to respond_to(:supports?) } + it "returns false for non-existing features" do + expect(storage_adapter.supports?(:bad_feature_not_real_dont_implement)).to eq false + end + it "can upload a file which is just an IO" do io_file = Tempfile.new('temp_io') io_file.write "Stuff" diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index ba800ab15..ccd82f1ae 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -48,10 +48,6 @@ def versioned? string_id.include?("v-") end - def file_root - string_id.split("v-").first - end - def version string_id.split("v-").last.split("-", 2).first end From 73e90a3da2ae13e566bfdc6b55139bd39defd4e8 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Wed, 13 Sep 2023 16:58:16 -0700 Subject: [PATCH 17/33] Get Fedora 5 running locally. --- .lando.yml | 13 ++++++++++--- spec/support/fedora_helper.rb | 2 +- spec/valkyrie/storage/fedora_spec.rb | 6 +++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/.lando.yml b/.lando.yml index 6aecf12e6..49f9b5dfb 100644 --- a/.lando.yml +++ b/.lando.yml @@ -24,19 +24,26 @@ services: - fedora4:/data ports: - 8988:8080 - portforward: true + environment: + CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC" + portforward: 8988 valkyrie_fedora_5: type: compose app_mount: false volumes: fedora5: services: - image: samvera/fcrepo4:5.1.0 - command: /fedora-entrypoint.sh + image: fcrepo/fcrepo:5.1.1-multiplatform + command: + - "catalina.sh" + - "run" volumes: - fedora5:/data ports: - 8998:8080 + environment: + CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true" + JAVA_OPTS: "-Dfcrepo.dynamic.jms.port=61620 -Dfcrepo.dynamic.stomp.port=61617 -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true" portforward: true valkyrie_fedora_6: type: compose diff --git a/spec/support/fedora_helper.rb b/spec/support/fedora_helper.rb index 11c7a4f23..5cd042ac1 100644 --- a/spec/support/fedora_helper.rb +++ b/spec/support/fedora_helper.rb @@ -9,7 +9,7 @@ def fedora_adapter_config(base_path:, schema: nil, fedora_version: 4) elsif fedora_version == 6 port = ENV["FEDORA_6_PORT"] || 8978 end - connection_url = fedora_version == 6 ? "/fcrepo/rest" : "/rest" + connection_url = fedora_version == 6 || (fedora_version == 5 && !ENV["CI"]) ? "/fcrepo/rest" : "/rest" opts = { base_path: base_path, connection: ::Ldp::Client.new(faraday_client("http://#{fedora_auth}localhost:#{port}#{connection_url}")), diff --git a/spec/valkyrie/storage/fedora_spec.rb b/spec/valkyrie/storage/fedora_spec.rb index c3433b1a4..96e6df890 100644 --- a/spec/valkyrie/storage/fedora_spec.rb +++ b/spec/valkyrie/storage/fedora_spec.rb @@ -94,7 +94,7 @@ class Valkyrie::Specs::FedoraCustomResource < Valkyrie::Resource let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 5)) } it 'produces a valid URI' do - expected_uri = 'fedora://localhost:8998/rest/test/AN1D4UHA/original' + expected_uri = "fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/test/AN1D4UHA/original" expect(uploaded_file.id.to_s).to eq expected_uri end end @@ -103,7 +103,7 @@ class Valkyrie::Specs::FedoraCustomResource < Valkyrie::Resource let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: '/', fedora_version: 5)) } it 'produces a valid URI' do - expected_uri = RDF::URI.new('fedora://localhost:8998/rest/AN1D4UHA/original') + expected_uri = RDF::URI.new("fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/AN1D4UHA/original") expect(uploaded_file.id.to_s).to eq expected_uri end end @@ -131,7 +131,7 @@ class Valkyrie::Specs::FedoraCustomResource < Valkyrie::Resource let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 5)) } it 'produces a valid URI' do - expected_uri = 'fedora://localhost:8998/rest/test/AN/1D/4U/HA/AN1D4UHA/original' + expected_uri = "fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/test/AN/1D/4U/HA/AN1D4UHA/original" expect(uploaded_file.id.to_s).to eq expected_uri end end From c82a3cbfc5e7e01efb82bf783201db839a2571ae Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 11:05:02 -0700 Subject: [PATCH 18/33] WIP --- .../specs/shared_specs/storage_adapter.rb | 2 +- lib/valkyrie/storage/fedora.rb | 78 +++++++++++++++++-- 2 files changed, 72 insertions(+), 8 deletions(-) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 6bff757f8..7eab012b3 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -85,7 +85,7 @@ def open_files it "can upload and find new versions" do pending "Versioning not supported" unless storage_adapter.supports?(:versions) - resource = Valkyrie::Specs::CustomResource.new(id: "test") + resource = Valkyrie::Specs::CustomResource.new(id: "test2") uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true) expect(uploaded_file.version_id).not_to be_blank diff --git a/lib/valkyrie/storage/fedora.rb b/lib/valkyrie/storage/fedora.rb index de9c8f0d6..70b6ace6e 100644 --- a/lib/valkyrie/storage/fedora.rb +++ b/lib/valkyrie/storage/fedora.rb @@ -21,7 +21,8 @@ def handles?(id:) # @param feature [Symbol] Feature to test for. # @return [Boolean] true if the adapter supports the given feature - def supports?(_feature) + def supports?(feature) + return true if feature == :versions false end @@ -30,7 +31,7 @@ def supports?(_feature) # @return [Valkyrie::StorageAdapter::StreamFile] # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found def find_by(id:) - Valkyrie::StorageAdapter::StreamFile.new(id: id, io: response(id: id)) + perform_find(id: id) end # @param file [IO] @@ -43,18 +44,76 @@ def find_by(id:) def upload(file:, original_filename:, resource:, content_type: "application/octet-stream", # rubocop:disable Metrics/ParameterLists resource_uri_transformer: default_resource_uri_transformer, **_extra_arguments) identifier = resource_uri_transformer.call(resource, base_url) + '/original' + upload_file(fedora_uri: identifier, io: file, content_type: content_type, original_filename: original_filename) + version_id = mint_version(identifier, latest_version(identifier)) + perform_find(id: Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL)), version_id: version_id) + end + + def upload_version(id:, file:) + uri = fedora_identifier(id: id) + upload_file(fedora_uri: uri, io: file) + version_id = mint_version(uri, latest_version(uri)) + perform_find(id: Valkyrie::ID.new(uri.to_s.sub(/^.+\/\//, PROTOCOL)), version_id: version_id) + end + + def upload_file(fedora_uri:, io:, content_type: "application/octet-stream", original_filename: "default") sha1 = [5, 6].include?(fedora_version) ? "sha" : "sha1" connection.http.put do |request| - request.url identifier + request.url fedora_uri request.headers['Content-Type'] = content_type - request.headers['Content-Length'] = file.length.to_s + request.headers['Content-Length'] = io.length.to_s request.headers['Content-Disposition'] = "attachment; filename=\"#{original_filename}\"" - request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(file)}" + request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(io)}" request.headers['link'] = "; rel=\"type\"" - io = Faraday::UploadIO.new(file, content_type, original_filename) + io = Faraday::UploadIO.new(io, content_type, original_filename) request.body = io end - find_by(id: Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL))) + end + + def find_versions(id:) + uri = fedora_identifier(id: id) + version_list = version_list(uri) + version_list.map do |version| + id = valkyrie_identifier(uri: version["@id"]) + perform_find(id: id, version_id: id) + end + end + + def version_list(fedora_uri) + version_list = connection.http.get do |request| + request.url "#{fedora_uri}/fcr:versions" + request.headers["Accept"] = "application/ld+json" + end + return [] unless version_list.success? + JSON.parse(version_list.body)&.first&.fetch("http://fedora.info/definitions/v4/repository#hasVersion", []) + end + + def latest_version(identifier) + version_list = version_list(identifier) + return "version1" if version_list.blank? + last_version = version_list.first["@id"] + last_version_number = last_version.split("/").last.gsub("version", "").to_i + "version#{last_version_number + 1}" + end + + def perform_find(id:, version_id: nil) + current_id = Valkyrie::ID.new(id.to_s.split("/fcr:versions").first) + version_id ||= id if id != current_id + Valkyrie::StorageAdapter::StreamFile.new(id: current_id, io: response(id: id), version_id: version_id) + end + + # @param identifier [String] Fedora URI to mint a version for. + # @return [Valkyrie::ID] version_id of the minted version. + # Versions are created AFTER content is uploaded. + def mint_version(identifier, version_name = "version1") + response = connection.http.post do |request| + request.url "#{identifier}/fcr:versions" + request.headers['Slug'] = version_name + end + # If there's a deletion marker, don't return anything. + return nil if response.status == 410 + raise "Version unable to be created" unless response.status == 201 + valkyrie_identifier(uri: response.headers["location"].gsub("/fcr:metadata", "")) end # Delete the file in Fedora associated with the given identifier. @@ -87,6 +146,11 @@ def fedora_identifier(id:) RDF::URI(identifier) end + def valkyrie_identifier(uri:) + id = uri.to_s.sub("http://", PROTOCOL) + Valkyrie::ID.new(id) + end + private # @return [IOProxy] From 3ba8e96fdb3779569d0476ae7c7766db6b723851 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 11:16:48 -0700 Subject: [PATCH 19/33] Deleting really deletes. --- .../specs/shared_specs/storage_adapter.rb | 22 ++++++------------- lib/valkyrie/storage/memory.rb | 7 ++---- lib/valkyrie/storage/versioned_disk.rb | 10 ++------- 3 files changed, 11 insertions(+), 28 deletions(-) diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 7eab012b3..91dbcb244 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -114,18 +114,11 @@ def open_files expect(storage_adapter.find_by(id: uploaded_file.version_id).version_id).to eq uploaded_file.version_id - # Delete versions - - # Deleting a version should leave the current version + # Deleting a version should leave the current versions storage_adapter.delete(id: uploaded_file.version_id) expect(storage_adapter.find_versions(id: uploaded_file.id).length).to eq 1 expect { storage_adapter.find_by(id: uploaded_file.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound - # Deleting the current record should push it into the versions history. - storage_adapter.delete(id: new_version.id) - expect { storage_adapter.find_by(id: new_version.id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound - expect(storage_adapter.find_versions(id: new_version.id).length).to eq 1 - # Restoring a previous version is just pumping its file into upload_version newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id) expect(newest_version.version_id).not_to eq new_version.id @@ -137,13 +130,12 @@ def open_files expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id expect(storage_adapter.find_versions(id: newest_version.id).length).to eq 3 - # I can delete all versions. - - storage_adapter.delete(id: newest_version.id, purge_versions: true) - - expect(storage_adapter.find_versions(id: new_version.id)).to eq [] - expect { storage_adapter.find_by(id: newest_version.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound - + # NOTE: We originally wanted deleting the current record to push it into the + # versions history, but FCRepo 4/5/6 doesn't work that way, so we changed to + # instead make deleting delete everything. + storage_adapter.delete(id: new_version.id) + expect { storage_adapter.find_by(id: new_version.id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound + expect(storage_adapter.find_versions(id: new_version.id).length).to eq 0 ensure f&.close end diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index bc8d2bed6..7bd5d202c 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -88,18 +88,15 @@ def id_and_version(id) # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] - def delete(id:, purge_versions: false) + def delete(id:) base_id, version = id_and_version(id) if version && cache[base_id][:current]&.version_id != id cache[base_id][:versions].reject! do |file| file.version_id == id end else - cache[base_id][:versions] ||= [] - cache[base_id][:versions].prepend(cache[base_id][:current]) - cache[base_id][:current] = nil + cache.delete(base_id) end - cache.delete(base_id) if purge_versions nil end end diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index ccd82f1ae..c490b2670 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -130,7 +130,7 @@ def file_path(version_id) # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found def find_by(id:) version_id = version_id(id) - raise Valkyrie::StorageAdapter::FileNotFound if version_id.deletion_marker? + raise Valkyrie::StorageAdapter::FileNotFound if version_id.nil? || version_id&.deletion_marker? Valkyrie::StorageAdapter::File.new(id: version_id.current_reference_id.id, io: ::Valkyrie::Storage::Disk::LazyFile.open(version_id.file_path, 'rb'), version_id: version_id.id) rescue Errno::ENOENT raise Valkyrie::StorageAdapter::FileNotFound @@ -146,16 +146,10 @@ def version_id(id) # @param id [Valkyrie::ID] def delete(id:, purge_versions: false) id = version_id(id).resolve_current - if purge_versions + if id.current? id.version_files.each do |version_id| FileUtils.rm_rf(version_id.file_path) end - elsif id.current? - # Leave a deletion marker behind. - version_timestamp = current_timestamp - new_path = Pathname.new(id.file_path.gsub(id.version, "#{version_timestamp}-deletionmarker")) - FileUtils.mkdir_p(new_path.parent) - File.open(new_path, 'w') { |f| f.puts "Deleted" } elsif File.exist?(id.file_path) FileUtils.rm_rf(id.file_path) end From 0b21309395d9afc1039ccf1ef9d36435756b27e9 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 11:59:36 -0700 Subject: [PATCH 20/33] Fix for Fedora 5. --- lib/valkyrie/storage/fedora.rb | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/lib/valkyrie/storage/fedora.rb b/lib/valkyrie/storage/fedora.rb index 70b6ace6e..993a0d102 100644 --- a/lib/valkyrie/storage/fedora.rb +++ b/lib/valkyrie/storage/fedora.rb @@ -61,9 +61,9 @@ def upload_file(fedora_uri:, io:, content_type: "application/octet-stream", orig connection.http.put do |request| request.url fedora_uri request.headers['Content-Type'] = content_type - request.headers['Content-Length'] = io.length.to_s + request.headers['Content-Length'] = io.length.to_s if io.respond_to?(:length) request.headers['Content-Disposition'] = "attachment; filename=\"#{original_filename}\"" - request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(io)}" + request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(io)}" if io.respond_to?(:to_str) request.headers['link'] = "; rel=\"type\"" io = Faraday::UploadIO.new(io, content_type, original_filename) request.body = io @@ -85,10 +85,16 @@ def version_list(fedora_uri) request.headers["Accept"] = "application/ld+json" end return [] unless version_list.success? - JSON.parse(version_list.body)&.first&.fetch("http://fedora.info/definitions/v4/repository#hasVersion", []) + version_graph = JSON.parse(version_list.body)&.first + if fedora_version == 4 + version_graph&.fetch("http://fedora.info/definitions/v4/repository#hasVersion", []) + else + version_graph&.fetch("http://www.w3.org/ns/ldp#contains", [])&.sort_by { |x| x["@id"] }&.reverse + end end def latest_version(identifier) + return :not_applicable if fedora_version != 4 version_list = version_list(identifier) return "version1" if version_list.blank? last_version = version_list.first["@id"] @@ -96,9 +102,20 @@ def latest_version(identifier) "version#{last_version_number + 1}" end + # @param [Valkyrie::ID] id A storage ID that's not a version, to get the + # version ID of. + def current_version_id(id:) + version_list = version_list(fedora_identifier(id: id)) + return nil if version_list.blank? + valkyrie_identifier(uri: version_list.first["@id"]) + end + def perform_find(id:, version_id: nil) current_id = Valkyrie::ID.new(id.to_s.split("/fcr:versions").first) version_id ||= id if id != current_id + # No version got passed and we're asking for a current_id, gotta get the + # version ID + return perform_find(id: current_id, version_id: (current_version_id(id: id) || :empty)) if version_id.nil? Valkyrie::StorageAdapter::StreamFile.new(id: current_id, io: response(id: id), version_id: version_id) end @@ -108,10 +125,15 @@ def perform_find(id:, version_id: nil) def mint_version(identifier, version_name = "version1") response = connection.http.post do |request| request.url "#{identifier}/fcr:versions" - request.headers['Slug'] = version_name + request.headers['Slug'] = version_name if fedora_version == 4 end # If there's a deletion marker, don't return anything. return nil if response.status == 410 + # This is awful, but versioning is locked to per-second increments. + if response.status == 409 + sleep(0.5) + return mint_version(identifier, version_name) + end raise "Version unable to be created" unless response.status == 201 valkyrie_identifier(uri: response.headers["location"].gsub("/fcr:metadata", "")) end From 2d960e70de5b1dd3a81b494b25c0cdd0f3017a66 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 12:43:52 -0700 Subject: [PATCH 21/33] Add versioning support to Fedora 4/5/6 --- .circleci/config.yml | 2 +- .lando.yml | 2 +- .rubocop_todo.yml | 1 + .../specs/shared_specs/storage_adapter.rb | 15 +-- lib/valkyrie/storage/fedora.rb | 8 +- lib/valkyrie/storage/memory.rb | 2 + lib/valkyrie/storage/versioned_disk.rb | 2 +- spec/valkyrie/storage/fedora_spec.rb | 94 +++++++++++++++++++ 8 files changed, 116 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 73343ddf7..15360c065 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -28,7 +28,7 @@ jobs: environment: CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC" JAVA_OPTIONS: "-Djetty.http.port=8998 -Dfcrepo.dynamic.jms.port=61618 -Dfcrepo.dynamic.stomp.port=61614" - - image: fcrepo/fcrepo:6.0.0 + - image: fcrepo/fcrepo:6.4.0 environment: CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true" JAVA_OPTS: "-Djetty.http.port=8978 -Dfcrepo.dynamic.jms.port=61619 -Dfcrepo.dynamic.stomp.port=61615 -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true" diff --git a/.lando.yml b/.lando.yml index 49f9b5dfb..0ab25dab0 100644 --- a/.lando.yml +++ b/.lando.yml @@ -51,7 +51,7 @@ services: volumes: fedora6: services: - image: fcrepo/fcrepo:6.0.0 + image: fcrepo/fcrepo:6.4.0 command: - "catalina.sh" - "run" diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 0e52a31cf..3e318b854 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -3,6 +3,7 @@ Metrics/ClassLength: - 'lib/valkyrie/persistence/fedora/persister.rb' - 'lib/valkyrie/persistence/fedora/query_service.rb' - 'lib/valkyrie/persistence/postgres/query_service.rb' + - 'lib/valkyrie/storage/fedora.rb' Metrics/MethodLength: Exclude: diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index 91dbcb244..becdaa35f 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -55,7 +55,7 @@ def open_files end it "can upload, validate, re-fetch, and delete a file" do - resource = Valkyrie::Specs::CustomResource.new(id: "test") + resource = Valkyrie::Specs::CustomResource.new(id: "test#{SecureRandom.uuid}") sha1 = Digest::SHA1.file(file).to_s size = file.size expect(uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true)).to be_kind_of Valkyrie::StorageAdapter::File @@ -85,7 +85,7 @@ def open_files it "can upload and find new versions" do pending "Versioning not supported" unless storage_adapter.supports?(:versions) - resource = Valkyrie::Specs::CustomResource.new(id: "test2") + resource = Valkyrie::Specs::CustomResource.new(id: "test#{SecureRandom.uuid}") uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true) expect(uploaded_file.version_id).not_to be_blank @@ -115,9 +115,12 @@ def open_files expect(storage_adapter.find_by(id: uploaded_file.version_id).version_id).to eq uploaded_file.version_id # Deleting a version should leave the current versions - storage_adapter.delete(id: uploaded_file.version_id) - expect(storage_adapter.find_versions(id: uploaded_file.id).length).to eq 1 - expect { storage_adapter.find_by(id: uploaded_file.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound + if storage_adapter.supports?(:version_deletion) + storage_adapter.delete(id: uploaded_file.version_id) + expect(storage_adapter.find_versions(id: uploaded_file.id).length).to eq 1 + expect { storage_adapter.find_by(id: uploaded_file.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound + end + current_length = storage_adapter.find_versions(id: new_version.id).length # Restoring a previous version is just pumping its file into upload_version newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id) @@ -128,7 +131,7 @@ def open_files newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id) expect(newest_version.version_id).not_to eq new_version.id expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id - expect(storage_adapter.find_versions(id: newest_version.id).length).to eq 3 + expect(storage_adapter.find_versions(id: newest_version.id).length).to eq current_length + 2 # NOTE: We originally wanted deleting the current record to push it into the # versions history, but FCRepo 4/5/6 doesn't work that way, so we changed to diff --git a/lib/valkyrie/storage/fedora.rb b/lib/valkyrie/storage/fedora.rb index 993a0d102..db5a1b3ef 100644 --- a/lib/valkyrie/storage/fedora.rb +++ b/lib/valkyrie/storage/fedora.rb @@ -23,6 +23,7 @@ def handles?(id:) # @return [Boolean] true if the adapter supports the given feature def supports?(feature) return true if feature == :versions + return true if feature == :version_deletion && fedora_version != 6 false end @@ -45,12 +46,17 @@ def upload(file:, original_filename:, resource:, content_type: "application/octe resource_uri_transformer: default_resource_uri_transformer, **_extra_arguments) identifier = resource_uri_transformer.call(resource, base_url) + '/original' upload_file(fedora_uri: identifier, io: file, content_type: content_type, original_filename: original_filename) - version_id = mint_version(identifier, latest_version(identifier)) + version_id = current_version_id(id: valkyrie_identifier(uri: identifier)) || mint_version(identifier, latest_version(identifier)) perform_find(id: Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL)), version_id: version_id) end def upload_version(id:, file:) uri = fedora_identifier(id: id) + # Auto versioning is on, so have to sleep if it's too soon after last + # upload. + if fedora_version == 6 + return upload_version(id: id, file: file) if current_version_id(id: id).to_s.split("/").last == Time.current.utc.strftime("%Y%m%d%H%M%S") + end upload_file(fedora_uri: uri, io: file) version_id = mint_version(uri, latest_version(uri)) perform_find(id: Valkyrie::ID.new(uri.to_s.sub(/^.+\/\//, PROTOCOL)), version_id: version_id) diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index 7bd5d202c..91411911b 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -76,6 +76,8 @@ def supports?(feature) case feature when :versions true + when :version_deletion + true else false end diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index c490b2670..842c521fc 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -116,7 +116,7 @@ def handles?(id:) # @param feature [Symbol] Feature to test for. # @return [Boolean] true if the adapter supports the given feature def supports?(feature) - return true if feature == :versions + return true if feature == :versions || feature == :version_deletion false end diff --git a/spec/valkyrie/storage/fedora_spec.rb b/spec/valkyrie/storage/fedora_spec.rb index 96e6df890..557e082e0 100644 --- a/spec/valkyrie/storage/fedora_spec.rb +++ b/spec/valkyrie/storage/fedora_spec.rb @@ -138,6 +138,100 @@ class Valkyrie::Specs::FedoraCustomResource < Valkyrie::Resource end end + context "fedora 6" do + before(:all) do + # Start from a clean fedora + wipe_fedora!(base_path: "test", fedora_version: 6) + end + + let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 6)) } + let(:file) { fixture_file_upload('files/example.tif', 'image/tiff') } + + it_behaves_like "a Valkyrie::StorageAdapter" + + context "when uploading with a content_type" do + it "passes that on" do + io_file = file.tempfile + + resource = Valkyrie::Specs::FedoraCustomResource.new(id: SecureRandom.uuid) + + expect(uploaded_file = storage_adapter.upload( + file: io_file, + original_filename: 'foo.jpg', + resource: resource, + fake_upload_argument: true, + content_type: "image/tiff" + )).to be_kind_of Valkyrie::StorageAdapter::File + + uri = storage_adapter.fedora_identifier(id: uploaded_file.id) + response = storage_adapter.connection.http.head(uri.to_s) + + expect(response.headers["content-type"]).to eq "image/tiff" + end + end + + context 'testing resource uri transformer' do + let(:file) { fixture_file_upload('files/example.tif', 'image/tiff') } + let(:io_file) { file.tempfile } + let(:resource) { Valkyrie::Specs::FedoraCustomResource.new(id: 'AN1D4UHA') } + let(:uploaded_file) do + storage_adapter.upload( + file: io_file, + original_filename: 'foo.jpg', + resource: resource, + fake_upload_argument: true, + content_type: "image/tiff" + ) + end + context 'when using default transformer' do + context 'and basepath is passed in' do + let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 6)) } + + it 'produces a valid URI' do + expected_uri = "fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/test/AN1D4UHA/original" + expect(uploaded_file.id.to_s).to eq expected_uri + end + end + + context "when basepath uses default (e.g. '/')" do + let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: '/', fedora_version: 6)) } + + it 'produces a valid URI' do + expected_uri = RDF::URI.new("fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/AN1D4UHA/original") + expect(uploaded_file.id.to_s).to eq expected_uri + end + end + end + + context 'when transformer is passed in' do + let(:uploaded_file) do + storage_adapter.upload( + file: io_file, + original_filename: 'foo.jpg', + resource: resource, + fake_upload_argument: true, + content_type: "image/tiff", + resource_uri_transformer: uri_transformer + ) + end + let(:uri_transformer) do + lambda do |resource, base_url| + id = CGI.escape(resource.id.to_s) + head = id.split('/').first + head.gsub!(/#.*/, '') + RDF::URI.new(base_url + (head.scan(/..?/).first(4) + [id]).join('/')) + end + end + let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 6)) } + + it 'produces a valid URI' do + expected_uri = "fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/test/AN/1D/4U/HA/AN1D4UHA/original" + expect(uploaded_file.id.to_s).to eq expected_uri + end + end + end + end + context 'no ldp gem' do let(:error) { Gem::LoadError.new.tap { |err| err.name = 'ldp' } } let(:error_message) do From bbb882671f45327d31470d9d364b1a257e2d7520 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 12:45:16 -0700 Subject: [PATCH 22/33] Update lib/valkyrie/storage/versioned_disk.rb Co-authored-by: hackartisan <845363+hackartisan@users.noreply.github.com> --- lib/valkyrie/storage/versioned_disk.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 842c521fc..d4d0c77b3 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -16,6 +16,7 @@ def current_reference_id self.class.new(Valkyrie::ID.new(string_id.gsub(version, "current"))) end + # @return [VersionID] the version_id for the current file def resolve_current return self unless reference? version_files.first From 056848feca49051d022b751541da39a3dd58515e Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 12:45:23 -0700 Subject: [PATCH 23/33] Update lib/valkyrie/storage/versioned_disk.rb Co-authored-by: hackartisan <845363+hackartisan@users.noreply.github.com> --- lib/valkyrie/storage/versioned_disk.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index d4d0c77b3..0ad773247 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -41,6 +41,7 @@ def current? version_files.first.id == id end + # @return [Boolean] Whether this id is referential (e.g. "current") or absolute (e.g. a timestamp) def reference? version == "current" end From 4b58a745dcbf1cc4fb80460ad52a2cdfc4c35ad6 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 12:45:31 -0700 Subject: [PATCH 24/33] Update lib/valkyrie/storage/versioned_disk.rb Co-authored-by: hackartisan <845363+hackartisan@users.noreply.github.com> --- lib/valkyrie/storage/versioned_disk.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 0ad773247..fc5f8143e 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -6,6 +6,11 @@ module Valkyrie::Storage # current file is deleted it creates a DeletionMarker, which is an empty file # with "deletionmarker" in the name of the file. class VersionedDisk + # A small value class that holds a version id and methods for knowing things about it. + # Examples of version ids in this adapter: + # * "versiondisk://te/st/test/v-current-filename.jpg" (never actually saved this way on disk, just used as a reference) + # * "versiondisk://te/st/test/v-1694195675462560794-filename.jpg" (this timestamped form would be saved on disk) + # * "versiondisk://te/st/test/v-1694195675462560794-deletionmarker-filename.jpg" (this file is saved on disk but empty) class VersionId attr_reader :id def initialize(id) From 52b006ad88effafc55eb21926a624fe73b1ed406 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 12:48:08 -0700 Subject: [PATCH 25/33] Add comment. --- lib/valkyrie/storage/versioned_disk.rb | 126 +++++++++++++------------ 1 file changed, 65 insertions(+), 61 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index fc5f8143e..f8b16780e 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -6,67 +6,6 @@ module Valkyrie::Storage # current file is deleted it creates a DeletionMarker, which is an empty file # with "deletionmarker" in the name of the file. class VersionedDisk - # A small value class that holds a version id and methods for knowing things about it. - # Examples of version ids in this adapter: - # * "versiondisk://te/st/test/v-current-filename.jpg" (never actually saved this way on disk, just used as a reference) - # * "versiondisk://te/st/test/v-1694195675462560794-filename.jpg" (this timestamped form would be saved on disk) - # * "versiondisk://te/st/test/v-1694195675462560794-deletionmarker-filename.jpg" (this file is saved on disk but empty) - class VersionId - attr_reader :id - def initialize(id) - @id = id - end - - def current_reference_id - self.class.new(Valkyrie::ID.new(string_id.gsub(version, "current"))) - end - - # @return [VersionID] the version_id for the current file - def resolve_current - return self unless reference? - version_files.first - end - - def file_path - @file_path ||= string_id.gsub(/^versiondisk:\/\//, '') - end - - def version_files - root = Pathname.new(file_path) - root.parent.children.select { |file| file.basename.to_s.end_with?(filename) }.sort.reverse.map do |file| - VersionId.new(Valkyrie::ID.new("versiondisk://#{file}")) - end - end - - def deletion_marker? - string_id.include?("deletionmarker") - end - - def current? - version_files.first.id == id - end - - # @return [Boolean] Whether this id is referential (e.g. "current") or absolute (e.g. a timestamp) - def reference? - version == "current" - end - - def versioned? - string_id.include?("v-") - end - - def version - string_id.split("v-").last.split("-", 2).first - end - - def filename - string_id.split("v-").last.split("-", 2).last.gsub("deletionmarker-", "") - end - - def string_id - id.to_s - end - end attr_reader :base_path, :path_generator, :file_mover def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedStorage, file_mover: FileUtils.method(:cp)) @base_path = Pathname.new(base_path.to_s) @@ -93,6 +32,7 @@ def current_timestamp def upload_version(id:, file:) version_timestamp = current_timestamp + # Get the existing version_id so we can calculate the next path from it. version_id = version_id(id) version_id = version_id.version_files[1] if version_id.deletion_marker? existing_path = version_id.file_path @@ -143,6 +83,8 @@ def find_by(id:) raise Valkyrie::StorageAdapter::FileNotFound end + # @return VersionId A VersionId value that's resolved a current reference, + # so we can access the `version_id` and current reference. def version_id(id) id = VersionId.new(id) return id unless id.versioned? @@ -161,5 +103,67 @@ def delete(id:, purge_versions: false) FileUtils.rm_rf(id.file_path) end end + + # A small value class that holds a version id and methods for knowing things about it. + # Examples of version ids in this adapter: + # * "versiondisk://te/st/test/v-current-filename.jpg" (never actually saved this way on disk, just used as a reference) + # * "versiondisk://te/st/test/v-1694195675462560794-filename.jpg" (this timestamped form would be saved on disk) + # * "versiondisk://te/st/test/v-1694195675462560794-deletionmarker-filename.jpg" (this file is saved on disk but empty) + class VersionId + attr_reader :id + def initialize(id) + @id = id + end + + def current_reference_id + self.class.new(Valkyrie::ID.new(string_id.gsub(version, "current"))) + end + + # @return [VersionID] the version_id for the current file + def resolve_current + return self unless reference? + version_files.first + end + + def file_path + @file_path ||= string_id.gsub(/^versiondisk:\/\//, '') + end + + def version_files + root = Pathname.new(file_path) + root.parent.children.select { |file| file.basename.to_s.end_with?(filename) }.sort.reverse.map do |file| + VersionId.new(Valkyrie::ID.new("versiondisk://#{file}")) + end + end + + def deletion_marker? + string_id.include?("deletionmarker") + end + + def current? + version_files.first.id == id + end + + # @return [Boolean] Whether this id is referential (e.g. "current") or absolute (e.g. a timestamp) + def reference? + version == "current" + end + + def versioned? + string_id.include?("v-") + end + + def version + string_id.split("v-").last.split("-", 2).first + end + + def filename + string_id.split("v-").last.split("-", 2).last.gsub("deletionmarker-", "") + end + + def string_id + id.to_s + end + end end end From cfe6ec6a3cc14899ef7385277a4bbd1854496b34 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 12:48:43 -0700 Subject: [PATCH 26/33] Rename variable. --- lib/valkyrie/storage/versioned_disk.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index f8b16780e..5ac053f33 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -33,10 +33,10 @@ def current_timestamp def upload_version(id:, file:) version_timestamp = current_timestamp # Get the existing version_id so we can calculate the next path from it. - version_id = version_id(id) - version_id = version_id.version_files[1] if version_id.deletion_marker? - existing_path = version_id.file_path - new_path = Pathname.new(existing_path.gsub(version_id.version, version_timestamp.to_s)) + current_version_id = version_id(id) + current_version_id = current_version_id.version_files[1] if current_version_id.deletion_marker? + existing_path = current_version_id.file_path + new_path = Pathname.new(existing_path.gsub(current_version_id.version, version_timestamp.to_s)) FileUtils.mkdir_p(new_path.parent) file_mover.call(file.try(:path) || file.try(:disk_path), new_path) find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) From 65b184f84aba8f8ba806c8791925c2c4e3466088 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Thu, 14 Sep 2023 12:51:09 -0700 Subject: [PATCH 27/33] Actually sleep. --- lib/valkyrie/storage/fedora.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/valkyrie/storage/fedora.rb b/lib/valkyrie/storage/fedora.rb index db5a1b3ef..b02ef6b96 100644 --- a/lib/valkyrie/storage/fedora.rb +++ b/lib/valkyrie/storage/fedora.rb @@ -54,8 +54,9 @@ def upload_version(id:, file:) uri = fedora_identifier(id: id) # Auto versioning is on, so have to sleep if it's too soon after last # upload. - if fedora_version == 6 - return upload_version(id: id, file: file) if current_version_id(id: id).to_s.split("/").last == Time.current.utc.strftime("%Y%m%d%H%M%S") + if fedora_version == 6 && current_version_id(id: id).to_s.split("/").last == Time.current.utc.strftime("%Y%m%d%H%M%S") + sleep(0.5) + return upload_version(id: id, file: file) end upload_file(fedora_uri: uri, io: file) version_id = mint_version(uri, latest_version(uri)) From 54b5dadbfff674a4e78a7c6503fd686d24974b20 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 15 Sep 2023 09:44:46 -0700 Subject: [PATCH 28/33] Check for path existence in case test suite runs too fast. --- lib/valkyrie/storage/versioned_disk.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 5ac053f33..47933ed7c 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -18,16 +18,19 @@ def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedSt # @param resource [Valkyrie::Resource] # @param _extra_arguments [Hash] additional arguments which may be passed to other adapters # @return [Valkyrie::StorageAdapter::File] - def upload(file:, original_filename:, resource: nil, **_extra_arguments) + def upload(file:, original_filename:, resource: nil, **extra_arguments) version_timestamp = current_timestamp new_path = path_generator.generate(resource: resource, file: file, original_filename: "v-#{version_timestamp}-#{original_filename}") + # If we've gone faster than milliseconds here, re-call. Probably only an + # issue for test suites. + return upload(file: file, original_filename: original_filename, resource: resource, **extra_arguments) if File.exist?(new_path) FileUtils.mkdir_p(new_path.parent) file_mover.call(file.try(:path) || file.try(:disk_path), new_path) find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) end def current_timestamp - Time.now.strftime("%s%N") + Time.now.strftime("%s%L") end def upload_version(id:, file:) @@ -37,6 +40,8 @@ def upload_version(id:, file:) current_version_id = current_version_id.version_files[1] if current_version_id.deletion_marker? existing_path = current_version_id.file_path new_path = Pathname.new(existing_path.gsub(current_version_id.version, version_timestamp.to_s)) + # If we've gone faster than milliseconds here, re-call. + return upload_version(id: id, file: file) if File.exist?(new_path) FileUtils.mkdir_p(new_path.parent) file_mover.call(file.try(:path) || file.try(:disk_path), new_path) find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) From b3e91aeb0e9f03fdcb1e225e998d92b2960f1523 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 15 Sep 2023 10:00:25 -0700 Subject: [PATCH 29/33] Only ever sleep once. --- lib/valkyrie/storage/versioned_disk.rb | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 47933ed7c..b71074c75 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -18,12 +18,12 @@ def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedSt # @param resource [Valkyrie::Resource] # @param _extra_arguments [Hash] additional arguments which may be passed to other adapters # @return [Valkyrie::StorageAdapter::File] - def upload(file:, original_filename:, resource: nil, **extra_arguments) + def upload(file:, original_filename:, resource: nil, paused: false, **extra_arguments) version_timestamp = current_timestamp new_path = path_generator.generate(resource: resource, file: file, original_filename: "v-#{version_timestamp}-#{original_filename}") - # If we've gone faster than milliseconds here, re-call. Probably only an - # issue for test suites. - return upload(file: file, original_filename: original_filename, resource: resource, **extra_arguments) if File.exist?(new_path) + # If we've gone faster than milliseconds here, pause a millisecond and + # re-call. Probably only an issue for test suites. + return sleep(0.001) && upload(file: file, original_filename: original_filename, resource: resource, paused: true, **extra_arguments) if !paused && File.exist?(new_path) FileUtils.mkdir_p(new_path.parent) file_mover.call(file.try(:path) || file.try(:disk_path), new_path) find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) @@ -33,15 +33,16 @@ def current_timestamp Time.now.strftime("%s%L") end - def upload_version(id:, file:) + def upload_version(id:, file:, paused: false) version_timestamp = current_timestamp # Get the existing version_id so we can calculate the next path from it. current_version_id = version_id(id) current_version_id = current_version_id.version_files[1] if current_version_id.deletion_marker? existing_path = current_version_id.file_path new_path = Pathname.new(existing_path.gsub(current_version_id.version, version_timestamp.to_s)) - # If we've gone faster than milliseconds here, re-call. - return upload_version(id: id, file: file) if File.exist?(new_path) + # If we've gone faster than milliseconds here, pause a millisecond and + # re-call. + return sleep(0.001) && upload_version(id: id, file: file, paused: true) if !paused && File.exist?(new_path) FileUtils.mkdir_p(new_path.parent) file_mover.call(file.try(:path) || file.try(:disk_path), new_path) find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) From ebcce3d3c19c7fc728fb2213b4e23992e229dcf1 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 15 Sep 2023 12:37:32 -0700 Subject: [PATCH 30/33] Organize public methods higher and document. --- lib/valkyrie/storage/versioned_disk.rb | 55 +++++++++++++++----------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index b71074c75..0baaf06be 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -33,6 +33,11 @@ def current_timestamp Time.now.strftime("%s%L") end + # @param id [Valkyrie::ID] ID of the Valkyrie::StorageAdapter::File to + # version. + # @param file [IO] + # @param paused [Boolean] set to true when upload_version had to pause for a + # millisecond to get a later timestamp. Internal only - do not set. def upload_version(id:, file:, paused: false) version_timestamp = current_timestamp # Get the existing version_id so we can calculate the next path from it. @@ -48,18 +53,6 @@ def upload_version(id:, file:, paused: false) find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) end - def find_versions(id:) - version_files(id: id).select { |x| !x.to_s.include?("deletionmarker") }.map do |file| - find_by(id: Valkyrie::ID.new("versiondisk://#{file}")) - end - end - - def version_files(id:) - root = Pathname.new(file_path(id)) - id = VersionId.new(id) - root.parent.children.select { |file| file.basename.to_s.end_with?(id.filename) }.sort.reverse - end - # @param id [Valkyrie::ID] # @return [Boolean] true if this adapter can handle this type of identifer def handles?(id:) @@ -73,10 +66,6 @@ def supports?(feature) false end - def file_path(version_id) - version_id.to_s.gsub(/^versiondisk:\/\//, '') - end - # Return the file associated with the given identifier # @param id [Valkyrie::ID] # @return [Valkyrie::StorageAdapter::File] @@ -89,14 +78,6 @@ def find_by(id:) raise Valkyrie::StorageAdapter::FileNotFound end - # @return VersionId A VersionId value that's resolved a current reference, - # so we can access the `version_id` and current reference. - def version_id(id) - id = VersionId.new(id) - return id unless id.versioned? - id.resolve_current - end - # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] def delete(id:, purge_versions: false) @@ -110,6 +91,32 @@ def delete(id:, purge_versions: false) end end + # @param id [Valkyrie::ID] + # @return [Array] + def find_versions(id:) + version_files(id: id).select { |x| !x.to_s.include?("deletionmarker") }.map do |file| + find_by(id: Valkyrie::ID.new("versiondisk://#{file}")) + end + end + + def version_files(id:) + root = Pathname.new(file_path(id)) + id = VersionId.new(id) + root.parent.children.select { |file| file.basename.to_s.end_with?(id.filename) }.sort.reverse + end + + def file_path(version_id) + version_id.to_s.gsub(/^versiondisk:\/\//, '') + end + + # @return VersionId A VersionId value that's resolved a current reference, + # so we can access the `version_id` and current reference. + def version_id(id) + id = VersionId.new(id) + return id unless id.versioned? + id.resolve_current + end + # A small value class that holds a version id and methods for knowing things about it. # Examples of version ids in this adapter: # * "versiondisk://te/st/test/v-current-filename.jpg" (never actually saved this way on disk, just used as a reference) From 4baf01d2c9e9f5bbaded8853ee425ddd7fc0d775 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 15 Sep 2023 12:42:52 -0700 Subject: [PATCH 31/33] Organize and add comments. --- lib/valkyrie/storage/fedora.rb | 62 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/lib/valkyrie/storage/fedora.rb b/lib/valkyrie/storage/fedora.rb index b02ef6b96..2c3716111 100644 --- a/lib/valkyrie/storage/fedora.rb +++ b/lib/valkyrie/storage/fedora.rb @@ -23,6 +23,7 @@ def handles?(id:) # @return [Boolean] true if the adapter supports the given feature def supports?(feature) return true if feature == :versions + # Fedora 6 auto versions and you can't delete versions. return true if feature == :version_deletion && fedora_version != 6 false end @@ -46,13 +47,18 @@ def upload(file:, original_filename:, resource:, content_type: "application/octe resource_uri_transformer: default_resource_uri_transformer, **_extra_arguments) identifier = resource_uri_transformer.call(resource, base_url) + '/original' upload_file(fedora_uri: identifier, io: file, content_type: content_type, original_filename: original_filename) + # Fedora 6 auto versions, so check to see if there's a version for this + # initial upload. If not, then mint one (fedora 4/5) version_id = current_version_id(id: valkyrie_identifier(uri: identifier)) || mint_version(identifier, latest_version(identifier)) perform_find(id: Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL)), version_id: version_id) end + # @param id [Valkyrie::ID] ID of the Valkyrie::StorageAdapter::StreamFile to + # version. + # @param file [IO] def upload_version(id:, file:) uri = fedora_identifier(id: id) - # Auto versioning is on, so have to sleep if it's too soon after last + # Fedora 6 has auto versioning, so have to sleep if it's too soon after last # upload. if fedora_version == 6 && current_version_id(id: id).to_s.split("/").last == Time.current.utc.strftime("%Y%m%d%H%M%S") sleep(0.5) @@ -63,20 +69,8 @@ def upload_version(id:, file:) perform_find(id: Valkyrie::ID.new(uri.to_s.sub(/^.+\/\//, PROTOCOL)), version_id: version_id) end - def upload_file(fedora_uri:, io:, content_type: "application/octet-stream", original_filename: "default") - sha1 = [5, 6].include?(fedora_version) ? "sha" : "sha1" - connection.http.put do |request| - request.url fedora_uri - request.headers['Content-Type'] = content_type - request.headers['Content-Length'] = io.length.to_s if io.respond_to?(:length) - request.headers['Content-Disposition'] = "attachment; filename=\"#{original_filename}\"" - request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(io)}" if io.respond_to?(:to_str) - request.headers['link'] = "; rel=\"type\"" - io = Faraday::UploadIO.new(io, content_type, original_filename) - request.body = io - end - end - + # @param id [Valkyrie::ID] + # @return [Array] def find_versions(id:) uri = fedora_identifier(id: id) version_list = version_list(uri) @@ -86,6 +80,12 @@ def find_versions(id:) end end + # Delete the file in Fedora associated with the given identifier. + # @param id [Valkyrie::ID] + def delete(id:) + connection.http.delete(fedora_identifier(id: id)) + end + def version_list(fedora_uri) version_list = connection.http.get do |request| request.url "#{fedora_uri}/fcr:versions" @@ -96,11 +96,29 @@ def version_list(fedora_uri) if fedora_version == 4 version_graph&.fetch("http://fedora.info/definitions/v4/repository#hasVersion", []) else + # Fedora 5/6 use Memento. version_graph&.fetch("http://www.w3.org/ns/ldp#contains", [])&.sort_by { |x| x["@id"] }&.reverse end end + def upload_file(fedora_uri:, io:, content_type: "application/octet-stream", original_filename: "default") + sha1 = [5, 6].include?(fedora_version) ? "sha" : "sha1" + connection.http.put do |request| + request.url fedora_uri + request.headers['Content-Type'] = content_type + request.headers['Content-Length'] = io.length.to_s if io.respond_to?(:length) + request.headers['Content-Disposition'] = "attachment; filename=\"#{original_filename}\"" + request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(io)}" if io.respond_to?(:to_str) + request.headers['link'] = "; rel=\"type\"" + io = Faraday::UploadIO.new(io, content_type, original_filename) + request.body = io + end + end + + # Returns a new version identifier to mint. Defaults to version1, but will + # increment to version2 etc if one found. Only for Fedora 4. def latest_version(identifier) + # Only version 4 needs a version ID, 5/6 both mint using timestamps. return :not_applicable if fedora_version != 4 version_list = version_list(identifier) return "version1" if version_list.blank? @@ -128,15 +146,17 @@ def perform_find(id:, version_id: nil) # @param identifier [String] Fedora URI to mint a version for. # @return [Valkyrie::ID] version_id of the minted version. - # Versions are created AFTER content is uploaded. + # Versions are created AFTER content is uploaded, except for Fedora 6 which + # auto versions. def mint_version(identifier, version_name = "version1") response = connection.http.post do |request| request.url "#{identifier}/fcr:versions" request.headers['Slug'] = version_name if fedora_version == 4 end - # If there's a deletion marker, don't return anything. + # If there's a deletion marker, don't return anything. (Fedora 4) return nil if response.status == 410 - # This is awful, but versioning is locked to per-second increments. + # This is awful, but versioning is locked to per-second increments, + # returns a 409 in Fedora 5 if there's a conflict. if response.status == 409 sleep(0.5) return mint_version(identifier, version_name) @@ -145,12 +165,6 @@ def mint_version(identifier, version_name = "version1") valkyrie_identifier(uri: response.headers["location"].gsub("/fcr:metadata", "")) end - # Delete the file in Fedora associated with the given identifier. - # @param id [Valkyrie::ID] - def delete(id:) - connection.http.delete(fedora_identifier(id: id)) - end - class IOProxy # @param response [Ldp::Resource::BinarySource] attr_reader :size From 2445134786d16b71b7c1d1cdf432ab3222a0918e Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 15 Sep 2023 12:48:20 -0700 Subject: [PATCH 32/33] Remove TODO --- spec/valkyrie/storage/disk_spec.rb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spec/valkyrie/storage/disk_spec.rb b/spec/valkyrie/storage/disk_spec.rb index dbafe52d4..545bb3c5b 100644 --- a/spec/valkyrie/storage/disk_spec.rb +++ b/spec/valkyrie/storage/disk_spec.rb @@ -17,8 +17,4 @@ expect(storage_adapter.handles?(id: "disk://#{ROOT_PATH.join('tmp', 'wrong')}")).to eq false end end - - # TODO: Add a toggle to enable versioning. - # Think more about what happens if someone turns versioning on for an existing - # repository. end From 3d8bed79aed9c8a0795f9c25e320847f35c02e71 Mon Sep 17 00:00:00 2001 From: Trey Pendragon Date: Fri, 15 Sep 2023 13:56:23 -0700 Subject: [PATCH 33/33] Remove purge_versions --- lib/valkyrie/storage/versioned_disk.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb index 0baaf06be..e7751d254 100644 --- a/lib/valkyrie/storage/versioned_disk.rb +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -80,7 +80,7 @@ def find_by(id:) # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] - def delete(id:, purge_versions: false) + def delete(id:) id = version_id(id).resolve_current if id.current? id.version_files.each do |version_id|