diff --git a/.circleci/config.yml b/.circleci/config.yml index 73343ddf..15360c06 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -28,7 +28,7 @@ jobs: environment: CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC" JAVA_OPTIONS: "-Djetty.http.port=8998 -Dfcrepo.dynamic.jms.port=61618 -Dfcrepo.dynamic.stomp.port=61614" - - image: fcrepo/fcrepo:6.0.0 + - image: fcrepo/fcrepo:6.4.0 environment: CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true" JAVA_OPTS: "-Djetty.http.port=8978 -Dfcrepo.dynamic.jms.port=61619 -Dfcrepo.dynamic.stomp.port=61615 -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true" diff --git a/.lando.yml b/.lando.yml index 6aecf12e..0ab25dab 100644 --- a/.lando.yml +++ b/.lando.yml @@ -24,19 +24,26 @@ services: - fedora4:/data ports: - 8988:8080 - portforward: true + environment: + CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC" + portforward: 8988 valkyrie_fedora_5: type: compose app_mount: false volumes: fedora5: services: - image: samvera/fcrepo4:5.1.0 - command: /fedora-entrypoint.sh + image: fcrepo/fcrepo:5.1.1-multiplatform + command: + - "catalina.sh" + - "run" volumes: - fedora5:/data ports: - 8998:8080 + environment: + CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true" + JAVA_OPTS: "-Dfcrepo.dynamic.jms.port=61620 
-Dfcrepo.dynamic.stomp.port=61617 -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true" portforward: true valkyrie_fedora_6: type: compose @@ -44,7 +51,7 @@ services: volumes: fedora6: services: - image: fcrepo/fcrepo:6.0.0 + image: fcrepo/fcrepo:6.4.0 command: - "catalina.sh" - "run" diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 0e52a31c..3e318b85 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -3,6 +3,7 @@ Metrics/ClassLength: - 'lib/valkyrie/persistence/fedora/persister.rb' - 'lib/valkyrie/persistence/fedora/query_service.rb' - 'lib/valkyrie/persistence/postgres/query_service.rb' + - 'lib/valkyrie/storage/fedora.rb' Metrics/MethodLength: Exclude: diff --git a/lib/valkyrie/specs/shared_specs/storage_adapter.rb b/lib/valkyrie/specs/shared_specs/storage_adapter.rb index ab67655e..becdaa35 100644 --- a/lib/valkyrie/specs/shared_specs/storage_adapter.rb +++ b/lib/valkyrie/specs/shared_specs/storage_adapter.rb @@ -16,6 +16,11 @@ class Valkyrie::Specs::CustomResource < Valkyrie::Resource it { is_expected.to respond_to(:find_by).with_keywords(:id) } it { is_expected.to respond_to(:delete).with_keywords(:id) } it { is_expected.to respond_to(:upload).with_keywords(:file, :resource, :original_filename) } + it { is_expected.to respond_to(:supports?) 
} + + it "returns false for non-existing features" do + expect(storage_adapter.supports?(:bad_feature_not_real_dont_implement)).to eq false + end it "can upload a file which is just an IO" do io_file = Tempfile.new('temp_io') @@ -50,7 +55,7 @@ def open_files end it "can upload, validate, re-fetch, and delete a file" do - resource = Valkyrie::Specs::CustomResource.new(id: "test") + resource = Valkyrie::Specs::CustomResource.new(id: "test#{SecureRandom.uuid}") sha1 = Digest::SHA1.file(file).to_s size = file.size expect(uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true)).to be_kind_of Valkyrie::StorageAdapter::File @@ -77,4 +82,64 @@ def open_files expect { storage_adapter.find_by(id: uploaded_file.id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound expect { storage_adapter.find_by(id: Valkyrie::ID.new("noexist")) }.to raise_error Valkyrie::StorageAdapter::FileNotFound end + + it "can upload and find new versions" do + pending "Versioning not supported" unless storage_adapter.supports?(:versions) + resource = Valkyrie::Specs::CustomResource.new(id: "test#{SecureRandom.uuid}") + uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true) + expect(uploaded_file.version_id).not_to be_blank + + f = Tempfile.new + f.puts "Test File" + f.rewind + + # upload_version + new_version = storage_adapter.upload_version(id: uploaded_file.id, file: f) + expect(uploaded_file.id).to eq new_version.id + expect(uploaded_file.version_id).not_to eq new_version.version_id + + # find_versions + # Two versions of the same file have the same id, but different version_ids, + # use case: I want to store metadata about a file when it's uploaded as a + # version and refer to it consistently. 
+ versions = storage_adapter.find_versions(id: new_version.id) + expect(versions.length).to eq 2 + expect(versions.first.id).to eq new_version.id + expect(versions.first.version_id).to eq new_version.version_id + + expect(versions.last.id).to eq uploaded_file.id + expect(versions.last.version_id).to eq uploaded_file.version_id + + expect(versions.first.size).not_to eq versions.last.size + + expect(storage_adapter.find_by(id: uploaded_file.version_id).version_id).to eq uploaded_file.version_id + + # Deleting a version should leave the current versions + if storage_adapter.supports?(:version_deletion) + storage_adapter.delete(id: uploaded_file.version_id) + expect(storage_adapter.find_versions(id: uploaded_file.id).length).to eq 1 + expect { storage_adapter.find_by(id: uploaded_file.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound + end + current_length = storage_adapter.find_versions(id: new_version.id).length + + # Restoring a previous version is just pumping its file into upload_version + newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id) + expect(newest_version.version_id).not_to eq new_version.id + expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id + + # I can restore a version twice + newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id) + expect(newest_version.version_id).not_to eq new_version.id + expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id + expect(storage_adapter.find_versions(id: newest_version.id).length).to eq current_length + 2 + + # NOTE: We originally wanted deleting the current record to push it into the + # versions history, but FCRepo 4/5/6 doesn't work that way, so we changed to + # instead make deleting delete everything. 
+ storage_adapter.delete(id: new_version.id) + expect { storage_adapter.find_by(id: new_version.id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound + expect(storage_adapter.find_versions(id: new_version.id).length).to eq 0 + ensure + f&.close + end end diff --git a/lib/valkyrie/storage.rb b/lib/valkyrie/storage.rb index 289e92a7..c5b009be 100644 --- a/lib/valkyrie/storage.rb +++ b/lib/valkyrie/storage.rb @@ -31,6 +31,7 @@ module Valkyrie # @see lib/valkyrie/specs/shared_specs/storage_adapter.rb module Storage require 'valkyrie/storage/disk' + require 'valkyrie/storage/versioned_disk' require 'valkyrie/storage/fedora' require 'valkyrie/storage/memory' end diff --git a/lib/valkyrie/storage/disk.rb b/lib/valkyrie/storage/disk.rb index 87975ea7..92922ed0 100644 --- a/lib/valkyrie/storage/disk.rb +++ b/lib/valkyrie/storage/disk.rb @@ -27,6 +27,12 @@ def handles?(id:) id.to_s.start_with?("disk://#{base_path}") end + # @param feature [Symbol] Feature to test for. + # @return [Boolean] true if the adapter supports the given feature + def supports?(_feature) + false + end + def file_path(id) id.to_s.gsub(/^disk:\/\//, '') end diff --git a/lib/valkyrie/storage/fedora.rb b/lib/valkyrie/storage/fedora.rb index 6957942c..2c371611 100644 --- a/lib/valkyrie/storage/fedora.rb +++ b/lib/valkyrie/storage/fedora.rb @@ -19,12 +19,21 @@ def handles?(id:) id.to_s.start_with?(PROTOCOL) end + # @param feature [Symbol] Feature to test for. + # @return [Boolean] true if the adapter supports the given feature + def supports?(feature) + return true if feature == :versions + # Fedora 6 auto versions and you can't delete versions. 
+ return true if feature == :version_deletion && fedora_version != 6 + false + end + # Return the file associated with the given identifier # @param id [Valkyrie::ID] # @return [Valkyrie::StorageAdapter::StreamFile] # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found def find_by(id:) - Valkyrie::StorageAdapter::StreamFile.new(id: id, io: response(id: id)) + perform_find(id: id) end # @param file [IO] @@ -37,24 +46,123 @@ def find_by(id:) def upload(file:, original_filename:, resource:, content_type: "application/octet-stream", # rubocop:disable Metrics/ParameterLists resource_uri_transformer: default_resource_uri_transformer, **_extra_arguments) identifier = resource_uri_transformer.call(resource, base_url) + '/original' + upload_file(fedora_uri: identifier, io: file, content_type: content_type, original_filename: original_filename) + # Fedora 6 auto versions, so check to see if there's a version for this + # initial upload. If not, then mint one (fedora 4/5) + version_id = current_version_id(id: valkyrie_identifier(uri: identifier)) || mint_version(identifier, latest_version(identifier)) + perform_find(id: Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL)), version_id: version_id) + end + + # @param id [Valkyrie::ID] ID of the Valkyrie::StorageAdapter::StreamFile to + # version. + # @param file [IO] + def upload_version(id:, file:) + uri = fedora_identifier(id: id) + # Fedora 6 has auto versioning, so have to sleep if it's too soon after last + # upload. 
+ if fedora_version == 6 && current_version_id(id: id).to_s.split("/").last == Time.current.utc.strftime("%Y%m%d%H%M%S") + sleep(0.5) + return upload_version(id: id, file: file) + end + upload_file(fedora_uri: uri, io: file) + version_id = mint_version(uri, latest_version(uri)) + perform_find(id: Valkyrie::ID.new(uri.to_s.sub(/^.+\/\//, PROTOCOL)), version_id: version_id) + end + + # @param id [Valkyrie::ID] + # @return [Array] + def find_versions(id:) + uri = fedora_identifier(id: id) + version_list = version_list(uri) + version_list.map do |version| + id = valkyrie_identifier(uri: version["@id"]) + perform_find(id: id, version_id: id) + end + end + + # Delete the file in Fedora associated with the given identifier. + # @param id [Valkyrie::ID] + def delete(id:) + connection.http.delete(fedora_identifier(id: id)) + end + + def version_list(fedora_uri) + version_list = connection.http.get do |request| + request.url "#{fedora_uri}/fcr:versions" + request.headers["Accept"] = "application/ld+json" + end + return [] unless version_list.success? + version_graph = JSON.parse(version_list.body)&.first + if fedora_version == 4 + version_graph&.fetch("http://fedora.info/definitions/v4/repository#hasVersion", []) + else + # Fedora 5/6 use Memento. + version_graph&.fetch("http://www.w3.org/ns/ldp#contains", [])&.sort_by { |x| x["@id"] }&.reverse + end + end + + def upload_file(fedora_uri:, io:, content_type: "application/octet-stream", original_filename: "default") sha1 = [5, 6].include?(fedora_version) ? 
"sha" : "sha1" connection.http.put do |request| - request.url identifier + request.url fedora_uri request.headers['Content-Type'] = content_type - request.headers['Content-Length'] = file.length.to_s + request.headers['Content-Length'] = io.length.to_s if io.respond_to?(:length) request.headers['Content-Disposition'] = "attachment; filename=\"#{original_filename}\"" - request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(file)}" + request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(io)}" if io.respond_to?(:to_str) request.headers['link'] = "; rel=\"type\"" - io = Faraday::UploadIO.new(file, content_type, original_filename) + io = Faraday::UploadIO.new(io, content_type, original_filename) request.body = io end - find_by(id: Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL))) end - # Delete the file in Fedora associated with the given identifier. - # @param id [Valkyrie::ID] - def delete(id:) - connection.http.delete(fedora_identifier(id: id)) + # Returns a new version identifier to mint. Defaults to version1, but will + # increment to version2 etc if one found. Only for Fedora 4. + def latest_version(identifier) + # Only version 4 needs a version ID, 5/6 both mint using timestamps. + return :not_applicable if fedora_version != 4 + version_list = version_list(identifier) + return "version1" if version_list.blank? + last_version = version_list.first["@id"] + last_version_number = last_version.split("/").last.gsub("version", "").to_i + "version#{last_version_number + 1}" + end + + # @param [Valkyrie::ID] id A storage ID that's not a version, to get the + # version ID of. + def current_version_id(id:) + version_list = version_list(fedora_identifier(id: id)) + return nil if version_list.blank? 
+ valkyrie_identifier(uri: version_list.first["@id"]) + end + + def perform_find(id:, version_id: nil) + current_id = Valkyrie::ID.new(id.to_s.split("/fcr:versions").first) + version_id ||= id if id != current_id + # No version got passed and we're asking for a current_id, gotta get the + # version ID + return perform_find(id: current_id, version_id: (current_version_id(id: id) || :empty)) if version_id.nil? + Valkyrie::StorageAdapter::StreamFile.new(id: current_id, io: response(id: id), version_id: version_id) + end + + # @param identifier [String] Fedora URI to mint a version for. + # @return [Valkyrie::ID] version_id of the minted version. + # Versions are created AFTER content is uploaded, except for Fedora 6 which + # auto versions. + def mint_version(identifier, version_name = "version1") + response = connection.http.post do |request| + request.url "#{identifier}/fcr:versions" + request.headers['Slug'] = version_name if fedora_version == 4 + end + # If there's a deletion marker, don't return anything. (Fedora 4) + return nil if response.status == 410 + # This is awful, but versioning is locked to per-second increments, + # returns a 409 in Fedora 5 if there's a conflict. 
+ if response.status == 409 + sleep(0.5) + return mint_version(identifier, version_name) + end + raise "Version unable to be created" unless response.status == 201 + valkyrie_identifier(uri: response.headers["location"].gsub("/fcr:metadata", "")) end class IOProxy @@ -81,6 +189,11 @@ def fedora_identifier(id:) RDF::URI(identifier) end + def valkyrie_identifier(uri:) + id = uri.to_s.sub("http://", PROTOCOL) + Valkyrie::ID.new(id) + end + private # @return [IOProxy] diff --git a/lib/valkyrie/storage/memory.rb b/lib/valkyrie/storage/memory.rb index 6f102b16..91411911 100644 --- a/lib/valkyrie/storage/memory.rb +++ b/lib/valkyrie/storage/memory.rb @@ -17,7 +17,32 @@ def initialize # @return [Valkyrie::StorageAdapter::StreamFile] def upload(file:, original_filename:, resource: nil, **_extra_arguments) identifier = Valkyrie::ID.new("memory://#{resource.id}") - cache[identifier] = Valkyrie::StorageAdapter::StreamFile.new(id: identifier, io: file) + version_id = Valkyrie::ID.new("#{identifier}##{SecureRandom.uuid}") + cache[identifier] ||= {} + cache[identifier][:current] = Valkyrie::StorageAdapter::StreamFile.new(id: identifier, io: file, version_id: version_id) + end + + # @param id [Valkyrie::ID] ID of the Valkyrie::StorageAdapter::StreamFile to + # version. + # @param file [IO] content of the new version. It becomes the current file + # and the previously current file is moved to the front of the version + # history. + # @return [Valkyrie::StorageAdapter::StreamFile] + def upload_version(id:, file:) + # Get previous file and add a UUID to the end of it. + new_file = Valkyrie::StorageAdapter::StreamFile.new(id: id, io: file, version_id: Valkyrie::ID.new("#{id}##{SecureRandom.uuid}")) + current_file = cache[id][:current] + cache[id][:current] = new_file + cache[id][:versions] ||= [] + cache[id][:versions].prepend(current_file) if current_file + new_file + end + + # @param id [Valkyrie::ID] + # @return [Array] + def find_versions(id:) + return [] if cache[id].nil? 
+ [cache[id][:current] || nil].compact + cache[id].fetch(:versions, []) end # Return the file associated with the given identifier @@ -25,8 +50,18 @@ def upload(file:, original_filename:, resource: nil, **_extra_arguments) # @return [Valkyrie::StorageAdapter::StreamFile] # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found def find_by(id:) - raise Valkyrie::StorageAdapter::FileNotFound unless cache[id] - cache[id] + no_version_id, _version = id_and_version(id) + raise Valkyrie::StorageAdapter::FileNotFound unless cache[no_version_id] + version = + if id == no_version_id + cache[id][:current] + else + find_versions(id: no_version_id).find do |file| + file.version_id == id + end + end + raise Valkyrie::StorageAdapter::FileNotFound unless version + version end # @param id [Valkyrie::ID] @@ -35,10 +70,35 @@ def handles?(id:) id.to_s.start_with?("memory://") end + # @param feature [Symbol] Feature to test for. + # @return [Boolean] true if the adapter supports the given feature + def supports?(feature) + case feature + when :versions + true + when :version_deletion + true + else + false + end + end + + def id_and_version(id) + id, version = id.to_s.split("#") + [Valkyrie::ID.new(id), version] + end + # Delete the file on disk associated with the given identifier. # @param id [Valkyrie::ID] def delete(id:) - cache.delete(id) + base_id, version = id_and_version(id) + if version && cache[base_id][:current]&.version_id != id + cache[base_id][:versions].reject! 
do |file| + file.version_id == id + end + else + cache.delete(base_id) + end nil end end diff --git a/lib/valkyrie/storage/versioned_disk.rb b/lib/valkyrie/storage/versioned_disk.rb new file mode 100644 index 00000000..e7751d25 --- /dev/null +++ b/lib/valkyrie/storage/versioned_disk.rb @@ -0,0 +1,182 @@ +# frozen_string_literal: true +module Valkyrie::Storage + # The VersionedDisk adapter implements versioned storage on disk by storing + # the timestamp of the file's creation as part of the file name + # (v-timestamp-filename.jpg). If the + # current file is deleted it creates a DeletionMarker, which is an empty file + # with "deletionmarker" in the name of the file. + class VersionedDisk + attr_reader :base_path, :path_generator, :file_mover + def initialize(base_path:, path_generator: ::Valkyrie::Storage::Disk::BucketedStorage, file_mover: FileUtils.method(:cp)) + @base_path = Pathname.new(base_path.to_s) + @path_generator = path_generator.new(base_path: base_path) + @file_mover = file_mover + end + + # @param file [IO] + # @param original_filename [String] + # @param resource [Valkyrie::Resource] + # @param _extra_arguments [Hash] additional arguments which may be passed to other adapters + # @return [Valkyrie::StorageAdapter::File] + def upload(file:, original_filename:, resource: nil, paused: false, **extra_arguments) + version_timestamp = current_timestamp + new_path = path_generator.generate(resource: resource, file: file, original_filename: "v-#{version_timestamp}-#{original_filename}") + # If we've gone faster than milliseconds here, pause a millisecond and + # re-call. Probably only an issue for test suites. 
+ return sleep(0.001) && upload(file: file, original_filename: original_filename, resource: resource, paused: true, **extra_arguments) if !paused && File.exist?(new_path) + FileUtils.mkdir_p(new_path.parent) + file_mover.call(file.try(:path) || file.try(:disk_path), new_path) + find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) + end + + def current_timestamp + Time.now.strftime("%s%L") + end + + # @param id [Valkyrie::ID] ID of the Valkyrie::StorageAdapter::File to + # version. + # @param file [IO] + # @param paused [Boolean] set to true when upload_version had to pause for a + # millisecond to get a later timestamp. Internal only - do not set. + def upload_version(id:, file:, paused: false) + version_timestamp = current_timestamp + # Get the existing version_id so we can calculate the next path from it. + current_version_id = version_id(id) + current_version_id = current_version_id.version_files[1] if current_version_id.deletion_marker? + existing_path = current_version_id.file_path + new_path = Pathname.new(existing_path.gsub(current_version_id.version, version_timestamp.to_s)) + # If we've gone faster than milliseconds here, pause a millisecond and + # re-call. + return sleep(0.001) && upload_version(id: id, file: file, paused: true) if !paused && File.exist?(new_path) + FileUtils.mkdir_p(new_path.parent) + file_mover.call(file.try(:path) || file.try(:disk_path), new_path) + find_by(id: Valkyrie::ID.new("versiondisk://#{new_path}")) + end + + # @param id [Valkyrie::ID] + # @return [Boolean] true if this adapter can handle this type of identifier + def handles?(id:) + id.to_s.start_with?("versiondisk://#{base_path}") + end + + # @param feature [Symbol] Feature to test for. 
+ # @return [Boolean] true if the adapter supports the given feature + def supports?(feature) + return true if feature == :versions || feature == :version_deletion + false + end + + # Return the file associated with the given identifier + # @param id [Valkyrie::ID] + # @return [Valkyrie::StorageAdapter::File] + # @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found + def find_by(id:) + version_id = version_id(id) + raise Valkyrie::StorageAdapter::FileNotFound if version_id.nil? || version_id&.deletion_marker? + Valkyrie::StorageAdapter::File.new(id: version_id.current_reference_id.id, io: ::Valkyrie::Storage::Disk::LazyFile.open(version_id.file_path, 'rb'), version_id: version_id.id) + rescue Errno::ENOENT + raise Valkyrie::StorageAdapter::FileNotFound + end + + # Delete the file on disk associated with the given identifier. + # @param id [Valkyrie::ID] + def delete(id:) + id = version_id(id).resolve_current + if id.current? + id.version_files.each do |version_id| + FileUtils.rm_rf(version_id.file_path) + end + elsif File.exist?(id.file_path) + FileUtils.rm_rf(id.file_path) + end + end + + # @param id [Valkyrie::ID] + # @return [Array] + def find_versions(id:) + version_files(id: id).select { |x| !x.to_s.include?("deletionmarker") }.map do |file| + find_by(id: Valkyrie::ID.new("versiondisk://#{file}")) + end + end + + def version_files(id:) + root = Pathname.new(file_path(id)) + id = VersionId.new(id) + root.parent.children.select { |file| file.basename.to_s.end_with?(id.filename) }.sort.reverse + end + + def file_path(version_id) + version_id.to_s.gsub(/^versiondisk:\/\//, '') + end + + # @return VersionId A VersionId value that's resolved a current reference, + # so we can access the `version_id` and current reference. + def version_id(id) + id = VersionId.new(id) + return id unless id.versioned? + id.resolve_current + end + + # A small value class that holds a version id and methods for knowing things about it. 
+ # Examples of version ids in this adapter: + # * "versiondisk://te/st/test/v-current-filename.jpg" (never actually saved this way on disk, just used as a reference) + # * "versiondisk://te/st/test/v-1694195675462560794-filename.jpg" (this timestamped form would be saved on disk) + # * "versiondisk://te/st/test/v-1694195675462560794-deletionmarker-filename.jpg" (this file is saved on disk but empty) + class VersionId + attr_reader :id + def initialize(id) + @id = id + end + + def current_reference_id + self.class.new(Valkyrie::ID.new(string_id.gsub(version, "current"))) + end + + # @return [VersionId] the version_id for the current file + def resolve_current + return self unless reference? + version_files.first + end + + def file_path + @file_path ||= string_id.gsub(/^versiondisk:\/\//, '') + end + + def version_files + root = Pathname.new(file_path) + root.parent.children.select { |file| file.basename.to_s.end_with?(filename) }.sort.reverse.map do |file| + VersionId.new(Valkyrie::ID.new("versiondisk://#{file}")) + end + end + + def deletion_marker? + string_id.include?("deletionmarker") + end + + def current? + version_files.first.id == id + end + + # @return [Boolean] Whether this id is referential (e.g. "current") or absolute (e.g. a timestamp) + def reference? + version == "current" + end + + def versioned? 
+ string_id.include?("v-") + end + + def version + string_id.split("v-").last.split("-", 2).first + end + + def filename + string_id.split("v-").last.split("-", 2).last.gsub("deletionmarker-", "") + end + + def string_id + id.to_s + end + end + end +end diff --git a/lib/valkyrie/storage_adapter.rb b/lib/valkyrie/storage_adapter.rb index 8eed573a..1ed05bbd 100644 --- a/lib/valkyrie/storage_adapter.rb +++ b/lib/valkyrie/storage_adapter.rb @@ -67,6 +67,7 @@ def adapter_for(id:) class File < Dry::Struct attribute :id, Valkyrie::Types::Any attribute :io, Valkyrie::Types::Any + attribute :version_id, Valkyrie::Types::Any.optional.default(nil) delegate :size, :read, :rewind, :close, to: :io def stream io diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index aa1f1369..82c26c5e 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -12,6 +12,7 @@ $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__) require "valkyrie" require 'pry' +require 'pry-byebug' require 'action_dispatch' require 'webmock/rspec' require 'timecop' diff --git a/spec/support/fedora_helper.rb b/spec/support/fedora_helper.rb index 11c7a4f2..5cd042ac 100644 --- a/spec/support/fedora_helper.rb +++ b/spec/support/fedora_helper.rb @@ -9,7 +9,7 @@ def fedora_adapter_config(base_path:, schema: nil, fedora_version: 4) elsif fedora_version == 6 port = ENV["FEDORA_6_PORT"] || 8978 end - connection_url = fedora_version == 6 ? "/fcrepo/rest" : "/rest" + connection_url = fedora_version == 6 || (fedora_version == 5 && !ENV["CI"]) ? 
"/fcrepo/rest" : "/rest" opts = { base_path: base_path, connection: ::Ldp::Client.new(faraday_client("http://#{fedora_auth}localhost:#{port}#{connection_url}")), diff --git a/spec/valkyrie/storage/fedora_spec.rb b/spec/valkyrie/storage/fedora_spec.rb index c3433b1a..557e082e 100644 --- a/spec/valkyrie/storage/fedora_spec.rb +++ b/spec/valkyrie/storage/fedora_spec.rb @@ -94,7 +94,7 @@ class Valkyrie::Specs::FedoraCustomResource < Valkyrie::Resource let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 5)) } it 'produces a valid URI' do - expected_uri = 'fedora://localhost:8998/rest/test/AN1D4UHA/original' + expected_uri = "fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/test/AN1D4UHA/original" expect(uploaded_file.id.to_s).to eq expected_uri end end @@ -103,7 +103,7 @@ class Valkyrie::Specs::FedoraCustomResource < Valkyrie::Resource let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: '/', fedora_version: 5)) } it 'produces a valid URI' do - expected_uri = RDF::URI.new('fedora://localhost:8998/rest/AN1D4UHA/original') + expected_uri = RDF::URI.new("fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/AN1D4UHA/original") expect(uploaded_file.id.to_s).to eq expected_uri end end @@ -131,7 +131,101 @@ class Valkyrie::Specs::FedoraCustomResource < Valkyrie::Resource let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 5)) } it 'produces a valid URI' do - expected_uri = 'fedora://localhost:8998/rest/test/AN/1D/4U/HA/AN1D4UHA/original' + expected_uri = "fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/test/AN/1D/4U/HA/AN1D4UHA/original" + expect(uploaded_file.id.to_s).to eq expected_uri + end + end + end + end + + context "fedora 6" do + before(:all) do + # Start from a clean fedora + wipe_fedora!(base_path: "test", fedora_version: 6) + end + + 
let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 6)) } + let(:file) { fixture_file_upload('files/example.tif', 'image/tiff') } + + it_behaves_like "a Valkyrie::StorageAdapter" + + context "when uploading with a content_type" do + it "passes that on" do + io_file = file.tempfile + + resource = Valkyrie::Specs::FedoraCustomResource.new(id: SecureRandom.uuid) + + expect(uploaded_file = storage_adapter.upload( + file: io_file, + original_filename: 'foo.jpg', + resource: resource, + fake_upload_argument: true, + content_type: "image/tiff" + )).to be_kind_of Valkyrie::StorageAdapter::File + + uri = storage_adapter.fedora_identifier(id: uploaded_file.id) + response = storage_adapter.connection.http.head(uri.to_s) + + expect(response.headers["content-type"]).to eq "image/tiff" + end + end + + context 'testing resource uri transformer' do + let(:file) { fixture_file_upload('files/example.tif', 'image/tiff') } + let(:io_file) { file.tempfile } + let(:resource) { Valkyrie::Specs::FedoraCustomResource.new(id: 'AN1D4UHA') } + let(:uploaded_file) do + storage_adapter.upload( + file: io_file, + original_filename: 'foo.jpg', + resource: resource, + fake_upload_argument: true, + content_type: "image/tiff" + ) + end + context 'when using default transformer' do + context 'and basepath is passed in' do + let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 6)) } + + it 'produces a valid URI' do + expected_uri = "fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/test/AN1D4UHA/original" + expect(uploaded_file.id.to_s).to eq expected_uri + end + end + + context "when basepath uses default (e.g. 
'/')" do + let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: '/', fedora_version: 6)) } + + it 'produces a valid URI' do + expected_uri = RDF::URI.new("fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/AN1D4UHA/original") + expect(uploaded_file.id.to_s).to eq expected_uri + end + end + end + + context 'when transformer is passed in' do + let(:uploaded_file) do + storage_adapter.upload( + file: io_file, + original_filename: 'foo.jpg', + resource: resource, + fake_upload_argument: true, + content_type: "image/tiff", + resource_uri_transformer: uri_transformer + ) + end + let(:uri_transformer) do + lambda do |resource, base_url| + id = CGI.escape(resource.id.to_s) + head = id.split('/').first + head.gsub!(/#.*/, '') + RDF::URI.new(base_url + (head.scan(/..?/).first(4) + [id]).join('/')) + end + end + let(:storage_adapter) { described_class.new(**fedora_adapter_config(base_path: 'test', fedora_version: 6)) } + + it 'produces a valid URI' do + expected_uri = "fedora://#{storage_adapter.connection.http.url_prefix.to_s.gsub('http://', '')}/test/AN/1D/4U/HA/AN1D4UHA/original" expect(uploaded_file.id.to_s).to eq expected_uri end end diff --git a/spec/valkyrie/storage/versioned_disk_spec.rb b/spec/valkyrie/storage/versioned_disk_spec.rb new file mode 100644 index 00000000..f1c8b323 --- /dev/null +++ b/spec/valkyrie/storage/versioned_disk_spec.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true +require 'spec_helper' +require 'valkyrie/specs/shared_specs' +include ActionDispatch::TestProcess + +RSpec.describe Valkyrie::Storage::VersionedDisk do + it_behaves_like "a Valkyrie::StorageAdapter" + let(:storage_adapter) { described_class.new(base_path: ROOT_PATH.join("tmp", "files_test")) } + let(:file) { fixture_file_upload('files/example.tif', 'image/tiff') } + before do + FileUtils.rm_rf(ROOT_PATH.join("tmp", "files_test")) + end + + describe ".handles?" 
do + it "matches on base_path" do + expect(storage_adapter.handles?(id: "versiondisk://#{ROOT_PATH.join('tmp', 'files_test')}")).to eq true + end + + it "does not match when base_path differs" do + expect(storage_adapter.handles?(id: "versiondisk://#{ROOT_PATH.join('tmp', 'wrong')}")).to eq false + end + end +end