Skip to content

Commit

Permalink
Merge pull request #932 from samvera/storage_versioning
Browse files Browse the repository at this point in the history
StorageAdapter Versioning Implementation
  • Loading branch information
tpendragon authored Sep 15, 2023
2 parents 7acd4d5 + 3d8bed7 commit a97b5d7
Show file tree
Hide file tree
Showing 14 changed files with 578 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
environment:
CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC"
JAVA_OPTIONS: "-Djetty.http.port=8998 -Dfcrepo.dynamic.jms.port=61618 -Dfcrepo.dynamic.stomp.port=61614"
- image: fcrepo/fcrepo:6.0.0
- image: fcrepo/fcrepo:6.4.0
environment:
CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true"
JAVA_OPTS: "-Djetty.http.port=8978 -Dfcrepo.dynamic.jms.port=61619 -Dfcrepo.dynamic.stomp.port=61615 -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true"
Expand Down
15 changes: 11 additions & 4 deletions .lando.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,34 @@ services:
- fedora4:/data
ports:
- 8988:8080
portforward: true
environment:
CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC"
portforward: 8988
valkyrie_fedora_5:
type: compose
app_mount: false
volumes:
fedora5:
services:
image: samvera/fcrepo4:5.1.0
command: /fedora-entrypoint.sh
image: fcrepo/fcrepo:5.1.1-multiplatform
command:
- "catalina.sh"
- "run"
volumes:
- fedora5:/data
ports:
- 8998:8080
environment:
CATALINA_OPTS: "-Djava.awt.headless=true -Dfile.encoding=UTF-8 -server -Xms512m -Xmx1024m -XX:NewSize=256m -XX:MaxNewSize=256m -XX:PermSize=256m -XX:MaxPermSize=256m -XX:+DisableExplicitGC -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true"
JAVA_OPTS: "-Dfcrepo.dynamic.jms.port=61620 -Dfcrepo.dynamic.stomp.port=61617 -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true"
portforward: true
valkyrie_fedora_6:
type: compose
app_mount: false
volumes:
fedora6:
services:
image: fcrepo/fcrepo:6.0.0
image: fcrepo/fcrepo:6.4.0
command:
- "catalina.sh"
- "run"
Expand Down
1 change: 1 addition & 0 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Metrics/ClassLength:
- 'lib/valkyrie/persistence/fedora/persister.rb'
- 'lib/valkyrie/persistence/fedora/query_service.rb'
- 'lib/valkyrie/persistence/postgres/query_service.rb'
- 'lib/valkyrie/storage/fedora.rb'

Metrics/MethodLength:
Exclude:
Expand Down
67 changes: 66 additions & 1 deletion lib/valkyrie/specs/shared_specs/storage_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ class Valkyrie::Specs::CustomResource < Valkyrie::Resource
it { is_expected.to respond_to(:find_by).with_keywords(:id) }
it { is_expected.to respond_to(:delete).with_keywords(:id) }
it { is_expected.to respond_to(:upload).with_keywords(:file, :resource, :original_filename) }
it { is_expected.to respond_to(:supports?) }

it "returns false for non-existing features" do
# Asking about an unknown feature must yield exactly false (nil would fail `eq false`),
# so callers can probe adapters for optional capabilities without special-casing.
expect(storage_adapter.supports?(:bad_feature_not_real_dont_implement)).to eq false
end

it "can upload a file which is just an IO" do
io_file = Tempfile.new('temp_io')
Expand Down Expand Up @@ -50,7 +55,7 @@ def open_files
end

it "can upload, validate, re-fetch, and delete a file" do
resource = Valkyrie::Specs::CustomResource.new(id: "test")
resource = Valkyrie::Specs::CustomResource.new(id: "test#{SecureRandom.uuid}")
sha1 = Digest::SHA1.file(file).to_s
size = file.size
expect(uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true)).to be_kind_of Valkyrie::StorageAdapter::File
Expand All @@ -77,4 +82,64 @@ def open_files
expect { storage_adapter.find_by(id: uploaded_file.id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound
expect { storage_adapter.find_by(id: Valkyrie::ID.new("noexist")) }.to raise_error Valkyrie::StorageAdapter::FileNotFound
end

it "can upload and find new versions" do
# Adapters that do not report :versions support are marked pending instead of being
# held to the expectations below.
pending "Versioning not supported" unless storage_adapter.supports?(:versions)
resource = Valkyrie::Specs::CustomResource.new(id: "test#{SecureRandom.uuid}")
uploaded_file = storage_adapter.upload(file: file, original_filename: 'foo.jpg', resource: resource, fake_upload_argument: true)
# The very first upload must already carry a version identifier.
expect(uploaded_file.version_id).not_to be_blank

# Second payload; its size is expected to differ from the first upload (asserted below).
f = Tempfile.new
f.puts "Test File"
f.rewind

# upload_version
new_version = storage_adapter.upload_version(id: uploaded_file.id, file: f)
expect(uploaded_file.id).to eq new_version.id
expect(uploaded_file.version_id).not_to eq new_version.version_id

# find_versions
# Two versions of the same file have the same id, but different version_ids,
# use case: I want to store metadata about a file when it's uploaded as a
# version and refer to it consistently.
versions = storage_adapter.find_versions(id: new_version.id)
expect(versions.length).to eq 2
# Versions are expected newest first: the new upload leads, the original trails.
expect(versions.first.id).to eq new_version.id
expect(versions.first.version_id).to eq new_version.version_id

expect(versions.last.id).to eq uploaded_file.id
expect(versions.last.version_id).to eq uploaded_file.version_id

expect(versions.first.size).not_to eq versions.last.size

# A version_id can be handed to find_by directly to fetch that specific version.
expect(storage_adapter.find_by(id: uploaded_file.version_id).version_id).to eq uploaded_file.version_id

# Deleting a version should leave the current versions
if storage_adapter.supports?(:version_deletion)
storage_adapter.delete(id: uploaded_file.version_id)
expect(storage_adapter.find_versions(id: uploaded_file.id).length).to eq 1
expect { storage_adapter.find_by(id: uploaded_file.version_id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound
end
# Captured after the optional deletion so the count check below holds for adapters
# with and without version deletion.
current_length = storage_adapter.find_versions(id: new_version.id).length

# Restoring a previous version is just pumping its file into upload_version
newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id)
# NOTE(review): this compares a version_id against an id; `new_version.version_id`
# may have been intended - confirm.
expect(newest_version.version_id).not_to eq new_version.id
expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id

# I can restore a version twice
newest_version = storage_adapter.upload_version(file: new_version, id: new_version.id)
# NOTE(review): same id-vs-version_id comparison as above - confirm.
expect(newest_version.version_id).not_to eq new_version.id
expect(storage_adapter.find_by(id: newest_version.id).version_id).to eq newest_version.version_id
expect(storage_adapter.find_versions(id: newest_version.id).length).to eq current_length + 2

# NOTE: We originally wanted deleting the current record to push it into the
# versions history, but FCRepo 4/5/6 doesn't work that way, so we changed to
# instead make deleting delete everything.
storage_adapter.delete(id: new_version.id)
expect { storage_adapter.find_by(id: new_version.id) }.to raise_error Valkyrie::StorageAdapter::FileNotFound
expect(storage_adapter.find_versions(id: new_version.id).length).to eq 0
ensure
# Close the tempfile even when an expectation above fails; `&.` covers the case
# where the example stopped before `f` was assigned.
f&.close
end
end
1 change: 1 addition & 0 deletions lib/valkyrie/storage.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ module Valkyrie
# @see lib/valkyrie/specs/shared_specs/storage_adapter.rb
module Storage
require 'valkyrie/storage/disk'
require 'valkyrie/storage/versioned_disk'
require 'valkyrie/storage/fedora'
require 'valkyrie/storage/memory'
end
Expand Down
6 changes: 6 additions & 0 deletions lib/valkyrie/storage/disk.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ def handles?(id:)
id.to_s.start_with?("disk://#{base_path}")
end

# @param feature [Symbol] Feature to test for.
# @return [Boolean] true if the adapter supports the given feature
def supports?(_feature)
  # This adapter reports no optional features, so the answer does not depend on the argument.
  false
end

def file_path(id)
id.to_s.gsub(/^disk:\/\//, '')
end
Expand Down
133 changes: 123 additions & 10 deletions lib/valkyrie/storage/fedora.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,21 @@ def handles?(id:)
id.to_s.start_with?(PROTOCOL)
end

# @param feature [Symbol] Feature to test for.
# @return [Boolean] true if the adapter supports the given feature
def supports?(feature)
  case feature
  when :versions
    # Versioning is reported regardless of the Fedora version in use.
    true
  when :version_deletion
    # Fedora 6 auto versions and you can't delete versions.
    fedora_version != 6
  else
    false
  end
end

# Return the file associated with the given identifier
# @param id [Valkyrie::ID]
# @return [Valkyrie::StorageAdapter::StreamFile]
# @raise Valkyrie::StorageAdapter::FileNotFound if nothing is found
def find_by(id:)
Valkyrie::StorageAdapter::StreamFile.new(id: id, io: response(id: id))
perform_find(id: id)
end

# @param file [IO]
Expand All @@ -37,24 +46,123 @@ def find_by(id:)
def upload(file:, original_filename:, resource:, content_type: "application/octet-stream", # rubocop:disable Metrics/ParameterLists
           resource_uri_transformer: default_resource_uri_transformer, **_extra_arguments)
  # The binary lives at "<resource uri>/original" in Fedora.
  identifier = resource_uri_transformer.call(resource, base_url) + '/original'
  upload_file(fedora_uri: identifier, io: file, content_type: content_type, original_filename: original_filename)
  # Fedora 6 auto versions, so check to see if there's a version for this
  # initial upload. If not, then mint one (fedora 4/5)
  version_id = current_version_id(id: valkyrie_identifier(uri: identifier))
  version_id ||= mint_version(identifier, latest_version(identifier))
  # Swap everything up to and including "//" for the adapter's protocol prefix.
  storage_id = Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL))
  perform_find(id: storage_id, version_id: version_id)
end

# @param id [Valkyrie::ID] ID of the Valkyrie::StorageAdapter::StreamFile to
# version.
# @param file [IO]
def upload_version(id:, file:)
  uri = fedora_identifier(id: id)
  # Fedora 6 has auto versioning, so have to sleep if it's too soon after last
  # upload.
  if fedora_version == 6
    # The last path segment of the current version ID is compared with the
    # current UTC time at one-second resolution.
    newest_stamp = current_version_id(id: id).to_s.split("/").last
    if newest_stamp == Time.current.utc.strftime("%Y%m%d%H%M%S")
      sleep(0.5)
      return upload_version(id: id, file: file)
    end
  end
  upload_file(fedora_uri: uri, io: file)
  minted_version_id = mint_version(uri, latest_version(uri))
  storage_id = Valkyrie::ID.new(uri.to_s.sub(/^.+\/\//, PROTOCOL))
  perform_find(id: storage_id, version_id: minted_version_id)
end

# @param id [Valkyrie::ID]
# @return [Array<Valkyrie::StorageAdapter::StreamFile>]
def find_versions(id:)
  fedora_uri = fedora_identifier(id: id)
  version_list(fedora_uri).map do |entry|
    # Each listed version's "@id" becomes both the ID to fetch and its version_id.
    version_id = valkyrie_identifier(uri: entry["@id"])
    perform_find(id: version_id, version_id: version_id)
  end
end

# Delete the file in Fedora associated with the given identifier.
# @param id [Valkyrie::ID]
def delete(id:)
  # Translate the storage ID into its Fedora URI, then issue an HTTP DELETE against it.
  target_uri = fedora_identifier(id: id)
  connection.http.delete(target_uri)
end

# Fetch the version listing Fedora holds for a resource.
#
# @param fedora_uri [#to_s] Fedora URI of the resource; "/fcr:versions" is appended to it.
# @return [Array<Hash>] JSON-LD nodes, each carrying an "@id" for one version.
#   Always an Array: empty when the request is unsuccessful, when the returned
#   graph is empty, or when it lists no versions. For Fedora 5/6 the entries are
#   sorted by "@id" in descending order.
def version_list(fedora_uri)
  response = connection.http.get do |request|
    request.url "#{fedora_uri}/fcr:versions"
    request.headers["Accept"] = "application/ld+json"
  end
  return [] unless response.success?
  version_graph = JSON.parse(response.body)&.first
  # An empty graph has no first node; answer with an empty list instead of nil
  # so callers that iterate the result (e.g. with #map) do not fail.
  return [] if version_graph.nil?
  if fedora_version == 4
    version_graph["http://fedora.info/definitions/v4/repository#hasVersion"] || []
  else
    # Fedora 5/6 use Memento.
    contained = version_graph["http://www.w3.org/ns/ldp#contains"] || []
    contained.sort_by { |x| x["@id"] }.reverse
  end
end

def upload_file(fedora_uri:, io:, content_type: "application/octet-stream", original_filename: "default")
sha1 = [5, 6].include?(fedora_version) ? "sha" : "sha1"
connection.http.put do |request|
request.url identifier
request.url fedora_uri
request.headers['Content-Type'] = content_type
request.headers['Content-Length'] = file.length.to_s
request.headers['Content-Length'] = io.length.to_s if io.respond_to?(:length)
request.headers['Content-Disposition'] = "attachment; filename=\"#{original_filename}\""
request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(file)}"
request.headers['digest'] = "#{sha1}=#{Digest::SHA1.file(io)}" if io.respond_to?(:to_str)
request.headers['link'] = "<http://www.w3.org/ns/ldp#NonRDFSource>; rel=\"type\""
io = Faraday::UploadIO.new(file, content_type, original_filename)
io = Faraday::UploadIO.new(io, content_type, original_filename)
request.body = io
end
find_by(id: Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL)))
end

# Delete the file in Fedora associated with the given identifier.
# @param id [Valkyrie::ID]
def delete(id:)
connection.http.delete(fedora_identifier(id: id))
# Returns a new version identifier to mint. Defaults to version1, but will
# increment to version2 etc if one found. Only for Fedora 4.
def latest_version(identifier)
  # Only version 4 needs a version ID, 5/6 both mint using timestamps.
  return :not_applicable if fedora_version != 4
  existing_versions = version_list(identifier)
  return "version1" if existing_versions.blank?
  # Take the highest existing "versionN" number rather than trusting the order
  # of the listing, so the next label never collides with an existing one.
  highest_number = existing_versions.map do |version|
    version["@id"].split("/").last.gsub("version", "").to_i
  end.max
  "version#{highest_number + 1}"
end

# @param [Valkyrie::ID] id A storage ID that's not a version, to get the
# version ID of.
def current_version_id(id:)
  versions = version_list(fedora_identifier(id: id))
  # The first listed entry is treated as the current version; nil when nothing is listed.
  valkyrie_identifier(uri: versions.first["@id"]) unless versions.blank?
end

# Build the StreamFile for a storage ID, working out which version it represents.
#
# @param id [Valkyrie::ID] a current-file ID or a version ID (one containing "/fcr:versions").
# @param version_id [Valkyrie::ID, Symbol, nil] version to report on the returned
#   file; when nil it is derived from `id` or looked up.
# @return [Valkyrie::StorageAdapter::StreamFile] file whose id is always the
#   current-file ID and whose content is read from `id` as given.
def perform_find(id:, version_id: nil)
# Everything before "/fcr:versions" is the ID of the current (unversioned) file.
current_id = Valkyrie::ID.new(id.to_s.split("/fcr:versions").first)
# `id` differs from the current ID only when it is itself a version ID, in which
# case it doubles as the version_id unless one was passed explicitly.
version_id ||= id if id != current_id
# No version got passed and we're asking for a current_id, gotta get the
# version ID
# (:empty stands in when no version is listed, so the recursive call does not look it up again.)
return perform_find(id: current_id, version_id: (current_version_id(id: id) || :empty)) if version_id.nil?
Valkyrie::StorageAdapter::StreamFile.new(id: current_id, io: response(id: id), version_id: version_id)
end

# @param identifier [String] Fedora URI to mint a version for.
# @return [Valkyrie::ID] version_id of the minted version.
# Versions are created AFTER content is uploaded, except for Fedora 6 which
# auto versions.
def mint_version(identifier, version_name = "version1")
  response = connection.http.post do |request|
    request.url "#{identifier}/fcr:versions"
    # Only Fedora 4 is sent an explicit version label.
    request.headers['Slug'] = version_name if fedora_version == 4
  end
  case response.status
  when 410
    # If there's a deletion marker, don't return anything. (Fedora 4)
    nil
  when 409
    # This is awful, but versioning is locked to per-second increments,
    # returns a 409 in Fedora 5 if there's a conflict.
    # NOTE(review): the retry has no upper bound; a persistent 409 keeps recursing.
    sleep(0.5)
    mint_version(identifier, version_name)
  when 201
    valkyrie_identifier(uri: response.headers["location"].gsub("/fcr:metadata", ""))
  else
    raise "Version unable to be created"
  end
end

class IOProxy
Expand All @@ -81,6 +189,11 @@ def fedora_identifier(id:)
RDF::URI(identifier)
end

# Convert a Fedora HTTP(S) URI into this adapter's storage identifier.
#
# @param uri [#to_s] Fedora URI, such as the "@id" of an entry in a version listing.
# @return [Valkyrie::ID] the same location with a leading "http://" or "https://"
#   replaced by PROTOCOL; any other input is wrapped unchanged.
def valkyrie_identifier(uri:)
  # Only a scheme at the very start is rewritten, and https is accepted too, so
  # TLS endpoints yield PROTOCOL-prefixed IDs consistent with those built in #upload.
  Valkyrie::ID.new(uri.to_s.sub(%r{\Ahttps?://}, PROTOCOL))
end

private

# @return [IOProxy]
Expand Down
Loading

0 comments on commit a97b5d7

Please sign in to comment.