From 83b73e780608a783b8023ece0de08e91f7dc3b9c Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Wed, 20 Mar 2024 18:51:10 +0100 Subject: [PATCH 01/10] Use standardrb code formatting --- .github/workflows/ci.yml | 2 +- Gemfile | 2 +- Rakefile | 12 ++-- lib/zipline.rb | 10 +-- lib/zipline/zip_handler.rb | 14 ++-- spec/lib/zipline/zip_handler_spec.rb | 96 +++++++++++++++------------- spec/lib/zipline/zipline_spec.rb | 28 ++++---- spec/spec_helper.rb | 28 ++++---- zipline.gemspec | 45 +++++++------ 9 files changed, 125 insertions(+), 112 deletions(-) mode change 100644 => 100755 Rakefile diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fdca5a1..e7962fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,5 +22,5 @@ jobs: with: ruby-version: ${{ matrix.ruby-version }} bundler-cache: true # 'bundle install' and cache - - name: Run tests + - name: Run tests and standard run: bundle exec rake diff --git a/Gemfile b/Gemfile index 5afb5c1..9e5e083 100644 --- a/Gemfile +++ b/Gemfile @@ -1,4 +1,4 @@ -source 'https://rubygems.org' +source "https://rubygems.org" # Specify your gem's dependencies in zipline.gemspec gemspec diff --git a/Rakefile b/Rakefile old mode 100644 new mode 100755 index b2df96f..54f71f8 --- a/Rakefile +++ b/Rakefile @@ -1,12 +1,8 @@ #!/usr/bin/env rake require "bundler/gem_tasks" +require "standard/rake" +require "rspec/core/rake_task" -begin - require 'rspec/core/rake_task' +RSpec::Core::RakeTask.new(:spec) - RSpec::Core::RakeTask.new(:spec) - - task default: :spec -rescue LoadError - # no rspec available -end +task default: [:spec, :standard] diff --git a/lib/zipline.rb b/lib/zipline.rb index 4e0552f..5dcdb30 100644 --- a/lib/zipline.rb +++ b/lib/zipline.rb @@ -1,7 +1,7 @@ -require 'content_disposition' -require 'zip_kit' -require 'zipline/version' -require 'zipline/zip_handler' +require "content_disposition" +require "zip_kit" +require "zipline/version" +require "zipline/zip_handler" # class MyController < ApplicationController # include Zipline @@ -17,7 +17,7 @@ def self.included(into_controller) super end - def zipline(files, zipname = 'zipline.zip', **kwargs_for_zip_kit_stream) + def zipline(files, zipname = "zipline.zip", **kwargs_for_zip_kit_stream) zip_kit_stream(filename: zipname, **kwargs_for_zip_kit_stream) do |zip_kit_streamer| handler = Zipline::ZipHandler.new(zip_kit_streamer, logger) files.each do |file, name, options = {}| diff --git a/lib/zipline/zip_handler.rb b/lib/zipline/zip_handler.rb index 8c51357..3bd9096 100644 --- a/lib/zipline/zip_handler.rb +++ b/lib/zipline/zip_handler.rb @@ -14,7 +14,7 @@ def handle_file(file, name, options) # Rack bodies, it can be helpful to print the error to the Rails log at least error_message = "zipline: an exception (#{e.inspect}) was raised when serving the ZIP body." error_message += " The error occurred when handling file #{name.inspect}" - @logger.error(error_message) if @logger + @logger&.error(error_message) raise end @@ -51,7 +51,7 @@ def normalize(file) elsif is_url?(file) {url: file} else - raise(ArgumentError, 'Bad File/Stream') + raise(ArgumentError, "Bad File/Stream") end end @@ -71,7 +71,7 @@ def write_file(file, name, options) elsif file[:blob] file[:blob].download { |chunk| writer_for_file << chunk } else - raise(ArgumentError, 'Bad File/Stream') + raise(ArgumentError, "Bad File/Stream") end end end @@ -91,8 +91,12 @@ def is_active_storage_one?(file) end def is_url?(url) - url = URI.parse(url) rescue false - url.kind_of?(URI::HTTP) || url.kind_of?(URI::HTTPS) + url = begin + URI.parse(url) + rescue + false + end + url.is_a?(URI::HTTP) || url.is_a?(URI::HTTPS) end end end diff --git a/spec/lib/zipline/zip_handler_spec.rb b/spec/lib/zipline/zip_handler_spec.rb index 7b396db..6ca5277 100644 --- a/spec/lib/zipline/zip_handler_spec.rb +++ b/spec/lib/zipline/zip_handler_spec.rb @@ -1,17 +1,21 @@ -require 'spec_helper' -require 'tempfile' +require "spec_helper" +require "tempfile" module ActiveStorage class Attached class One < Attached end end + class Attachment; end + class Blob; end + class Filename def initialize(name) @name = name end + def to_s @name end @@ -20,38 +24,44 @@ def to_s describe Zipline::ZipHandler do before { Fog.mock! } - let(:file_attributes){ { - key: 'fog_file_tests', - body: 'some body', - public: true - }} - let(:directory_attributes){{ - key: 'fog_directory' - }} - let(:storage_attributes){{ - aws_access_key_id: 'fake_access_key_id', - aws_secret_access_key: 'fake_secret_access_key', - provider: 'AWS' - }} - let(:storage){ Fog::Storage.new(storage_attributes)} - let(:directory){ storage.directories.create(directory_attributes) } - let(:file){ directory.files.create(file_attributes) } - - describe '.normalize' do - let(:handler){ Zipline::ZipHandler.new(_streamer = double(), _logger = nil)} + let(:file_attributes) { + { + key: "fog_file_tests", + body: "some body", + public: true + } + } + let(:directory_attributes) { + { + key: "fog_directory" + } + } + let(:storage_attributes) { + { + aws_access_key_id: "fake_access_key_id", + aws_secret_access_key: "fake_secret_access_key", + provider: "AWS" + } + } + let(:storage) { Fog::Storage.new(storage_attributes) } + let(:directory) { storage.directories.create(directory_attributes) } + let(:file) { directory.files.create(file_attributes) } + + describe ".normalize" do + let(:handler) { Zipline::ZipHandler.new(_streamer = double, _logger = nil) } context "CarrierWave" do context "Remote" do - let(:file){ CarrierWave::Storage::Fog::File.new(nil,nil,nil) } + let(:file) { CarrierWave::Storage::Fog::File.new(nil, nil, nil) } it "extracts the url" do - allow(file).to receive(:url).and_return('fakeurl') + allow(file).to receive(:url).and_return("fakeurl") expect(File).not_to receive(:open) - expect(handler.normalize(file)).to eq({url: 'fakeurl'}) + expect(handler.normalize(file)).to eq({url: "fakeurl"}) end end context "Local" do - let(:file){ CarrierWave::SanitizedFile.new(Tempfile.new('t')) } + let(:file) { CarrierWave::SanitizedFile.new(Tempfile.new("t")) } it "creates a File" do - allow(file).to receive(:path).and_return('spec/fakefile.txt') + allow(file).to receive(:path).and_return("spec/fakefile.txt") normalized = handler.normalize(file) expect(normalized.keys).to include(:file) expect(normalized[:file]).to be_a File @@ -61,20 +71,20 @@ def to_s let(:uploader) { Class.new(CarrierWave::Uploader::Base).new } context "Remote" do - let(:file){ CarrierWave::Storage::Fog::File.new(nil,nil,nil) } + let(:file) { CarrierWave::Storage::Fog::File.new(nil, nil, nil) } it "extracts the url" do allow(uploader).to receive(:file).and_return(file) - allow(file).to receive(:url).and_return('fakeurl') + allow(file).to receive(:url).and_return("fakeurl") expect(File).not_to receive(:open) - expect(handler.normalize(uploader)).to eq({url: 'fakeurl'}) + expect(handler.normalize(uploader)).to eq({url: "fakeurl"}) end end context "Local" do - let(:file){ CarrierWave::SanitizedFile.new(Tempfile.new('t')) } + let(:file) { CarrierWave::SanitizedFile.new(Tempfile.new("t")) } it "creates a File" do allow(uploader).to receive(:file).and_return(file) - allow(file).to receive(:path).and_return('spec/fakefile.txt') + allow(file).to receive(:path).and_return("spec/fakefile.txt") normalized = handler.normalize(uploader) expect(normalized.keys).to include(:file) expect(normalized[:file]).to be_a File @@ -84,20 +94,20 @@ def to_s end context "Paperclip" do context "Local" do - let(:file){ Paperclip::Attachment.new(:name, :instance) } + let(:file) { Paperclip::Attachment.new(:name, :instance) } it "creates a File" do - allow(file).to receive(:path).and_return('spec/fakefile.txt') + allow(file).to receive(:path).and_return("spec/fakefile.txt") normalized = handler.normalize(file) expect(normalized.keys).to include(:file) expect(normalized[:file]).to be_a File end end context "Remote" do - let(:file){ Paperclip::Attachment.new(:name, :instance, storage: :s3) } + let(:file) { Paperclip::Attachment.new(:name, :instance, storage: :s3) } it "creates a URL" do - allow(file).to receive(:expiring_url).and_return('fakeurl') + allow(file).to receive(:expiring_url).and_return("fakeurl") expect(File).to_not receive(:open) - expect(handler.normalize(file)).to include(url: 'fakeurl') + expect(handler.normalize(file)).to include(url: "fakeurl") end end end @@ -154,7 +164,7 @@ def create_attachment def create_blob blob = ActiveStorage::Blob.new - allow(blob).to receive(:service_url).and_return('fakeurl') + allow(blob).to receive(:service_url).and_return("fakeurl") filename = create_filename allow(blob).to receive(:filename).and_return(filename) blob @@ -162,26 +172,26 @@ def create_blob def create_filename # Rails wraps Blob#filname in this class since Rails 5.2 - ActiveStorage::Filename.new('test') + ActiveStorage::Filename.new("test") end end context "Fog" do it "extracts url" do - allow(file).to receive(:url).and_return('fakeurl') + allow(file).to receive(:url).and_return("fakeurl") expect(File).not_to receive(:open) - expect(handler.normalize(file)).to eq(url: 'fakeurl') + expect(handler.normalize(file)).to eq(url: "fakeurl") end end context "IOStream" do - let(:file){ StringIO.new('passthrough')} + let(:file) { StringIO.new("passthrough") } it "passes through" do expect(handler.normalize(file)).to eq(file: file) end end context "invalid" do - let(:file){ Thread.new{} } + let(:file) { Thread.new {} } it "raises error" do - expect{handler.normalize(file)}.to raise_error(ArgumentError) + expect { handler.normalize(file) }.to raise_error(ArgumentError) end end end diff --git a/spec/lib/zipline/zipline_spec.rb b/spec/lib/zipline/zipline_spec.rb index db2196e..8b7b4a5 100644 --- a/spec/lib/zipline/zipline_spec.rb +++ b/spec/lib/zipline/zipline_spec.rb @@ -1,5 +1,5 @@ -require 'spec_helper' -require 'action_controller' +require "spec_helper" +require "action_controller" describe Zipline do before do @@ -14,7 +14,7 @@ def download_zip [StringIO.new("File content goes here"), "one.txt"], [StringIO.new("Some other content goes here"), "two.txt"] ] - zipline(files, 'myfiles.zip', auto_rename_duplicate_filenames: false) + zipline(files, "myfiles.zip", auto_rename_duplicate_filenames: false) end class FailingIO < StringIO @@ -28,11 +28,11 @@ def download_zip_with_error_during_streaming [StringIO.new("File content goes here"), "one.txt"], [FailingIO.new("This will fail half-way"), "two.txt"] ] - zipline(files, 'myfiles.zip', auto_rename_duplicate_filenames: false) + zipline(files, "myfiles.zip", auto_rename_duplicate_filenames: false) end end - it 'passes keyword parameters to ZipKit::OutputEnumerator' do + it "passes keyword parameters to ZipKit::OutputEnumerator" do fake_rack_env = { "HTTP_VERSION" => "HTTP/1.0", "REQUEST_METHOD" => "GET", @@ -40,16 +40,20 @@ def download_zip_with_error_during_streaming "PATH_INFO" => "/download", "QUERY_STRING" => "", "SERVER_NAME" => "host.example", - "rack.input" => StringIO.new, + "rack.input" => StringIO.new } expect(ZipKit::OutputEnumerator).to receive(:new).with(auto_rename_duplicate_filenames: false).and_call_original status, headers, body = FakeController.action(:download_zip).call(fake_rack_env) - expect(headers['Content-Disposition']).to eq("attachment; filename=\"myfiles.zip\"; filename*=UTF-8''myfiles.zip") + expect(status).to eq(200) + expect(headers["Content-Disposition"]).to eq("attachment; filename=\"myfiles.zip\"; filename*=UTF-8''myfiles.zip") + expect { + body.each {} + }.not_to raise_error end - it 'sends the exception raised in the streaming body to the Rails logger' do + it "sends the exception raised in the streaming body to the Rails logger" do fake_rack_env = { "HTTP_VERSION" => "HTTP/1.0", "REQUEST_METHOD" => "GET", @@ -57,17 +61,17 @@ def download_zip_with_error_during_streaming "PATH_INFO" => "/download", "QUERY_STRING" => "", "SERVER_NAME" => "host.example", - "rack.input" => StringIO.new, + "rack.input" => StringIO.new } - fake_logger = double() + fake_logger = double allow(fake_logger).to receive(:warn) expect(fake_logger).to receive(:error).with(a_string_matching(/when serving the ZIP/)) FakeController.logger = fake_logger expect { - status, headers, body = FakeController.action(:download_zip_with_error_during_streaming).call(fake_rack_env) - body.each { } + _status, _headers, body = FakeController.action(:download_zip_with_error_during_streaming).call(fake_rack_env) + body.each {} }.to raise_error(/Something wonky/) end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index b106113..5ccca6c 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,22 +1,22 @@ -require 'rspec' -require 'active_support' -require 'active_support/core_ext' -require 'action_dispatch' +require "rspec" +require "active_support" +require "active_support/core_ext" +require "action_dispatch" -require 'zipline' -require 'paperclip' -require 'fog-aws' -require 'carrierwave' +require "zipline" +require "paperclip" +require "fog-aws" +require "carrierwave" -Dir["#{File.expand_path('..', __FILE__)}/support/**/*.rb"].sort.each { |f| require f } +Dir["#{File.expand_path("..", __FILE__)}/support/**/*.rb"].sort.each { |f| require f } CarrierWave.configure do |config| - config.fog_provider = 'fog/aws' + config.fog_provider = "fog/aws" config.fog_credentials = { - provider: 'AWS', - aws_access_key_id: 'dummy', - aws_secret_access_key: 'data', - region: 'us-west-2', + provider: "AWS", + aws_access_key_id: "dummy", + aws_secret_access_key: "data", + region: "us-west-2" } end diff --git a/zipline.gemspec b/zipline.gemspec index 331682c..9cf6cbf 100644 --- a/zipline.gemspec +++ b/zipline.gemspec @@ -1,34 +1,33 @@ -# -*- encoding: utf-8 -*- -require File.expand_path('../lib/zipline/version', __FILE__) +require File.expand_path("../lib/zipline/version", __FILE__) Gem::Specification.new do |gem| - gem.authors = ["Ram Dobson"] - gem.email = ["ram.dobson@solsystemscompany.com"] - gem.description = %q{a module for streaming dynamically generated zip files} - gem.summary = %q{stream zip files from rails} - gem.homepage = "http://github.com/fringd/zipline" + gem.authors = ["Ram Dobson"] + gem.email = ["ram.dobson@solsystemscompany.com"] + gem.description = "a module for streaming dynamically generated zip files" + gem.summary = "stream zip files from rails" + gem.homepage = "http://github.com/fringd/zipline" - gem.files = `git ls-files`.split($\) - %w{.gitignore} - gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) } - gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) - gem.name = "zipline" + gem.files = `git ls-files`.split($\) - %w[.gitignore] + gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) } + gem.name = "zipline" gem.require_paths = ["lib"] - gem.version = Zipline::VERSION - gem.licenses = ['MIT'] + gem.version = Zipline::VERSION + gem.licenses = ["MIT"] gem.required_ruby_version = ">= 2.7" - gem.add_dependency 'actionpack', ['>= 6.0', '< 8.0'] - gem.add_dependency 'content_disposition', '~> 1.0' - gem.add_dependency 'zip_kit', ['~> 6', '>= 6.2.0', '< 7'] + gem.add_dependency "actionpack", [">= 6.0", "< 8.0"] + gem.add_dependency "content_disposition", "~> 1.0" + gem.add_dependency "zip_kit", ["~> 6", ">= 6.2.0", "< 7"] - gem.add_development_dependency 'rspec', '~> 3' - gem.add_development_dependency 'fog-aws' - gem.add_development_dependency 'aws-sdk-s3' - gem.add_development_dependency 'carrierwave' - gem.add_development_dependency 'paperclip' - gem.add_development_dependency 'rake' + gem.add_development_dependency "rspec", "~> 3" + gem.add_development_dependency "fog-aws" + gem.add_development_dependency "aws-sdk-s3" + gem.add_development_dependency "carrierwave" + gem.add_development_dependency "paperclip" + gem.add_development_dependency "rake" + gem.add_development_dependency "standard", "1.28.5" # Very specific version of standard for 2.6 with _known_ settings # https://github.com/rspec/rspec-mocks/issues/1457 - gem.add_development_dependency 'rspec-mocks', '~> 3.12' + gem.add_development_dependency "rspec-mocks", "~> 3.12" end From 323c55bcf300b6c22e92f7773dee93104b016fe5 Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Wed, 20 Mar 2024 20:10:20 +0100 Subject: [PATCH 02/10] Configure Standard --- .standard.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .standard.yml diff --git a/.standard.yml b/.standard.yml new file mode 100644 index 0000000..410cdd7 --- /dev/null +++ b/.standard.yml @@ -0,0 +1,4 @@ +ruby_version: 2.7 +ignore: + - 'spec/**/*': + - Lint/ConstantDefinitionInBlock From 0809caba9585b492e26e703bf3fa4d35e4790032 Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Wed, 20 Mar 2024 18:53:39 +0100 Subject: [PATCH 03/10] content_disposition gem is no longer needed --- lib/zipline.rb | 2 +- zipline.gemspec | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/zipline.rb b/lib/zipline.rb index 5dcdb30..e101103 100644 --- a/lib/zipline.rb +++ b/lib/zipline.rb @@ -1,7 +1,7 @@ -require "content_disposition" require "zip_kit" require "zipline/version" require "zipline/zip_handler" +require "zipline/retrievers" # class MyController < ApplicationController # include Zipline diff --git a/zipline.gemspec b/zipline.gemspec index 9cf6cbf..d842afb 100644 --- a/zipline.gemspec +++ b/zipline.gemspec @@ -17,7 +17,6 @@ Gem::Specification.new do |gem| gem.required_ruby_version = ">= 2.7" gem.add_dependency "actionpack", [">= 6.0", "< 8.0"] - gem.add_dependency "content_disposition", "~> 1.0" gem.add_dependency "zip_kit", ["~> 6", ">= 6.2.0", "< 7"] gem.add_development_dependency "rspec", "~> 3" From e97eb5e87e258bfcc1728cb098bea723fbe3433e Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Wed, 20 Mar 2024 18:54:12 +0100 Subject: [PATCH 04/10] Use require_relative --- lib/zipline.rb | 7 ++++--- lib/zipline/retrievers.rb | 0 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 lib/zipline/retrievers.rb diff --git a/lib/zipline.rb b/lib/zipline.rb index e101103..4732f1f 100644 --- a/lib/zipline.rb +++ b/lib/zipline.rb @@ -1,7 +1,8 @@ require "zip_kit" -require "zipline/version" -require "zipline/zip_handler" -require "zipline/retrievers" + +require_relative "zipline/version" +require_relative "zipline/zip_handler" +require_relative "zipline/retrievers" # class MyController < ApplicationController # include Zipline diff --git a/lib/zipline/retrievers.rb b/lib/zipline/retrievers.rb new file mode 100644 index 0000000..e69de29 From 4bf3888a8087f9a44baac1bd8e1c9abe6e36d6a2 Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Wed, 20 Mar 2024 19:49:01 +0100 Subject: [PATCH 05/10] Relocate specs to reflect lib/ --- spec/{lib => }/zipline/zip_handler_spec.rb | 0 spec/{lib => }/zipline/zipline_spec.rb | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename spec/{lib => }/zipline/zip_handler_spec.rb (100%) rename spec/{lib => }/zipline/zipline_spec.rb (100%) diff --git a/spec/lib/zipline/zip_handler_spec.rb b/spec/zipline/zip_handler_spec.rb similarity index 100% rename from spec/lib/zipline/zip_handler_spec.rb rename to spec/zipline/zip_handler_spec.rb diff --git a/spec/lib/zipline/zipline_spec.rb b/spec/zipline/zipline_spec.rb similarity index 100% rename from spec/lib/zipline/zipline_spec.rb rename to spec/zipline/zipline_spec.rb From 778dfd55840f279c45fe37439d21871df100227e Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Wed, 20 Mar 2024 19:49:40 +0100 Subject: [PATCH 06/10] Introduce Retrievers They are fluffy and can fetch stuff to include in ZIPs --- lib/zipline.rb | 8 +-- lib/zipline/retrievers.rb | 126 ++++++++++++++++++++++++++++++++++ lib/zipline/zip_handler.rb | 134 +++++++++++-------------------------- 3 files changed, 168 insertions(+), 100 deletions(-) diff --git a/lib/zipline.rb b/lib/zipline.rb index 4732f1f..3ac2afb 100644 --- a/lib/zipline.rb +++ b/lib/zipline.rb @@ -1,9 +1,5 @@ require "zip_kit" -require_relative "zipline/version" -require_relative "zipline/zip_handler" -require_relative "zipline/retrievers" - # class MyController < ApplicationController # include Zipline # def index @@ -13,6 +9,10 @@ # end # end module Zipline + require_relative "zipline/version" + require_relative "zipline/zip_handler" + require_relative "zipline/retrievers" + def self.included(into_controller) into_controller.include(ZipKit::RailsStreaming) super diff --git a/lib/zipline/retrievers.rb b/lib/zipline/retrievers.rb index e69de29..d2760f6 100644 --- a/lib/zipline/retrievers.rb +++ b/lib/zipline/retrievers.rb @@ -0,0 +1,126 @@ +class Zipline::IORetriever + def self.build_for(item) + return new(item) if item.respond_to?(:read) && item.respond_to?(:read_nonblock) + end + + def initialize(io) + @io = io + end + + def each_chunk + chunk_size = 1024 + while (bytes = @io.read(chunk_size)) + yield(bytes) + end + end +end + +class Zipline::FileRetriever < Zipline::IORetriever + def self.build_for(item) + return super(item) if item.is_a?(File) + end + + def each_chunk(&blk) + @io.rewind + super(&blk) + ensure + @io.close + end +end + +class Zipline::HTTPRetriever + def self.build_for(url) + return unless item && item.is_a?(String) && item.start_with?("http") + new(item) + end + + def initialize(url) + @uri = URI(url) + end + + def each_chunk(&block) + Net::HTTP.get_response(@uri) do |response| + response.read_body(&block) + end + end + + def may_restart_after?(e) + # Server error, IO error etc + false + end +end + +class Zipline::StringRetriever + def self.build_for(item) + return unless item.is_a?(String) + new(item) + end + + def initialize(string) + @string = string + end + + def each_chunk + chunk_size = 1024 + offset = 0 + loop do + bytes = @string.byteslice(offset, chunk_size) + offset += chunk_size + break if bytes.nil? + yield(bytes) + end + end + + def may_restart_after?(e) + false + end +end + +class Zipline::CarrierwaveRetriever + def self.build_for(item) + if defined?(CarrierWave::Storage::Fog::File) && item.is_a?(CarrierWave::Storage::Fog::File) + return Zipline::HTTPRetriever.new(item.url) + end + end +end + +class Zipline::ActiveStorageRetriever + def self.build_for(item) + return unless defined?(ActiveStorage) + return new(item.blob) if is_active_storage_attachment?(item) || is_active_storage_one?(item) + return new(item) if is_active_storage_blob?(item) + nil + end + + + def self.is_active_storage_attachment?(item) + defined?(ActiveStorage::Attachment) && item.is_a?(ActiveStorage::Attachment) + end + + def self.is_active_storage_one?(item) + defined?(ActiveStorage::Attached::One) && item.is_a?(ActiveStorage::Attached::One) + end + + def self.is_active_storage_blob?(item) + defined?(ActiveStorage::Blob) && item.is_a?(ActiveStorage::Blob) + end + + def initialize(blob) + @blob = blob + end + + def each_chunk(&block) + @blob.download(&block) + end +end + +class Zipline::PaperclipRetriever + def self.build_for(item) + return unless defined?(Paperclip) && item.is_a?(Paperclip::Attachment) + if item.options[:storage] == :filesystem + Zipline::FileRetriever.build_for(File.open(item.path, "rb")) + else + Zipline::HTTPRetriever.build_for(file.expiring_url) + end + end +end diff --git a/lib/zipline/zip_handler.rb b/lib/zipline/zip_handler.rb index 3bd9096..9b9c819 100644 --- a/lib/zipline/zip_handler.rb +++ b/lib/zipline/zip_handler.rb @@ -1,102 +1,44 @@ -module Zipline - class ZipHandler - # takes an array of pairs [[uploader, filename], ... ] - def initialize(streamer, logger) - @streamer = streamer - @logger = logger - end - - def handle_file(file, name, options) - normalized_file = normalize(file) - write_file(normalized_file, name, options) - rescue => e - # Since most APM packages do not trace errors occurring within streaming - # Rack bodies, it can be helpful to print the error to the Rails log at least - error_message = "zipline: an exception (#{e.inspect}) was raised when serving the ZIP body." - error_message += " The error occurred when handling file #{name.inspect}" - @logger&.error(error_message) - raise - end - - # This extracts either a url or a local file from the provided file. - # Currently support carrierwave and paperclip local and remote storage. - # returns a hash of the form {url: aUrl} or {file: anIoObject} - def normalize(file) - if defined?(CarrierWave::Uploader::Base) && file.is_a?(CarrierWave::Uploader::Base) - file = file.file - end - - if defined?(Paperclip) && file.is_a?(Paperclip::Attachment) - if file.options[:storage] == :filesystem - {file: File.open(file.path)} - else - {url: file.expiring_url} - end - elsif defined?(CarrierWave::Storage::Fog::File) && file.is_a?(CarrierWave::Storage::Fog::File) - {url: file.url} - elsif defined?(CarrierWave::SanitizedFile) && file.is_a?(CarrierWave::SanitizedFile) - {file: File.open(file.path)} - elsif is_io?(file) - {file: file} - elsif defined?(ActiveStorage::Blob) && file.is_a?(ActiveStorage::Blob) - {blob: file} - elsif is_active_storage_attachment?(file) || is_active_storage_one?(file) - {blob: file.blob} - elsif file.respond_to? :url - {url: file.url} - elsif file.respond_to? :path - {file: File.open(file.path)} - elsif file.respond_to? :file - {file: File.open(file.file)} - elsif is_url?(file) - {url: file} - else - raise(ArgumentError, "Bad File/Stream") - end - end - - def write_file(file, name, options) - @streamer.write_file(name, **options.slice(:modification_time)) do |writer_for_file| - if file[:url] - the_remote_uri = URI(file[:url]) +class Zipline::ZipHandler + def initialize(streamer, logger) + @streamer = streamer + @logger = logger + end + + def handle_file(file, name, options) + write_item(file, name, options) + rescue => e + # Since most APM packages do not trace errors occurring within streaming + # Rack bodies, it can be helpful to print the error to the Rails log at least + error_message = "zipline: an exception (#{e.inspect}) was raised when serving the ZIP body." + error_message += " The error occurred when handling file #{name.inspect}" + @logger&.error(error_message) + raise + end - Net::HTTP.get_response(the_remote_uri) do |response| - response.read_body do |chunk| - writer_for_file << chunk - end - end - elsif file[:file] - IO.copy_stream(file[:file], writer_for_file) - file[:file].close - elsif file[:blob] - file[:blob].download { |chunk| writer_for_file << chunk } - else - raise(ArgumentError, "Bad File/Stream") - end + def write_item(item, name, options) + retriever = pick_retriever_for(item) + @streamer.write_file(name, **options.slice(:modification_time)) do |writer_for_file| + retriever.each_chunk do |bytes| + writer_for_file << bytes end end + end - private - - def is_io?(io_ish) - io_ish.respond_to? :read - end - - def is_active_storage_attachment?(file) - defined?(ActiveStorage::Attachment) && file.is_a?(ActiveStorage::Attachment) - end - - def is_active_storage_one?(file) - defined?(ActiveStorage::Attached::One) && file.is_a?(ActiveStorage::Attached::One) - end - - def is_url?(url) - url = begin - URI.parse(url) - rescue - false - end - url.is_a?(URI::HTTP) || url.is_a?(URI::HTTPS) - end + def pick_retriever_for(item) + retriever_classes = [ + Zipline::CarrierwaveRetriever, + Zipline::ActiveStorageRetriever, + Zipline::PaperclipRetriever, + Zipline::FileRetriever, + Zipline::IORetriever, + Zipline::HTTPRetriever, + Zipline::StringRetriever, + ] + retriever_classes.each do |retriever_class| + maybe_retriever = retriever_class.build_for(item) + return maybe_retriever if maybe_retriever + end + + raise "Don't know how to handle a file in the shape of #{file_argument.inspect}" unless retriever end end From ff29c3cf1cff463c98a0102dd3d1b6c2cc183230 Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Wed, 20 Mar 2024 19:59:35 +0100 Subject: [PATCH 07/10] Clean things up some more --- lib/zipline/retrievers.rb | 47 +++++++++++++++++++------------------- lib/zipline/zip_handler.rb | 6 ++--- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/lib/zipline/retrievers.rb b/lib/zipline/retrievers.rb index d2760f6..3f729ca 100644 --- a/lib/zipline/retrievers.rb +++ b/lib/zipline/retrievers.rb @@ -7,11 +7,8 @@ def initialize(io) @io = io end - def each_chunk - chunk_size = 1024 - while (bytes = @io.read(chunk_size)) - yield(bytes) - end + def download_and_write_into(destination) + IO.copy_stream(@io, destination) end end @@ -20,34 +17,35 @@ def self.build_for(item) return super(item) if item.is_a?(File) end - def each_chunk(&blk) + def download_and_write_into(destination) @io.rewind - super(&blk) + super(destination) ensure @io.close end end class Zipline::HTTPRetriever - def self.build_for(url) - return unless item && item.is_a?(String) && item.start_with?("http") - new(item) + def self.build_for(url_or_uri) + uri = begin + URI.parse(url_or_uri) + rescue + return + end + return new(uri) if uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS) end - def initialize(url) - @uri = URI(url) + def initialize(uri) + @uri = uri end - def each_chunk(&block) + def download_and_write_into(destination) Net::HTTP.get_response(@uri) do |response| - response.read_body(&block) + response.read_body do |chunk| + destination.write(destination) + end end end - - def may_restart_after?(e) - # Server error, IO error etc - false - end end class Zipline::StringRetriever @@ -60,14 +58,14 @@ def initialize(string) @string = string end - def each_chunk + def download_and_write_into(destination) chunk_size = 1024 offset = 0 loop do bytes = @string.byteslice(offset, chunk_size) offset += chunk_size + destination.write(bytes) break if bytes.nil? - yield(bytes) end end @@ -92,7 +90,6 @@ def self.build_for(item) nil end - def self.is_active_storage_attachment?(item) defined?(ActiveStorage::Attachment) && item.is_a?(ActiveStorage::Attachment) end @@ -109,8 +106,10 @@ def initialize(blob) @blob = blob end - def each_chunk(&block) - @blob.download(&block) + def download_and_write_into(destination) + @blob.download do |bytes| + destination.write(bytes) + end end end diff --git a/lib/zipline/zip_handler.rb b/lib/zipline/zip_handler.rb index 9b9c819..77a4944 100644 --- a/lib/zipline/zip_handler.rb +++ b/lib/zipline/zip_handler.rb @@ -17,10 +17,8 @@ def handle_file(file, name, options) def write_item(item, name, options) retriever = pick_retriever_for(item) - @streamer.write_file(name, **options.slice(:modification_time)) do |writer_for_file| - retriever.each_chunk do |bytes| - writer_for_file << bytes - end + @streamer.write_file(name, **options.slice(:modification_time)) do |writable| + retriever.download_and_write_into(writable) end end From 637b1561f746870e539bd85548f566370f7d130d Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Wed, 20 Mar 2024 20:09:42 +0100 Subject: [PATCH 08/10] Continue --- lib/zipline/zip_handler.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/zipline/zip_handler.rb b/lib/zipline/zip_handler.rb index 77a4944..d9e2b85 100644 --- a/lib/zipline/zip_handler.rb +++ b/lib/zipline/zip_handler.rb @@ -3,14 +3,14 @@ def initialize(streamer, logger) @streamer = streamer @logger = logger end - + def handle_file(file, name, options) write_item(file, name, options) rescue => e # Since most APM packages do not trace errors occurring within streaming # Rack bodies, it can be helpful to print the error to the Rails log at least error_message = "zipline: an exception (#{e.inspect}) was raised when serving the ZIP body." - error_message += " The error occurred when handling file #{name.inspect}" + error_message += " The error occurred when handling file #{name.inspect} which was a #{file.class}" @logger&.error(error_message) raise end @@ -37,6 +37,6 @@ def pick_retriever_for(item) return maybe_retriever if maybe_retriever end - raise "Don't know how to handle a file in the shape of #{file_argument.inspect}" unless retriever + raise "Don't know how to handle a file in the shape of #{item.inspect}" end end From e1332b70e30857712f29a1cbdd0bdd2278d3c097 Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Fri, 22 Mar 2024 14:22:20 +0100 Subject: [PATCH 09/10] Continue --- lib/zipline/retrievers.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zipline/retrievers.rb b/lib/zipline/retrievers.rb index 3f729ca..70c9bf7 100644 --- a/lib/zipline/retrievers.rb +++ b/lib/zipline/retrievers.rb @@ -42,7 +42,7 @@ def initialize(uri) def download_and_write_into(destination) Net::HTTP.get_response(@uri) do |response| response.read_body do |chunk| - destination.write(destination) + destination.write(chunk) end end end From e61bb94b9f134d59178152c57609a719da59e7d5 Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Fri, 19 Apr 2024 12:19:04 +0200 Subject: [PATCH 10/10] Restructure retriever files --- lib/zipline.rb | 4 +- lib/zipline/retrievers.rb | 124 ------------------ .../retrievers/active_storage_retriever.rb | 34 +++++ .../retrievers/carrierwave_retriever.rb | 7 + lib/zipline/retrievers/file_retriever.rb | 21 +++ lib/zipline/retrievers/http_retriever.rb | 34 +++++ lib/zipline/retrievers/io_retriever.rb | 17 +++ lib/zipline/retrievers/paperclip_retriever.rb | 10 ++ lib/zipline/retrievers/shrine_retriever.rb | 18 +++ lib/zipline/retrievers/string_retriever.rb | 25 ++++ lib/zipline/zip_handler.rb | 24 +++- 11 files changed, 190 insertions(+), 128 deletions(-) create mode 100644 lib/zipline/retrievers/active_storage_retriever.rb create mode 100644 lib/zipline/retrievers/carrierwave_retriever.rb create mode 100644 lib/zipline/retrievers/file_retriever.rb create mode 100644 lib/zipline/retrievers/http_retriever.rb create mode 100644 lib/zipline/retrievers/io_retriever.rb create mode 100644 lib/zipline/retrievers/paperclip_retriever.rb create mode 100644 lib/zipline/retrievers/shrine_retriever.rb create mode 100644 lib/zipline/retrievers/string_retriever.rb diff --git a/lib/zipline.rb b/lib/zipline.rb index 3ac2afb..1bc5d39 100644 --- a/lib/zipline.rb +++ b/lib/zipline.rb @@ -11,7 +11,9 @@ module Zipline require_relative "zipline/version" require_relative "zipline/zip_handler" - require_relative "zipline/retrievers" + Dir.glob(__dir__ + "/zipline/retrievers/*.rb").sort.each do |retriever_module_path| + require retriever_module_path + end def self.included(into_controller) into_controller.include(ZipKit::RailsStreaming) diff --git a/lib/zipline/retrievers.rb b/lib/zipline/retrievers.rb index 70c9bf7..8b13789 100644 --- a/lib/zipline/retrievers.rb +++ b/lib/zipline/retrievers.rb @@ -1,125 +1 @@ -class Zipline::IORetriever - def self.build_for(item) - return new(item) if item.respond_to?(:read) && item.respond_to?(:read_nonblock) - end - def initialize(io) - @io = io - end - - def download_and_write_into(destination) - IO.copy_stream(@io, destination) - end -end - -class Zipline::FileRetriever < Zipline::IORetriever - def self.build_for(item) - return super(item) if item.is_a?(File) - end - - def download_and_write_into(destination) - @io.rewind - super(destination) - ensure - @io.close - end -end - -class Zipline::HTTPRetriever - def self.build_for(url_or_uri) - uri = begin - URI.parse(url_or_uri) - rescue - return - end - return new(uri) if uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS) - end - - def initialize(uri) - @uri = uri - end - - def download_and_write_into(destination) - Net::HTTP.get_response(@uri) do |response| - response.read_body do |chunk| - destination.write(chunk) - end - end - end -end - -class Zipline::StringRetriever - def self.build_for(item) - return unless item.is_a?(String) - new(item) - end - - def initialize(string) - @string = string - end - - def download_and_write_into(destination) - chunk_size = 1024 - offset = 0 - loop do - bytes = @string.byteslice(offset, chunk_size) - offset += chunk_size - destination.write(bytes) - break if bytes.nil? - end - end - - def may_restart_after?(e) - false - end -end - -class Zipline::CarrierwaveRetriever - def self.build_for(item) - if defined?(CarrierWave::Storage::Fog::File) && item.is_a?(CarrierWave::Storage::Fog::File) - return Zipline::HTTPRetriever.new(item.url) - end - end -end - -class Zipline::ActiveStorageRetriever - def self.build_for(item) - return unless defined?(ActiveStorage) - return new(item.blob) if is_active_storage_attachment?(item) || is_active_storage_one?(item) - return new(item) if is_active_storage_blob?(item) - nil - end - - def self.is_active_storage_attachment?(item) - defined?(ActiveStorage::Attachment) && item.is_a?(ActiveStorage::Attachment) - end - - def self.is_active_storage_one?(item) - defined?(ActiveStorage::Attached::One) && item.is_a?(ActiveStorage::Attached::One) - end - - def self.is_active_storage_blob?(item) - defined?(ActiveStorage::Blob) && item.is_a?(ActiveStorage::Blob) - end - - def initialize(blob) - @blob = blob - end - - def download_and_write_into(destination) - @blob.download do |bytes| - destination.write(bytes) - end - end -end - -class Zipline::PaperclipRetriever - def self.build_for(item) - return unless defined?(Paperclip) && item.is_a?(Paperclip::Attachment) - if item.options[:storage] == :filesystem - Zipline::FileRetriever.build_for(File.open(item.path, "rb")) - else - Zipline::HTTPRetriever.build_for(file.expiring_url) - end - end -end diff --git a/lib/zipline/retrievers/active_storage_retriever.rb b/lib/zipline/retrievers/active_storage_retriever.rb new file mode 100644 index 0000000..4db82d9 --- /dev/null +++ b/lib/zipline/retrievers/active_storage_retriever.rb @@ -0,0 +1,34 @@ +class Zipline::ActiveStorageRetriever + def self.build_for(item) + return unless defined?(ActiveStorage) + return new(item.blob) if is_active_storage_attachment?(item) || is_active_storage_one?(item) + return new(item) if is_active_storage_blob?(item) + nil + end + + def self.is_active_storage_attachment?(item) + item.is_a?(ActiveStorage::Attachment) + end + + def self.is_active_storage_one?(item) + item.is_a?(ActiveStorage::Attached::One) + end + + def self.is_active_storage_blob?(item) + item.is_a?(ActiveStorage::Blob) + end + + def initialize(blob) + @blob = blob + end + + def retrieve_into(destination) + @blob.download do |bytes| + destination.write(bytes) + end + end + + def restartable?(_exception) + false + end +end diff --git a/lib/zipline/retrievers/carrierwave_retriever.rb b/lib/zipline/retrievers/carrierwave_retriever.rb new file mode 100644 index 0000000..bd130e6 --- /dev/null +++ b/lib/zipline/retrievers/carrierwave_retriever.rb @@ -0,0 +1,7 @@ +class Zipline::CarrierwaveRetriever + def self.build_for(item) + if defined?(CarrierWave::Storage::Fog::File) && item.is_a?(CarrierWave::Storage::Fog::File) + Zipline::HTTPRetriever.new(item.url) + end + end +end diff --git a/lib/zipline/retrievers/file_retriever.rb b/lib/zipline/retrievers/file_retriever.rb new file mode 100644 index 0000000..aa8c298 --- /dev/null +++ b/lib/zipline/retrievers/file_retriever.rb @@ -0,0 +1,21 @@ +class Zipline::FileRetriever + def self.build_for(item) + return new(item) if item.is_a?(File) + end + + def initialize(file) + @file = file + end + + def retrieve_into(destination) + @file.rewind + @file.binmode + IO.copy_stream(@file, destination) + ensure + @file.close + end + + def restartable?(_exception) + false + end +end diff --git a/lib/zipline/retrievers/http_retriever.rb b/lib/zipline/retrievers/http_retriever.rb new file mode 100644 index 0000000..eeae270 --- /dev/null +++ b/lib/zipline/retrievers/http_retriever.rb @@ -0,0 +1,34 @@ +class Zipline::HTTPRetriever + def self.build_for(url_or_uri) + return unless url_or_uri.is_a?(URI) || url_or_uri.is_a?(String) && url_or_uri.start_with?("http") + + uri = begin + URI.parse(url_or_uri) + rescue + return + end + + return new(uri) if uri.is_a?(URI::HTTP) || uri.is_a?(URI::HTTPS) + end + + def initialize(uri) + @uri = uri + end + + def retrieve_into(destination) + Net::HTTP.get_response(@uri) do |response| + response.read_body do |chunk| + destination.write(chunk) + end + end + end + + def restartable?(exception) + restartables = [ + Net::HTTPServerError, + Net::HTTPClientException, + Net::HTTPServiceUnavailable + ] + restartables.any? { |net_http_server_error_class| net_http_server_error_class === exception } + end +end diff --git a/lib/zipline/retrievers/io_retriever.rb b/lib/zipline/retrievers/io_retriever.rb new file mode 100644 index 0000000..b006b3f --- /dev/null +++ b/lib/zipline/retrievers/io_retriever.rb @@ -0,0 +1,17 @@ +class Zipline::IORetriever + def self.build_for(item) + return new(item) if item.respond_to?(:read) && item.respond_to?(:read_nonblock) + end + + def initialize(io) + @io = io + end + + def retrieve_into(destination) + IO.copy_stream(@io, destination) + end + + def restartable?(_exception) + false + end +end diff --git a/lib/zipline/retrievers/paperclip_retriever.rb b/lib/zipline/retrievers/paperclip_retriever.rb new file mode 100644 index 0000000..b76c66a --- /dev/null +++ b/lib/zipline/retrievers/paperclip_retriever.rb @@ -0,0 +1,10 @@ +class Zipline::PaperclipRetriever + def self.build_for(item) + return unless defined?(Paperclip) && item.is_a?(Paperclip::Attachment) + if item.options[:storage] == :filesystem + Zipline::FileRetriever.build_for(File.open(item.path, "rb")) + else + Zipline::HTTPRetriever.build_for(file.expiring_url) + end + end +end diff --git a/lib/zipline/retrievers/shrine_retriever.rb b/lib/zipline/retrievers/shrine_retriever.rb new file mode 100644 index 0000000..344eb6a --- /dev/null +++ b/lib/zipline/retrievers/shrine_retriever.rb @@ -0,0 +1,18 @@ +class Zipline::ShrineRetriever + def self.build_for(item) + return unless defined?(Shrine::UploadedFile) && item.is_a?(Shrine::UploadedFile) + new(item) + end + + def initialize(shrine_uploaded_file) + @shrine_uploaded_file = shrine_uploaded_file + end + + def retrieve_into(destination) + @shrine_uploaded_file.stream(destination) + end + + def restartable?(_exception) + false + end +end diff --git a/lib/zipline/retrievers/string_retriever.rb b/lib/zipline/retrievers/string_retriever.rb new file mode 100644 index 0000000..87cb8bb --- /dev/null +++ b/lib/zipline/retrievers/string_retriever.rb @@ -0,0 +1,25 @@ +class Zipline::StringRetriever + def self.build_for(item) + return unless item.is_a?(String) + new(item) + end + + def initialize(string) + @string = string + end + + def retrieve_into(destination) + chunk_size = 1024 + offset = 0 + loop do + bytes = @string.byteslice(offset, chunk_size) + offset += chunk_size + destination.write(bytes) + break if bytes.nil? + end + end + + def restartable?(_exception) + false + end +end diff --git a/lib/zipline/zip_handler.rb b/lib/zipline/zip_handler.rb index d9e2b85..df4c59d 100644 --- a/lib/zipline/zip_handler.rb +++ b/lib/zipline/zip_handler.rb @@ -17,8 +17,25 @@ def handle_file(file, name, options) def write_item(item, name, options) retriever = pick_retriever_for(item) - @streamer.write_file(name, **options.slice(:modification_time)) do |writable| - retriever.download_and_write_into(writable) + attempts = 0 + begin + attempts += 1 + @streamer.write_file(name, **options.slice(:modification_time)) do |writable| + ActiveSupport::Notifications.instrument("zipline.retrieve_and_write", {retriever_class: retriever.class.to_s, filename: name}) do + retriever.retrieve_into(writable) + end + end + rescue => exception + # If an exception is raised and it is known to be caused by the data retrieval from + # remote, we can retry outputting this particular file. ZipKit will rollback the file + # before raising the exception to us, so we can just redo the `write_file` call. + if retriever.restartable?(exception) + @logger.warn { "Reattempting of #{name.inspect} will be reattempted after #{exception}" } + retry + else + @logger.warn { "Retrieval of #{name.inspect} cannot be restarted after #{exception}, abprting" } + raise + end end end @@ -27,10 +44,11 @@ def pick_retriever_for(item) Zipline::CarrierwaveRetriever, Zipline::ActiveStorageRetriever, Zipline::PaperclipRetriever, + Zipline::ShrineRetriever, Zipline::FileRetriever, Zipline::IORetriever, Zipline::HTTPRetriever, - Zipline::StringRetriever, + Zipline::StringRetriever ] retriever_classes.each do |retriever_class| maybe_retriever = retriever_class.build_for(item)