module Manifold
  module API
    # An umbrella project: a root directory that holds a `workspaces`
    # directory (one subdirectory per workspace) and a `vectors` directory.
    class Project
      attr_reader :name, :logger, :directory

      # @param name [String] project name; also the default directory name.
      # @param logger [Logger] logger handed to every workspace of the project.
      # @param directory [Pathname, String] project root, `<pwd>/<name>` by default.
      def initialize(name, logger: Logger.new($stdout), directory: Pathname.pwd.join(name))
        @name = name
        @logger = logger
        @directory = Pathname(directory)
      end

      # Builds a project and creates its `workspaces` and `vectors` directories.
      #
      # @param name [String] project name.
      # @param directory [Pathname, String] project root, `<pwd>/<name>` by default.
      # @return [Project] the project whose directories were just created.
      def self.create(name, directory: Pathname.pwd.join(name))
        new(name, directory:).tap do |project|
          [project.workspaces_directory, project.vectors_directory].each(&:mkpath)
        end
      end

      # Workspaces found under {#workspaces_directory}. The list is memoized, so
      # directories added after the first call are not picked up.
      #
      # @return [Array<Workspace>]
      def workspaces
        @workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger:) }
      end

      # Runs `generate` on every workspace of the project.
      #
      # @return [Array<Workspace>] the workspaces that were visited.
      def generate
        workspaces.each(&:generate)
      end

      # @return [Pathname] `<directory>/workspaces`
      def workspaces_directory
        directory.join("workspaces")
      end

      # @return [Pathname] `<directory>/vectors`
      def vectors_directory
        directory.join("vectors")
      end

      private

      # Subdirectories of the workspaces directory, sorted by path so that
      # generation order does not depend on the file system's listing order.
      # A missing workspaces directory yields an empty list (with a warning)
      # instead of the Errno::ENOENT that Pathname#children would raise.
      def workspace_directories
        unless workspaces_directory.directory?
          logger.warn("No workspaces directory found at #{workspaces_directory}.")
          return []
        end

        workspaces_directory.children.select(&:directory?).sort
      end
    end
  end
end
module Manifold
  module API
    # Encapsulates a single manifold.
    #
    # A workspace is a directory containing a `manifold.yml` configuration and
    # `tables` / `routines` subdirectories. From the vectors listed in the
    # configuration it generates `tables/dimensions.json`.
    class Workspace
      attr_reader :name, :template_path, :logger

      DEFAULT_TEMPLATE_PATH = File.expand_path(
        "../templates/workspace_template.yml", __dir__
      ).freeze

      # @param name [String] workspace name.
      # @param template_path [String, Pathname] manifold template copied by {#add}.
      # @param logger [Logger] receives progress messages.
      # @param directory [Pathname, String, nil] explicit workspace directory.
      #   When nil the directory is resolved as `<pwd>/workspaces/<name>` each
      #   time it is needed.
      def initialize(name, template_path: DEFAULT_TEMPLATE_PATH, logger: Logger.new($stdout), directory: nil)
        @name = name
        @template_path = template_path
        @logger = logger
        @directory = directory && Pathname(directory)
        @vector_service = Services::VectorService.new(logger)
      end

      # Builds a workspace for an existing directory. The workspace is named
      # after the directory and keeps pointing at it, so projects that do not
      # live in the current working directory resolve their files correctly.
      # NOTE(review): vector lookup is delegated to VectorService, which is
      # not changed here — confirm it resolves vectors against the same root.
      #
      # @param directory [Pathname, String] the workspace directory.
      # @param logger [Logger]
      # @return [Workspace]
      def self.from_directory(directory, logger: Logger.new($stdout))
        directory = Pathname(directory)
        new(directory.basename.to_s, logger:, directory:)
      end

      # Creates the tables and routines directories and copies the manifold
      # template to {#manifold_path}.
      def add
        [tables_directory, routines_directory].each(&:mkpath)
        FileUtils.cp(template_path, manifold_path)
      end

      # Writes the dimensions schema for this workspace.
      #
      # @return [nil] when there is no manifold file or it lists no vectors;
      #   otherwise the result of the final log call.
      def generate
        return unless manifold_exists? && any_vectors?

        generate_dimensions
        logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
      end

      # @return [Pathname] `<workspace>/tables`
      def tables_directory
        directory.join("tables")
      end

      # @return [Pathname] `<workspace>/routines`
      def routines_directory
        directory.join("routines")
      end

      # @return [File, nil] an open handle on the manifold file (the caller is
      #   responsible for closing it), or nil when the file does not exist.
      def manifold_file
        return nil unless manifold_exists?

        File.new(manifold_path)
      end

      def manifold_exists?
        manifold_path.file?
      end

      # @return [Pathname] `<workspace>/manifold.yml`
      def manifold_path
        directory.join("manifold.yml")
      end

      private

      # Explicit directory when one was supplied, otherwise the conventional
      # location under the current working directory.
      def directory
        @directory || Pathname.pwd.join("workspaces", name)
      end

      # Parsed manifold configuration. An empty file parses to nil/false, which
      # is normalised to an empty hash so later lookups do not fail.
      def manifold_yaml
        @manifold_yaml ||= YAML.safe_load_file(manifold_path) || {}
      end

      # The tables directory is created on demand: it may be missing when the
      # workspace was not set up through #add.
      def generate_dimensions
        tables_directory.mkpath
        dimensions_path.write("#{dimensions_schema_json}\n")
      end

      def dimensions_schema
        [
          { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
          { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
            "fields" => dimensions_fields }
        ]
      end

      # One schema entry per configured vector; nil results are dropped.
      def dimensions_fields
        vectors.filter_map do |vector|
          logger.info("Loading vector schema for '#{vector}'.")
          @vector_service.load_vector_schema(vector)
        end
      end

      def dimensions_schema_json
        JSON.pretty_generate(dimensions_schema)
      end

      def dimensions_path
        tables_directory.join("dimensions.json")
      end

      def any_vectors?
        !vectors.empty?
      end

      # Vector names from the manifold file; always an array, so a missing or
      # blank `vectors:` key behaves like an empty list.
      def vectors
        Array(manifold_yaml["vectors"])
      end
    end
  end
end
# Generates the BigQuery schema for every workspace of the project rooted at
# the current working directory; the project is named after that directory.
def generate
  root = Pathname.pwd
  API::Project.new(root.basename.to_s, directory: root, logger:).generate
  logger.info "Generated BigQuery schema for all workspaces in the project."
end
- class Workspace - attr_reader :name, :template_path - - DEFAULT_TEMPLATE_PATH = File.expand_path( - "../templates/workspace_template.yml", __dir__ - ).freeze - - def initialize(name, template_path: DEFAULT_TEMPLATE_PATH) - self.name = name - self.template_path = template_path - end - - def add - [tables_directory, routines_directory].each(&:mkpath) - FileUtils.cp(template_path, manifold_path) - end - - def tables_directory - directory.join("tables") - end - - def routines_directory - directory.join("routines") - end - - def manifold_file - return nil unless manifold_exists? - - File.new(manifold_path) - end - - def manifold_exists? - manifold_path.file? - end - - def manifold_path - directory.join("manifold.yml") - end - - private - - def directory - Pathname.pwd.join("workspaces", name) - end - - attr_writer :name, :template_path - end - end -end diff --git a/lib/manifold/services/big_query_service.rb b/lib/manifold/services/big_query_service.rb deleted file mode 100644 index fe8806e..0000000 --- a/lib/manifold/services/big_query_service.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true - -module Manifold - module Services - # Handles the generation of BigQuery schemas based on project configurations - class BigQueryService - def initialize(logger) - @logger = logger - @vector_service = Manifold::Services::VectorService.new(logger) - end - - def generate_dimensions_schema(project_name) - config_path = Pathname.pwd.join("projects", project_name, "manifold.yml") - return unless validate_config_exists(config_path, project_name) - - config = YAML.safe_load_file(config_path) - - fields = config["vectors"].reduce([]) do |list, vector| - @logger.info("Loading vector schema for '#{vector}'.") - [*@vector_service.load_vector_schema(vector), *list] - end - - create_dimensions_file(project_name, fields) - end - - private - - def validate_config_exists(config_path, project_name) - unless config_path.file? 
# Loads a vector's YAML configuration and converts it into a RECORD field
# for the dimensions schema.
#
# @param vector_name [String] vector name as listed in a manifold file.
# @return [Hash] `{ "name" => <downcased name>, "type" => "RECORD", "fields" => [...] }`
# @raise [RuntimeError] when the configuration file is missing or is a
#   directory, contains invalid YAML, or does not parse to a mapping
#   (for example an empty file).
def load_vector_schema(vector_name)
  path = config_path(vector_name)
  config = YAML.safe_load_file(path)
  # An empty or scalar document would otherwise fail below with an opaque
  # NoMethodError/TypeError on `config["attributes"]`.
  unless config.is_a?(Hash)
    raise "Invalid vector configuration #{path}: expected a mapping with an 'attributes' key"
  end

  fields = transform_attributes_to_schema(config["attributes"])
  { "name" => vector_name.downcase, "type" => "RECORD", "fields" => fields }
rescue Errno::ENOENT, Errno::EISDIR
  raise "Vector configuration not found: #{path}"
rescue Psych::Exception => e
  raise "Invalid YAML in vector configuration #{path}: #{e.message}"
end
directory" do expect(project.vectors_directory).to eq directory.join("vectors") @@ -44,4 +46,23 @@ expect(project.workspaces_directory).to eq directory.join("workspaces") end end + + describe "#generate" do + let(:workspace_one) { instance_double(Manifold::API::Workspace) } + let(:workspace_two) { instance_double(Manifold::API::Workspace) } + + before do + described_class.create(name) + + [workspace_one, workspace_two].each do |workspace| + project.workspaces << workspace + allow(workspace).to receive(:generate) + end + end + + it "calls generate on each workspace" do + project.generate + expect([workspace_one, workspace_two]).to all(have_received(:generate)) + end + end end diff --git a/spec/manifold/api/vector_spec.rb b/spec/manifold/api/vector_spec.rb index d3ddcfd..3a6dfd4 100644 --- a/spec/manifold/api/vector_spec.rb +++ b/spec/manifold/api/vector_spec.rb @@ -8,7 +8,7 @@ let(:name) { "page" } - it { is_expected.to have_attributes(name: name) } + it { is_expected.to have_attributes(name:) } describe ".add" do before { vector.add } diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 06825fd..c119a59 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -2,13 +2,15 @@ RSpec.describe Manifold::API::Workspace do include FakeFS::SpecHelpers - subject(:workspace) { described_class.new(name) } - include_context "with template files" + subject(:workspace) { described_class.new(name, logger:) } + let(:logger) { instance_spy(Logger) } let(:name) { "people" } - it { is_expected.to have_attributes(name: name) } + include_context "with template files" + + it { is_expected.to have_attributes(name:) } describe ".add" do before { workspace.add } @@ -55,4 +57,77 @@ it { expect(workspace.manifold_file).to be_an_instance_of(File) } end end + + describe "#generate" do + context "when the manifold configuration exists" do + before do + Pathname.pwd.join("vectors").mkpath + Pathname.pwd.join("vectors", 
"user.yml").write(<<~YAML) + attributes: + user_id: string + email: string + YAML + + workspace.add + workspace.manifold_path.write(<<~YAML) + vectors: + - User + YAML + + workspace.generate + end + + it "generates a dimensions schema file" do + expect(workspace.tables_directory.join("dimensions.json")).to be_file + end + + it "sets the ID field" do + schema = parse_dimensions_schema + expect(schema).to include({ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }) + end + + it "sets the dimensions fields" do + expect(get_dimension("user")["fields"]).to include( + { "type" => "STRING", "name" => "user_id", "mode" => "NULLABLE" }, + { "type" => "STRING", "name" => "email", "mode" => "NULLABLE" } + ) + end + + it "logs vector schema loading" do + expect(logger).to have_received(:info).with("Loading vector schema for 'User'.") + end + + it "logs successful generation" do + expect(logger).to have_received(:info) + .with("Generated BigQuery dimensions table schema for workspace '#{name}'.") + end + + def parse_dimensions_schema + JSON.parse(workspace.tables_directory.join("dimensions.json").read) + end + + def get_dimension(field) + dimensions = parse_dimensions_schema.find { |f| f["name"] == "dimensions" } + dimensions["fields"].find { |f| f["name"] == field } + end + end + + context "when the manifold configuration is missing" do + it "returns nil" do + expect(workspace.generate).to be_nil + end + end + + context "when the manifold configuration has no vectors" do + before do + workspace.add + workspace.manifold_path.write("vectors:\n") + workspace.generate + end + + it "returns nil" do + expect(workspace.generate).to be_nil + end + end + end end diff --git a/spec/manifold/cli_spec.rb b/spec/manifold/cli_spec.rb index aadab73..33310db 100644 --- a/spec/manifold/cli_spec.rb +++ b/spec/manifold/cli_spec.rb @@ -34,7 +34,7 @@ it "logs the project creation" do cli.init(project_name) expect(null_logger).to have_received(:info) - .with("Created umbrella project 
'#{project_name}' with projects and vectors directories.") + .with("Created umbrella project '#{project_name}' with workspaces and vectors directories.") end end end diff --git a/spec/manifold/services/big_query_service_spec.rb b/spec/manifold/services/big_query_service_spec.rb deleted file mode 100644 index ee7a584..0000000 --- a/spec/manifold/services/big_query_service_spec.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -require "fakefs/spec_helpers" - -RSpec.describe Manifold::Services::BigQueryService do - include FakeFS::SpecHelpers - - let(:logger) { instance_spy(Logger) } - let(:service) { described_class.new(logger) } - let(:project_name) { "test_project" } - let(:dimensions_path) do - Pathname.pwd.join("projects", project_name, "bq", "tables", "dimensions.json") - end - - before do - Pathname.pwd.join("projects", project_name).mkpath - end - - describe "#generate_dimensions_schema" do - context "when the project configuration exists" do - before do - Pathname.pwd.join("vectors").mkpath - Pathname.pwd.join("vectors", "user.yml").write(<<~YAML) - attributes: - user_id: string - email: string - YAML - - Pathname.pwd.join("projects", project_name, "manifold.yml").write(<<~YAML) - vectors: - - User - YAML - - service.generate_dimensions_schema(project_name) - end - - it "generates a dimensions schema file" do - expect(dimensions_path.file?).to be true - end - - it "includes the expected schema structure" do - schema = JSON.parse(dimensions_path.read) - expect(schema).to include({ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }) - end - end - - context "when the project configuration is missing" do - it "indicates the configuration is missing" do - service.generate_dimensions_schema(project_name) - expect(logger).to have_received(:error) - .with(/Config file missing for project/) - end - end - end -end diff --git a/spec/manifold/services/vector_service_spec.rb b/spec/manifold/services/vector_service_spec.rb index ee4903c..529b2a1 
100644 --- a/spec/manifold/services/vector_service_spec.rb +++ b/spec/manifold/services/vector_service_spec.rb @@ -51,16 +51,24 @@ allow(logger).to receive(:error) end - it "returns nil" do - expect(service.load_vector_schema(vector_name)).to be_nil + it "raises an error" do + expect { service.load_vector_schema(vector_name) }.to raise_error( + "Vector configuration not found: #{Pathname.pwd.join("vectors", "#{vector_name}.yml")}" + ) end + end - it "logs an error message" do - path = Pathname.pwd.join("vectors", "#{vector_name}.yml") - service.load_vector_schema(vector_name) + context "when vector configuration is invalid" do + before do + Pathname.pwd.join("vectors").mkpath + config_path = Pathname.pwd.join("vectors", "#{vector_name}.yml") + config_path.write("invalid_key: [value1, value2") + end - expect(logger).to have_received(:error) - .with("Vector configuration not found: #{path}") + it "raises an error" do + expect { service.load_vector_schema(vector_name) }.to raise_error( + /Invalid YAML in vector configuration/ + ) end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 8b65545..45d1eb1 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -7,7 +7,7 @@ require "simplecov-json" require "simplecov-lcov" require "fakefs/spec_helpers" -Dir[File.join(__dir__, "support", "**", "*.rb")].sort.each { |f| require f } +Dir[File.join(__dir__, "support", "**", "*.rb")].each { |f| require f } SimpleCov::Formatter::LcovFormatter.config do |c| c.report_with_single_file = true