From 3d38ae1df2d3c942b51255a42d45dd1979356b63 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 06:24:21 -0500 Subject: [PATCH 01/17] Rename project to api to reflect app hierarchy --- lib/manifold/{project => api}/project.rb | 0 lib/manifold/{project => api}/vector.rb | 0 lib/manifold/{project => api}/workspace.rb | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename lib/manifold/{project => api}/project.rb (100%) rename lib/manifold/{project => api}/vector.rb (100%) rename lib/manifold/{project => api}/workspace.rb (100%) diff --git a/lib/manifold/project/project.rb b/lib/manifold/api/project.rb similarity index 100% rename from lib/manifold/project/project.rb rename to lib/manifold/api/project.rb diff --git a/lib/manifold/project/vector.rb b/lib/manifold/api/vector.rb similarity index 100% rename from lib/manifold/project/vector.rb rename to lib/manifold/api/vector.rb diff --git a/lib/manifold/project/workspace.rb b/lib/manifold/api/workspace.rb similarity index 100% rename from lib/manifold/project/workspace.rb rename to lib/manifold/api/workspace.rb From 98408997787d6ec29fc94c978d4b5d11a9cf308e Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 06:32:42 -0500 Subject: [PATCH 02/17] Factor out BigQueryService --- lib/manifold/api/workspace.rb | 38 ++++++++++- lib/manifold/cli.rb | 15 ++--- lib/manifold/services/big_query_service.rb | 61 ----------------- spec/manifold/api/workspace_spec.rb | 66 ++++++++++++++++++- .../services/big_query_service_spec.rb | 55 ---------------- 5 files changed, 106 insertions(+), 129 deletions(-) delete mode 100644 lib/manifold/services/big_query_service.rb delete mode 100644 spec/manifold/services/big_query_service_spec.rb diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index a8bd0f9..2130903 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -10,9 +10,11 @@ class Workspace "../templates/workspace_template.yml", __dir__ ).freeze - def initialize(name, template_path: DEFAULT_TEMPLATE_PATH) + def initialize(name, template_path: DEFAULT_TEMPLATE_PATH, logger: Logger.new($stdout)) self.name = name self.template_path = template_path + @logger = logger + @vector_service = Services::VectorService.new(logger) end def add @@ -20,6 +22,20 @@ def add FileUtils.cp(template_path, manifold_path) end + def generate + return unless manifold_exists? + + config = YAML.safe_load_file(manifold_path) + return if config["vectors"].nil? || config["vectors"].empty? + + fields = config["vectors"].reduce([]) do |list, vector| + @logger.info("Loading vector schema for '#{vector}'.") + [*@vector_service.load_vector_schema(vector), *list] + end + + create_dimensions_file(fields) + end + def tables_directory directory.join("tables") end @@ -48,6 +64,26 @@ def directory Pathname.pwd.join("workspaces", name) end + def create_dimensions_file(fields) + tables_directory.mkpath + dimensions = dimensions_schema(fields) + + dimensions_path.write(dimensions) + @logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.") + end + + def dimensions_schema(fields) + JSON.pretty_generate([ + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, + { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED", + "fields" => fields } + ]).concat("\n") + end + + def dimensions_path + tables_directory.join("dimensions.json") + end + attr_writer :name, :template_path end end diff --git a/lib/manifold/cli.rb b/lib/manifold/cli.rb index d640d9a..0b1016c 100644 --- a/lib/manifold/cli.rb +++ b/lib/manifold/cli.rb @@ -11,8 +11,6 @@ def initialize(*args, logger: Logger.new($stdout)) self.logger = logger logger.level = Logger::INFO - - self.bq_service = Services::BigQueryService.new(logger) end desc "init NAME", "Generate a new umbrella project for data management" @@ -47,14 +45,11 @@ def add(name) logger.info "Added workspace '#{name}' with tables and routines directories." end - desc "generate PROJECT_NAME SERVICE", "Generate services for a project" - def generate(project_name, service) - case service - when "bq" - bq_service.generate_dimensions_schema(project_name) - else - logger.error("Unsupported service: #{service}") - end + desc "generate WORKSPACE_NAME", "Generate BigQuery schema for a workspace" + def generate(name) + workspace = API::Workspace.new(name, logger: logger) + workspace.generate + logger.info "Generated BigQuery schema for workspace '#{name}'." end end end diff --git a/lib/manifold/services/big_query_service.rb b/lib/manifold/services/big_query_service.rb deleted file mode 100644 index fe8806e..0000000 --- a/lib/manifold/services/big_query_service.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true - -module Manifold - module Services - # Handles the generation of BigQuery schemas based on project configurations - class BigQueryService - def initialize(logger) - @logger = logger - @vector_service = Manifold::Services::VectorService.new(logger) - end - - def generate_dimensions_schema(project_name) - config_path = Pathname.pwd.join("projects", project_name, "manifold.yml") - return unless validate_config_exists(config_path, project_name) - - config = YAML.safe_load_file(config_path) - - fields = config["vectors"].reduce([]) do |list, vector| - @logger.info("Loading vector schema for '#{vector}'.") - [*@vector_service.load_vector_schema(vector), *list] - end - - create_dimensions_file(project_name, fields) - end - - private - - def validate_config_exists(config_path, project_name) - unless config_path.file? - @logger.error("Config file missing for project '#{project_name}'.") - return false - end - true - end - - def create_dimensions_file(project_name, fields) - tables_directory(project_name).mkpath - dimensions = dimensions_schema(fields) - - dimensions_path(project_name).write(dimensions) - @logger.info("Generated BigQuery dimensions table schema for '#{project_name}'.") - end - - def dimensions_schema(fields) - JSON.pretty_generate([ - { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, - { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED", - "fields" => fields } - ]).concat("\n") - end - - def tables_directory(project_name) - Pathname.pwd.join("projects", project_name, "bq", "tables") - end - - def dimensions_path(project_name) - tables_directory(project_name).join("dimensions.json") - end - end - end -end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 06825fd..7c64068 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -2,12 +2,14 @@ RSpec.describe Manifold::API::Workspace do include FakeFS::SpecHelpers - subject(:workspace) { described_class.new(name) } - include_context "with template files" + subject(:workspace) { described_class.new(name, logger: logger) } + let(:logger) { instance_spy(Logger) } let(:name) { "people" } + include_context "with template files" + it { is_expected.to have_attributes(name: name) } describe ".add" do @@ -55,4 +57,64 @@ it { expect(workspace.manifold_file).to be_an_instance_of(File) } end end + + describe "#generate" do + context "when the manifold configuration exists" do + before do + # Create necessary directories and files + Pathname.pwd.join("vectors").mkpath + Pathname.pwd.join("vectors", "user.yml").write(<<~YAML) + attributes: + user_id: string + email: string + YAML + + workspace.add # Creates the workspace directory and manifold.yml + workspace.manifold_path.write(<<~YAML) + vectors: + - User + YAML + + workspace.generate + end + + it "generates a dimensions schema file" do + expect(workspace.tables_directory.join("dimensions.json")).to be_file + end + + it "includes the expected schema structure" do + schema = JSON.parse(workspace.tables_directory.join("dimensions.json").read) + expect(schema).to include( + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } + ) + end + + it "logs vector schema loading" do + expect(logger).to have_received(:info).with("Loading vector schema for 'User'.") + end + + it "logs successful generation" do + expect(logger).to have_received(:info) + .with("Generated BigQuery dimensions table schema for workspace '#{name}'.") + end + end + + context "when the manifold configuration is missing" do + it "returns nil" do + expect(workspace.generate).to be_nil + end + end + + context "when the manifold configuration has no vectors" do + before do + workspace.add + workspace.manifold_path.write("vectors:\n") + workspace.generate + end + + it "returns nil" do + expect(workspace.generate).to be_nil + end + end + end end diff --git a/spec/manifold/services/big_query_service_spec.rb b/spec/manifold/services/big_query_service_spec.rb deleted file mode 100644 index ee7a584..0000000 --- a/spec/manifold/services/big_query_service_spec.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -require "fakefs/spec_helpers" - -RSpec.describe Manifold::Services::BigQueryService do - include FakeFS::SpecHelpers - - let(:logger) { instance_spy(Logger) } - let(:service) { described_class.new(logger) } - let(:project_name) { "test_project" } - let(:dimensions_path) do - Pathname.pwd.join("projects", project_name, "bq", "tables", "dimensions.json") - end - - before do - Pathname.pwd.join("projects", project_name).mkpath - end - - describe "#generate_dimensions_schema" do - context "when the project configuration exists" do - before do - Pathname.pwd.join("vectors").mkpath - Pathname.pwd.join("vectors", "user.yml").write(<<~YAML) - attributes: - user_id: string - email: string - YAML - - Pathname.pwd.join("projects", project_name, "manifold.yml").write(<<~YAML) - vectors: - - User - YAML - - service.generate_dimensions_schema(project_name) - end - - it "generates a dimensions schema file" do - expect(dimensions_path.file?).to be true - end - - it "includes the expected schema structure" do - schema = JSON.parse(dimensions_path.read) - expect(schema).to include({ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }) - end - end - - context "when the project configuration is missing" do - it "indicates the configuration is missing" do - service.generate_dimensions_schema(project_name) - expect(logger).to have_received(:error) - .with(/Config file missing for project/) - end - end - end -end From 71435c1ab1bbf875f080995dc797245b56752f24 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 06:44:17 -0500 Subject: [PATCH 03/17] Break out vectors helpers --- lib/manifold/api/workspace.rb | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index 2130903..5c2e85a 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -23,12 +23,9 @@ def add end def generate - return unless manifold_exists? + return unless manifold_exists? && any_vectors? - config = YAML.safe_load_file(manifold_path) - return if config["vectors"].nil? || config["vectors"].empty? - - fields = config["vectors"].reduce([]) do |list, vector| + fields = vectors.reduce([]) do |list, vector| @logger.info("Loading vector schema for '#{vector}'.") [*@vector_service.load_vector_schema(vector), *list] end @@ -50,6 +47,10 @@ def manifold_file File.new(manifold_path) end + def manifold_yaml + @manifold_yaml ||= YAML.safe_load_file(manifold_path) + end + def manifold_exists? manifold_path.file? end @@ -66,9 +67,7 @@ def directory def create_dimensions_file(fields) tables_directory.mkpath - dimensions = dimensions_schema(fields) - - dimensions_path.write(dimensions) + dimensions_path.write(dimensions_schema(fields)) @logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.") end @@ -84,6 +83,14 @@ def dimensions_path tables_directory.join("dimensions.json") end + def any_vectors? + !(vectors.nil? || vectors.empty?) + end + + def vectors + manifold_yaml["vectors"] + end + attr_writer :name, :template_path end end From 7ac49d664c8e645ef7c72f183b9f51593b902649 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 08:13:45 -0500 Subject: [PATCH 04/17] Set up project.generate --- lib/manifold/api/project.rb | 19 ++++++++++++++++--- lib/manifold/api/workspace.rb | 12 ++++++++---- lib/manifold/cli.rb | 13 +++++++------ spec/manifold/cli_spec.rb | 2 +- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/lib/manifold/api/project.rb b/lib/manifold/api/project.rb index abc27ea..2f3a1a6 100644 --- a/lib/manifold/api/project.rb +++ b/lib/manifold/api/project.rb @@ -4,10 +4,11 @@ module Manifold module API # Projects API class Project - attr_reader :name, :directory + attr_reader :name, :logger, :directory - def initialize(name, directory: Pathname.pwd.join(name)) + def initialize(name, logger: Logger.new($stdout), directory: Pathname.pwd.join(name)) self.name = name + self.logger = logger self.directory = Pathname(directory) end @@ -17,6 +18,10 @@ def self.create(name, directory: Pathname.pwd.join(name)) end end + def generate + workspaces.each(&:generate) + end + def workspaces_directory directory.join("workspaces") end @@ -27,7 +32,15 @@ def vectors_directory private - attr_writer :name, :directory + def workspaces + @workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger: logger) } + end + + def workspace_directories + workspaces_directory.children.select(&:directory?) + end + + attr_writer :name, :logger, :directory end end end diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index 5c2e85a..7440f2f 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -17,6 +17,10 @@ def initialize(name, template_path: DEFAULT_TEMPLATE_PATH, logger: Logger.new($s @vector_service = Services::VectorService.new(logger) end + def self.from_directory(directory, logger: Logger.new($stdout)) + new(directory.basename.to_s, logger: logger) + end + def add [tables_directory, routines_directory].each(&:mkpath) FileUtils.cp(template_path, manifold_path) @@ -47,10 +51,6 @@ def manifold_file File.new(manifold_path) end - def manifold_yaml - @manifold_yaml ||= YAML.safe_load_file(manifold_path) - end - def manifold_exists? manifold_path.file? end @@ -65,6 +65,10 @@ def directory Pathname.pwd.join("workspaces", name) end + def manifold_yaml + @manifold_yaml ||= YAML.safe_load_file(manifold_path) + end + def create_dimensions_file(fields) tables_directory.mkpath dimensions_path.write(dimensions_schema(fields)) diff --git a/lib/manifold/cli.rb b/lib/manifold/cli.rb index 0b1016c..d81b7c8 100644 --- a/lib/manifold/cli.rb +++ b/lib/manifold/cli.rb @@ -16,7 +16,7 @@ def initialize(*args, logger: Logger.new($stdout)) desc "init NAME", "Generate a new umbrella project for data management" def init(name) Manifold::API::Project.create(name) - logger.info "Created umbrella project '#{name}' with projects and vectors directories." + logger.info "Created umbrella project '#{name}' with workspaces and vectors directories." end desc "vectors SUBCOMMAND ...ARGS", "Manage vectors" @@ -45,11 +45,12 @@ def add(name) logger.info "Added workspace '#{name}' with tables and routines directories." end - desc "generate WORKSPACE_NAME", "Generate BigQuery schema for a workspace" - def generate(name) - workspace = API::Workspace.new(name, logger: logger) - workspace.generate - logger.info "Generated BigQuery schema for workspace '#{name}'." + desc "generate", "Generate BigQuery schema for all workspaces in the project" + def generate + name = Pathname.pwd.basename.to_s + project = API::Project.new(name, logger: logger) + project.generate + logger.info "Generated BigQuery schema for all workspaces in the project." end end end diff --git a/spec/manifold/cli_spec.rb b/spec/manifold/cli_spec.rb index aadab73..33310db 100644 --- a/spec/manifold/cli_spec.rb +++ b/spec/manifold/cli_spec.rb @@ -34,7 +34,7 @@ it "logs the project creation" do cli.init(project_name) expect(null_logger).to have_received(:info) - .with("Created umbrella project '#{project_name}' with projects and vectors directories.") + .with("Created umbrella project '#{project_name}' with workspaces and vectors directories.") end end end From 95c7c13e2a586202e3882d7c2b27abde2609213b Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 08:32:37 -0500 Subject: [PATCH 05/17] Add a project.generate test --- lib/manifold/api/project.rb | 8 ++++---- spec/manifold/api/project_spec.rb | 21 +++++++++++++++++++++ spec/manifold/api/workspace_spec.rb | 3 +-- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/manifold/api/project.rb b/lib/manifold/api/project.rb index 2f3a1a6..e4e8f86 100644 --- a/lib/manifold/api/project.rb +++ b/lib/manifold/api/project.rb @@ -18,6 +18,10 @@ def self.create(name, directory: Pathname.pwd.join(name)) end end + def workspaces + @workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger: logger) } + end + def generate workspaces.each(&:generate) end @@ -32,10 +36,6 @@ def vectors_directory private - def workspaces - @workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger: logger) } - end - def workspace_directories workspaces_directory.children.select(&:directory?) end diff --git a/spec/manifold/api/project_spec.rb b/spec/manifold/api/project_spec.rb index 62b764f..70f0e9c 100644 --- a/spec/manifold/api/project_spec.rb +++ b/spec/manifold/api/project_spec.rb @@ -7,6 +7,8 @@ let(:name) { "wetland" } + include_context "with template files" + it { is_expected.to have_attributes(name: name) } describe ".create" do @@ -44,4 +46,23 @@ expect(project.workspaces_directory).to eq directory.join("workspaces") end end + + describe "#generate" do + let(:workspace1) { instance_double(Manifold::API::Workspace) } + let(:workspace2) { instance_double(Manifold::API::Workspace) } + + before do + described_class.create(name) + + [workspace1, workspace2].each do |workspace| + project.workspaces << workspace + allow(workspace).to receive(:generate) + end + end + + it "calls generate on each workspace" do + project.generate + expect([workspace1, workspace2]).to all(have_received(:generate)) + end + end end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 7c64068..12bb3fc 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -61,7 +61,6 @@ describe "#generate" do context "when the manifold configuration exists" do before do - # Create necessary directories and files Pathname.pwd.join("vectors").mkpath Pathname.pwd.join("vectors", "user.yml").write(<<~YAML) attributes: @@ -69,7 +68,7 @@ email: string YAML - workspace.add # Creates the workspace directory and manifold.yml + workspace.add workspace.manifold_path.write(<<~YAML) vectors: - User From 25200743420b840ba2b62842691cbdec2c528467 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 08:47:18 -0500 Subject: [PATCH 06/17] fix bugs --- lib/manifold.rb | 1 + lib/manifold/api/workspace.rb | 2 +- lib/manifold/cli.rb | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/manifold.rb b/lib/manifold.rb index 7374f0d..2929cb9 100644 --- a/lib/manifold.rb +++ b/lib/manifold.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "json" require "logger" require "pathname" require "thor" diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index 7440f2f..5ef824e 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -31,7 +31,7 @@ def generate fields = vectors.reduce([]) do |list, vector| @logger.info("Loading vector schema for '#{vector}'.") - [*@vector_service.load_vector_schema(vector), *list] + list << @vector_service.load_vector_schema(vector) end create_dimensions_file(fields) diff --git a/lib/manifold/cli.rb b/lib/manifold/cli.rb index d81b7c8..1f4437c 100644 --- a/lib/manifold/cli.rb +++ b/lib/manifold/cli.rb @@ -48,7 +48,7 @@ def add(name) desc "generate", "Generate BigQuery schema for all workspaces in the project" def generate name = Pathname.pwd.basename.to_s - project = API::Project.new(name, logger: logger) + project = API::Project.new(name, directory: Pathname.pwd, logger: logger) project.generate logger.info "Generated BigQuery schema for all workspaces in the project." end From 344cd945463352102674420d5bff761fc8d314ac Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 08:56:54 -0500 Subject: [PATCH 07/17] some light segmentation --- lib/manifold/api/workspace.rb | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index 5ef824e..65939f4 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -71,16 +71,20 @@ def manifold_yaml def create_dimensions_file(fields) tables_directory.mkpath - dimensions_path.write(dimensions_schema(fields)) + dimensions_path.write(dimensions_schema_json(fields).concat("\n")) @logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.") end def dimensions_schema(fields) - JSON.pretty_generate([ - { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, - { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED", - "fields" => fields } - ]).concat("\n") + [ + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, + { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED", + "fields" => fields } + ] + end + + def dimensions_schema_json(fields) + JSON.pretty_generate(dimensions_schema(fields)) end def dimensions_path From c1563549811bd8a9fd06ff988b45824e05a4ea43 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 09:02:34 -0500 Subject: [PATCH 08/17] OOP'ify workspace generation --- lib/manifold/api/workspace.rb | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index 65939f4..6f0a6ba 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -29,12 +29,8 @@ def add def generate return unless manifold_exists? && any_vectors? - fields = vectors.reduce([]) do |list, vector| - @logger.info("Loading vector schema for '#{vector}'.") - list << @vector_service.load_vector_schema(vector) - end - - create_dimensions_file(fields) + generate_dimensions + @logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.") end def tables_directory @@ -69,22 +65,27 @@ def manifold_yaml @manifold_yaml ||= YAML.safe_load_file(manifold_path) end - def create_dimensions_file(fields) - tables_directory.mkpath - dimensions_path.write(dimensions_schema_json(fields).concat("\n")) - @logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.") + def generate_dimensions + dimensions_path.write(dimensions_schema_json.concat("\n")) end - def dimensions_schema(fields) + def dimensions_schema [ { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED", - "fields" => fields } + "fields" => dimensions_fields } ] end - def dimensions_schema_json(fields) - JSON.pretty_generate(dimensions_schema(fields)) + def dimensions_fields + vectors.reduce([]) do |list, vector| + @logger.info("Loading vector schema for '#{vector}'.") + list << @vector_service.load_vector_schema(vector) + end + end + + def dimensions_schema_json + JSON.pretty_generate(dimensions_schema) end def dimensions_path From 8fcf9a860da858e18d8998840d5f7d3f08838003 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 09:22:45 -0500 Subject: [PATCH 09/17] tighten schema assertions --- spec/manifold/api/workspace_spec.rb | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 12bb3fc..520ea7b 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -81,10 +81,15 @@ expect(workspace.tables_directory.join("dimensions.json")).to be_file end - it "includes the expected schema structure" do - schema = JSON.parse(workspace.tables_directory.join("dimensions.json").read) - expect(schema).to include( - { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } + it "sets the ID field" do + schema = parse_dimensions_schema + expect(schema).to include({ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }) + end + + it "sets the dimensions fields" do + expect(get_dimension("user")["fields"]).to include( + { "type" => "STRING", "name" => "user_id", "mode" => "NULLABLE" }, + { "type" => "STRING", "name" => "email", "mode" => "NULLABLE" } ) end @@ -96,6 +101,15 @@ expect(logger).to have_received(:info) .with("Generated BigQuery dimensions table schema for workspace '#{name}'.") end + + def parse_dimensions_schema + JSON.parse(workspace.tables_directory.join("dimensions.json").read) + end + + def get_dimension(field) + dimensions = parse_dimensions_schema.find { |f| f["name"] == "dimensions" } + dimensions["fields"].find { |f| f["name"] == field } + end end context "when the manifold configuration is missing" do From 215cbaab3bb0cde6e59f05a74853e38df2e099e4 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 17:45:05 -0500 Subject: [PATCH 10/17] Set 3.1 minimum and enable new cops --- .rubocop.yml | 3 ++- lib/manifold.rb | 2 +- lib/manifold/api/project.rb | 2 +- lib/manifold/api/workspace.rb | 2 +- lib/manifold/cli.rb | 2 +- manifold.gemspec | 3 ++- spec/manifold/api/project_spec.rb | 8 ++++---- spec/manifold/api/workspace_spec.rb | 2 +- spec/spec_helper.rb | 2 +- 9 files changed, 14 insertions(+), 12 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index fec8967..c715ffb 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -3,7 +3,8 @@ inherit_from: .rubocop_todo.yml require: rubocop-rspec AllCops: - TargetRubyVersion: 3.0 + NewCops: enable + TargetRubyVersion: 3.1 Style/StringLiterals: EnforcedStyle: double_quotes diff --git a/lib/manifold.rb b/lib/manifold.rb index 2929cb9..791fec8 100644 --- a/lib/manifold.rb +++ b/lib/manifold.rb @@ -6,7 +6,7 @@ require "thor" require "yaml" -Dir[File.join(__dir__, "manifold", "**", "*.rb")].sort.each do |file| +Dir[File.join(__dir__, "manifold", "**", "*.rb")].each do |file| require file end diff --git a/lib/manifold/api/project.rb b/lib/manifold/api/project.rb index e4e8f86..4376d17 100644 --- a/lib/manifold/api/project.rb +++ b/lib/manifold/api/project.rb @@ -19,7 +19,7 @@ def self.create(name, directory: Pathname.pwd.join(name)) end def workspaces - @workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger: logger) } + @workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger:) } end def generate diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index 6f0a6ba..de3ab30 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -18,7 +18,7 @@ def initialize(name, template_path: DEFAULT_TEMPLATE_PATH, logger: Logger.new($s end def self.from_directory(directory, logger: Logger.new($stdout)) - new(directory.basename.to_s, logger: logger) + new(directory.basename.to_s, logger:) end def add diff --git a/lib/manifold/cli.rb b/lib/manifold/cli.rb index 1f4437c..e121480 100644 --- a/lib/manifold/cli.rb +++ b/lib/manifold/cli.rb @@ -48,7 +48,7 @@ def add(name) desc "generate", "Generate BigQuery schema for all workspaces in the project" def generate name = Pathname.pwd.basename.to_s - project = API::Project.new(name, directory: Pathname.pwd, logger: logger) + project = API::Project.new(name, directory: Pathname.pwd, logger:) project.generate logger.info "Generated BigQuery schema for all workspaces in the project." end diff --git a/manifold.gemspec b/manifold.gemspec index 15cff7a..a27a350 100644 --- a/manifold.gemspec +++ b/manifold.gemspec @@ -11,7 +11,7 @@ Gem::Specification.new do |spec| spec.summary = "A CLI for managing data infrastructures in BigQuery" spec.homepage = "https://github.com/bustle/manifold" spec.license = "MIT" - spec.required_ruby_version = ">= 3.0.0" + spec.required_ruby_version = ">= 3.1.0" spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = "https://github.com/bustle/manifold" @@ -32,4 +32,5 @@ Gem::Specification.new do |spec| # Uncomment to register a new dependency of your gem spec.add_dependency "thor" + spec.metadata["rubygems_mfa_required"] = "true" end diff --git a/spec/manifold/api/project_spec.rb b/spec/manifold/api/project_spec.rb index 70f0e9c..510ee9f 100644 --- a/spec/manifold/api/project_spec.rb +++ b/spec/manifold/api/project_spec.rb @@ -48,13 +48,13 @@ end describe "#generate" do - let(:workspace1) { instance_double(Manifold::API::Workspace) } - let(:workspace2) { instance_double(Manifold::API::Workspace) } + let(:workspace_one) { instance_double(Manifold::API::Workspace) } + let(:workspace_two) { instance_double(Manifold::API::Workspace) } before do described_class.create(name) - [workspace1, workspace2].each do |workspace| + [workspace_one, workspace_two].each do |workspace| project.workspaces << workspace allow(workspace).to receive(:generate) end @@ -62,7 +62,7 @@ it "calls generate on each workspace" do project.generate - expect([workspace1, workspace2]).to all(have_received(:generate)) + expect([workspace_one, workspace_two]).to all(have_received(:generate)) end end end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 520ea7b..b6ab488 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -3,7 +3,7 @@ RSpec.describe Manifold::API::Workspace do include FakeFS::SpecHelpers - subject(:workspace) { described_class.new(name, logger: logger) } + subject(:workspace) { described_class.new(name, logger:) } let(:logger) { instance_spy(Logger) } let(:name) { "people" } diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 8b65545..45d1eb1 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -7,7 +7,7 @@ require "simplecov-json" require "simplecov-lcov" require "fakefs/spec_helpers" -Dir[File.join(__dir__, "support", "**", "*.rb")].sort.each { |f| require f } +Dir[File.join(__dir__, "support", "**", "*.rb")].each { |f| require f } SimpleCov::Formatter::LcovFormatter.config do |c| c.report_with_single_file = true From fbd896bf4cf991f151d68a5863bce5ed4334d352 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 17:47:22 -0500 Subject: [PATCH 11/17] not sure why these got missed --- lib/manifold/api/project.rb | 2 +- spec/manifold/api/project_spec.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/manifold/api/project.rb b/lib/manifold/api/project.rb index 4376d17..dfc8cc6 100644 --- a/lib/manifold/api/project.rb +++ b/lib/manifold/api/project.rb @@ -13,7 +13,7 @@ def initialize(name, logger: Logger.new($stdout), directory: Pathname.pwd.join(n end def self.create(name, directory: Pathname.pwd.join(name)) - new(name, directory: directory).tap do |project| + new(name, directory:).tap do |project| [project.workspaces_directory, project.vectors_directory].each(&:mkpath) end end diff --git a/spec/manifold/api/project_spec.rb b/spec/manifold/api/project_spec.rb index 510ee9f..1e1e942 100644 --- a/spec/manifold/api/project_spec.rb +++ b/spec/manifold/api/project_spec.rb @@ -32,11 +32,11 @@ end context "with directory" do - subject(:project) { described_class.new(name, directory: directory) } + subject(:project) { described_class.new(name, directory:) } let(:directory) { Pathname.pwd.join("supplied_directory") } - it { is_expected.to have_attributes(directory: directory) } + it { is_expected.to have_attributes(directory:) } it "uses it as the base for the vectors directory" do expect(project.vectors_directory).to eq directory.join("vectors") From fdc2003b9ae928a6752ada28d03f304d603539af Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 17:48:26 -0500 Subject: [PATCH 12/17] Fix name --- spec/manifold/api/project_spec.rb | 2 +- spec/manifold/api/vector_spec.rb | 2 +- spec/manifold/api/workspace_spec.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/manifold/api/project_spec.rb b/spec/manifold/api/project_spec.rb index 1e1e942..0b09a5a 100644 --- a/spec/manifold/api/project_spec.rb +++ b/spec/manifold/api/project_spec.rb @@ -9,7 +9,7 @@ include_context "with template files" - it { is_expected.to have_attributes(name: name) } + it { is_expected.to have_attributes(name:) } describe ".create" do before { described_class.create(name) } diff --git a/spec/manifold/api/vector_spec.rb b/spec/manifold/api/vector_spec.rb index d3ddcfd..3a6dfd4 100644 --- a/spec/manifold/api/vector_spec.rb +++ b/spec/manifold/api/vector_spec.rb @@ -8,7 +8,7 @@ let(:name) { "page" } - it { is_expected.to have_attributes(name: name) } + it { is_expected.to have_attributes(name:) } describe ".add" do before { vector.add } diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index b6ab488..c119a59 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -10,7 +10,7 @@ include_context "with template files" - it { is_expected.to have_attributes(name: name) } + it { is_expected.to have_attributes(name:) } describe ".add" do before { workspace.add } From 9ebd87ddbdc4a9cf1c2628e2f429181e0fd2756d Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 17:54:27 -0500 Subject: [PATCH 13/17] Slash attr_writers --- lib/manifold/api/project.rb | 8 +++----- lib/manifold/api/vector.rb | 6 ++---- lib/manifold/api/workspace.rb | 6 ++---- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/lib/manifold/api/project.rb b/lib/manifold/api/project.rb index dfc8cc6..4cbb12a 100644 --- a/lib/manifold/api/project.rb +++ b/lib/manifold/api/project.rb @@ -7,9 +7,9 @@ class Project attr_reader :name, :logger, :directory def initialize(name, logger: Logger.new($stdout), directory: Pathname.pwd.join(name)) - self.name = name - self.logger = logger - self.directory = Pathname(directory) + @name = name + @logger = logger + @directory = Pathname(directory) end def self.create(name, directory: Pathname.pwd.join(name)) @@ -39,8 +39,6 @@ def vectors_directory def workspace_directories workspaces_directory.children.select(&:directory?) end - - attr_writer :name, :logger, :directory end end end diff --git a/lib/manifold/api/vector.rb b/lib/manifold/api/vector.rb index 78f639d..3c28a8c 100644 --- a/lib/manifold/api/vector.rb +++ b/lib/manifold/api/vector.rb @@ -11,8 +11,8 @@ class Vector ).freeze def initialize(name, template_path: DEFAULT_TEMPLATE_PATH) - self.name = name - self.template_path = Pathname(template_path) + @name = name + @template_path = Pathname(template_path) end def add @@ -22,8 +22,6 @@ def add private - attr_writer :name, :template_path - def directory Pathname.pwd.join("vectors") end diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index de3ab30..2ffcf11 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -11,8 +11,8 @@ class Workspace ).freeze def initialize(name, template_path: DEFAULT_TEMPLATE_PATH, logger: Logger.new($stdout)) - self.name = name - self.template_path = template_path + @name = name + @template_path = template_path @logger = logger @vector_service = Services::VectorService.new(logger) end @@ -99,8 +99,6 @@ def any_vectors? def vectors manifold_yaml["vectors"] end - - attr_writer :name, :template_path end end end From afbdd7f9d682b1dad1278688a8ab9e6f2b06c22f Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 17:56:21 -0500 Subject: [PATCH 14/17] attr_reader the :logger --- lib/manifold/api/workspace.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index 2ffcf11..1f91982 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -4,7 +4,7 @@ module Manifold module API # Encapsulates a single manifold. class Workspace - attr_reader :name, :template_path + attr_reader :name, :template_path, :logger DEFAULT_TEMPLATE_PATH = File.expand_path( "../templates/workspace_template.yml", __dir__ @@ -30,7 +30,7 @@ def generate return unless manifold_exists? && any_vectors? generate_dimensions - @logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.") + logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.") end def tables_directory @@ -79,7 +79,7 @@ def dimensions_schema def dimensions_fields vectors.reduce([]) do |list, vector| - @logger.info("Loading vector schema for '#{vector}'.") + logger.info("Loading vector schema for '#{vector}'.") list << @vector_service.load_vector_schema(vector) end end From f1b027d6a3b737548295908dfbac81f1a582bf4d Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 17:58:20 -0500 Subject: [PATCH 15/17] filter_map instead of shovel --- lib/manifold/api/workspace.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/manifold/api/workspace.rb b/lib/manifold/api/workspace.rb index 1f91982..0f33f4c 100644 --- a/lib/manifold/api/workspace.rb +++ b/lib/manifold/api/workspace.rb @@ -78,9 +78,9 @@ def dimensions_schema end def dimensions_fields - vectors.reduce([]) do |list, vector| + vectors.filter_map do |vector| logger.info("Loading vector schema for '#{vector}'.") - list << @vector_service.load_vector_schema(vector) + @vector_service.load_vector_schema(vector) end end From 95d26fea1f60c2748f131537777624037e701d17 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 18:01:10 -0500 Subject: [PATCH 16/17] Raise on no configuration --- lib/manifold/services/vector_service.rb | 5 +---- spec/manifold/services/vector_service_spec.rb | 14 ++++---------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/lib/manifold/services/vector_service.rb b/lib/manifold/services/vector_service.rb index 1886dbb..4da50db 100644 --- a/lib/manifold/services/vector_service.rb +++ b/lib/manifold/services/vector_service.rb @@ -10,10 +10,7 @@ def initialize(logger) def load_vector_schema(vector_name) path = config_path(vector_name) - unless path.file? - @logger.error("Vector configuration not found: #{path}") - return nil - end + raise "Vector configuration not found: #{path}" unless path.file? config = YAML.safe_load_file(path) fields = transform_attributes_to_schema(config["attributes"]) diff --git a/spec/manifold/services/vector_service_spec.rb b/spec/manifold/services/vector_service_spec.rb index ee4903c..80466c6 100644 --- a/spec/manifold/services/vector_service_spec.rb +++ b/spec/manifold/services/vector_service_spec.rb @@ -51,16 +51,10 @@ allow(logger).to receive(:error) end - it "returns nil" do - expect(service.load_vector_schema(vector_name)).to be_nil - end - - it "logs an error message" do - path = Pathname.pwd.join("vectors", "#{vector_name}.yml") - service.load_vector_schema(vector_name) - - expect(logger).to have_received(:error) - .with("Vector configuration not found: #{path}") + it "raises an error" do + expect { service.load_vector_schema(vector_name) }.to raise_error( + "Vector configuration not found: #{Pathname.pwd.join("vectors", "#{vector_name}.yml")}" + ) end end end From 5521736137e94d519e675740bb4e8dbd528b1b86 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Thu, 21 Nov 2024 20:20:43 -0500 Subject: [PATCH 17/17] Capture invalid YAML --- lib/manifold/services/vector_service.rb | 6 ++++-- spec/manifold/services/vector_service_spec.rb | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/lib/manifold/services/vector_service.rb b/lib/manifold/services/vector_service.rb index 4da50db..40d7550 100644 --- a/lib/manifold/services/vector_service.rb +++ b/lib/manifold/services/vector_service.rb @@ -10,11 +10,13 @@ def initialize(logger) def load_vector_schema(vector_name) path = config_path(vector_name) - raise "Vector configuration not found: #{path}" unless path.file? - config = YAML.safe_load_file(path) fields = transform_attributes_to_schema(config["attributes"]) { "name" => vector_name.downcase, "type" => "RECORD", "fields" => fields } + rescue Errno::ENOENT, Errno::EISDIR + raise "Vector configuration not found: #{path}" + rescue Psych::Exception => e + raise "Invalid YAML in vector configuration #{path}: #{e.message}" end private diff --git a/spec/manifold/services/vector_service_spec.rb b/spec/manifold/services/vector_service_spec.rb index 80466c6..529b2a1 100644 --- a/spec/manifold/services/vector_service_spec.rb +++ b/spec/manifold/services/vector_service_spec.rb @@ -57,5 +57,19 @@ ) end end + + context "when vector configuration is invalid" do + before do + Pathname.pwd.join("vectors").mkpath + config_path = Pathname.pwd.join("vectors", "#{vector_name}.yml") + config_path.write("invalid_key: [value1, value2") + end + + it "raises an error" do + expect { service.load_vector_schema(vector_name) }.to raise_error( + /Invalid YAML in vector configuration/ + ) + end + end end end