Skip to content

Commit

Permalink
Do not glob out files unnecessarily (#54)
Browse files Browse the repository at this point in the history
* Do not glob out all files unless we have to

* pass nil to validate through CLI so we do not need to call tracked files

* add a test for many files case

* code review
  • Loading branch information
Alex Evanczuk authored Apr 12, 2023
1 parent 5500ab3 commit f1be52a
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 36 deletions.
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
code_ownership (1.32.8)
code_ownership (1.32.9)
code_teams (~> 1.0)
packs
sorbet-runtime
Expand Down
2 changes: 1 addition & 1 deletion code_ownership.gemspec
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Gem::Specification.new do |spec|
spec.name = "code_ownership"
spec.version = '1.32.8'
spec.version = '1.32.9'
spec.authors = ['Gusto Engineers']
spec.email = ['[email protected]']
spec.summary = 'A gem to help engineering teams declare ownership of code'
Expand Down
16 changes: 11 additions & 5 deletions lib/code_ownership.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,18 +82,24 @@ def self.remove_file_annotation!(filename)

sig do
params(
files: T::Array[String],
autocorrect: T::Boolean,
stage_changes: T::Boolean
stage_changes: T::Boolean,
files: T.nilable(T::Array[String]),
).void
end
def validate!(
files: Private.tracked_files,
autocorrect: true,
stage_changes: true
stage_changes: true,
files: nil
)
Private.load_configuration!
tracked_file_subset = Private.tracked_files & files

tracked_file_subset = if files
files.select{|f| Private.file_tracked?(f)}
else
Private.tracked_files
end

Private.validate!(files: tracked_file_subset, autocorrect: autocorrect, stage_changes: stage_changes)
end

Expand Down
2 changes: 1 addition & 1 deletion lib/code_ownership/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def self.validate!(argv)
File.exist?(file)
end
else
Private.tracked_files
nil
end

CodeOwnership.validate!(
Expand Down
18 changes: 18 additions & 0 deletions lib/code_ownership/private.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,24 @@ def self.tracked_files
@tracked_files ||= Dir.glob(configuration.owned_globs) - Dir.glob(configuration.unowned_globs)
end

sig { params(file: String).returns(T::Boolean) }
def self.file_tracked?(file)
# Another way to accomplish this is
# (Dir.glob(configuration.owned_globs) - Dir.glob(configuration.unowned_globs)).include?(file)
# However, globbing out can take 5 or more seconds on a large repository, dramatically slowing down
# invocations to `bin/codeownership validate --diff`.
# Using `File.fnmatch?` is a lot faster!
in_owned_globs = configuration.owned_globs.all? do |owned_glob|
File.fnmatch?(owned_glob, file, File::FNM_PATHNAME | File::FNM_EXTGLOB)
end

in_unowned_globs = configuration.unowned_globs.all? do |unowned_glob|
File.fnmatch?(unowned_glob, file, File::FNM_PATHNAME | File::FNM_EXTGLOB)
end

in_owned_globs && !in_unowned_globs
end

sig { params(team_name: String, location_of_reference: String).returns(CodeTeams::Team) }
def self.find_team!(team_name, location_of_reference)
found_team = CodeTeams.find(team_name)
Expand Down
55 changes: 47 additions & 8 deletions lib/code_ownership/private/glob_cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class GlobCache
FilesByMapper = T.type_alias do
T::Hash[
String,
T::Array[MapperDescription]
T::Set[MapperDescription]
]
end

Expand All @@ -32,6 +32,20 @@ def raw_cache_contents
@raw_cache_contents
end

sig { params(files: T::Array[String]).returns(FilesByMapper) }
def mapper_descriptions_that_map_files(files)
if files.count > 100
# When looking at many files, expanding the cache out using Dir.glob and checking for intersections is faster
files_by_mappers = files.map{ |f| [f, Set.new([]) ]}.to_h
files_by_mappers.merge(files_by_mappers_via_expanded_cache)
else
# When looking at few files, using File.fnmatch is faster
files_by_mappers_via_file_fnmatch(files)
end
end

private

sig { returns(CacheShape) }
def expanded_cache
@expanded_cache = T.let(@expanded_cache, T.nilable(CacheShape))
Expand All @@ -52,20 +66,45 @@ def expanded_cache
end

sig { returns(FilesByMapper) }
def files_by_mapper
@files_by_mapper ||= T.let(@files_by_mapper, T.nilable(FilesByMapper))
@files_by_mapper ||= begin
files_by_mapper = {}
def files_by_mappers_via_expanded_cache
@files_by_mappers ||= T.let(@files_by_mappers, T.nilable(FilesByMapper))
@files_by_mappers ||= begin
files_by_mappers = T.let({}, FilesByMapper)
expanded_cache.each do |mapper_description, file_by_owner|
file_by_owner.each do |file, owner|
files_by_mapper[file] ||= []
files_by_mapper[file] << mapper_description
files_by_mappers[file] ||= Set.new([])
files_by_mappers.fetch(file) << mapper_description
end
end

files_by_mapper
files_by_mappers
end
end

sig { params(files: T::Array[String]).returns(FilesByMapper) }
def files_by_mappers_via_file_fnmatch(files)
files_by_mappers = T.let({}, FilesByMapper)

files.each do |file|
files_by_mappers[file] ||= Set.new([])
@raw_cache_contents.each do |mapper_description, globs_by_owner|
# As much as I'd like to *not* special case the file annotations mapper, using File.fnmatch? on the thousands of files mapped by the
# file annotations mapper is a lot of unnecessary extra work.
# Therefore we can just check if the file is in the globs directly for file annotations, otherwise use File.fnmatch
if mapper_description == OwnershipMappers::FileAnnotations::DESCRIPTION
files_by_mappers.fetch(file) << mapper_description if globs_by_owner[file]
else
globs_by_owner.each do |glob, owner|
if File.fnmatch?(glob, file, File::FNM_PATHNAME | File::FNM_EXTGLOB)
files_by_mappers.fetch(file) << mapper_description
end
end
end
end
end

files_by_mappers
end
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class FileAnnotations
@@map_files_to_owners = T.let({}, T.nilable(T::Hash[String, ::CodeTeams::Team])) # rubocop:disable Style/ClassVars

TEAM_PATTERN = T.let(/\A(?:#|\/\/) @team (?<team>.*)\Z/.freeze, Regexp)
DESCRIPTION = 'Annotations at the top of file'

sig do
override.params(file: String).
Expand Down Expand Up @@ -51,11 +52,8 @@ def globs_to_owner(files)
end
def update_cache(cache, files)
cache.merge!(globs_to_owner(files))

# TODO: Make `tracked_files` return a set
fileset = Set.new(Private.tracked_files)
invalid_files = cache.keys.select do |file|
!fileset.include?(file)
!Private.file_tracked?(file)
end
invalid_files.each do |invalid_file|
cache.delete(invalid_file)
Expand Down Expand Up @@ -116,7 +114,7 @@ def remove_file_annotation!(filename)

sig { override.returns(String) }
def description
'Annotations at the top of file'
DESCRIPTION
end

sig { override.void }
Expand Down
10 changes: 5 additions & 5 deletions lib/code_ownership/private/validations/files_have_owners.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ class FilesHaveOwners

sig { override.params(files: T::Array[String], autocorrect: T::Boolean, stage_changes: T::Boolean).returns(T::Array[String]) }
def validation_errors(files:, autocorrect: true, stage_changes: true)
files_by_mapper = Private.glob_cache.files_by_mapper

files_not_mapped_at_all = files.select do |file|
files_by_mapper.fetch(file, []).count == 0
cache = Private.glob_cache
file_mappings = cache.mapper_descriptions_that_map_files(files)
files_not_mapped_at_all = file_mappings.select do |file, mapper_descriptions|
mapper_descriptions.count == 0
end

errors = T.let([], T::Array[String])
Expand All @@ -22,7 +22,7 @@ def validation_errors(files:, autocorrect: true, stage_changes: true)
errors << <<~MSG
Some files are missing ownership:
#{files_not_mapped_at_all.map { |file| "- #{file}" }.join("\n")}
#{files_not_mapped_at_all.map { |file, mappers| "- #{file}" }.join("\n")}
MSG
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,10 @@ class FilesHaveUniqueOwners

sig { override.params(files: T::Array[String], autocorrect: T::Boolean, stage_changes: T::Boolean).returns(T::Array[String]) }
def validation_errors(files:, autocorrect: true, stage_changes: true)
files_by_mapper = Private.glob_cache.files_by_mapper

files_mapped_by_multiple_mappers = {}
files.each do |file|
mappers = files_by_mapper.fetch(file, [])
if mappers.count > 1
files_mapped_by_multiple_mappers[file] = mappers
end
cache = Private.glob_cache
file_mappings = cache.mapper_descriptions_that_map_files(files)
files_mapped_by_multiple_mappers = file_mappings.select do |file, mapper_descriptions|
mapper_descriptions.count > 1
end

errors = T.let([], T::Array[String])
Expand All @@ -26,7 +22,7 @@ def validation_errors(files:, autocorrect: true, stage_changes: true)
errors << <<~MSG
Code ownership should only be defined for each file in one way. The following files have declared ownership in multiple ways.
#{files_mapped_by_multiple_mappers.map { |file, descriptions| "- #{file} (#{descriptions.join(', ')})" }.join("\n")}
#{files_mapped_by_multiple_mappers.map { |file, descriptions| "- #{file} (#{descriptions.to_a.join(', ')})" }.join("\n")}
MSG
end

Expand Down
2 changes: 1 addition & 1 deletion spec/lib/code_ownership/cli_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
expect(CodeOwnership).to receive(:validate!) do |args| # rubocop:disable RSpec/MessageSpies
expect(args[:autocorrect]).to eq true
expect(args[:stage_changes]).to eq true
expect(args[:files]).to match_array(["app/services/my_file.rb", "frontend/javascripts/my_file.jsx"])
expect(args[:files]).to be_nil
end
subject
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ module CodeOwnership
before do
write_file('app/missing_ownership.rb', <<~CONTENTS)
CONTENTS

write_file('app/some_other_file.rb', <<~CONTENTS)
# @team Bar
CONTENTS

write_file('config/teams/bar.yml', <<~CONTENTS)
name: Bar
CONTENTS
end

context 'the file is not in unowned_globs' do
Expand Down Expand Up @@ -58,6 +66,26 @@ module CodeOwnership
end
end

context 'many files in owned_globs do not have an owner' do
before do
write_configuration

500.times do |i|
write_file("app/missing_ownership#{i}.rb", <<~CONTENTS)
CONTENTS
end
end

it 'lets the user know the file must have ownership' do
expect { CodeOwnership.validate! }.to raise_error do |e|
expect(e).to be_a CodeOwnership::InvalidCodeOwnershipConfigurationError
expect(e.message).to include "Some files are missing ownership:"
500.times do |i|
expect(e.message).to include "- app/missing_ownership#{i}.rb"
end
end
end
end
end
end
end

0 comments on commit f1be52a

Please sign in to comment.