Skip to content

Commit

Permalink
Merge pull request #92 from DFE-Digital/ab-data-importer-logic-updates
Browse files Browse the repository at this point in the history
Improve the AB data importer
  • Loading branch information
peteryates authored Jan 29, 2025
2 parents 3474910 + c3f8871 commit 1ec9e7c
Show file tree
Hide file tree
Showing 9 changed files with 597 additions and 143 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ group :test do
gem "rspec"
gem "rspec-rails"
gem "shoulda-matchers"
gem "super_diff"
end

group :development, :test do
Expand Down
9 changes: 9 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ GEM
nokogiri
asciidoctor (2.0.23)
ast (2.4.2)
attr_extras (7.1.0)
attr_required (1.0.2)
base32 (0.3.4)
base64 (0.2.0)
Expand Down Expand Up @@ -359,13 +360,16 @@ GEM
tzinfo
validate_url
webfinger (~> 2.0)
optimist (3.2.0)
pagy (9.2.1)
parallel (1.26.3)
parser (3.3.7.0)
ast (~> 2.4.1)
racc
pastel (0.8.0)
tty-color (~> 0.5)
patience_diff (1.2.0)
optimist (~> 3.0)
pg (1.5.9)
playwright-ruby-client (1.49.0)
concurrent-ruby (>= 1.1.6)
Expand Down Expand Up @@ -536,6 +540,10 @@ GEM
set (~> 1.0)
stackprof (0.2.27)
stringio (3.1.2)
super_diff (0.15.0)
attr_extras (>= 6.2.4)
diff-lcs
patience_diff
swd (2.0.3)
activesupport (>= 3)
attr_required (>= 0.0.5)
Expand Down Expand Up @@ -634,6 +642,7 @@ DEPENDENCIES
shoulda-matchers
solid_queue
stackprof
super_diff
tzinfo-data
webrick

Expand Down
37 changes: 26 additions & 11 deletions lib/appropriate_bodies/importers/appropriate_body_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,28 @@

module AppropriateBodies::Importers
class AppropriateBodyImporter
def initialize(filename = Rails.root.join('db/samples/appropriate-body-portal/appropriate-body.csv'))
@csv = CSV.read(filename, headers: true)
IMPORT_ERROR_LOG = 'tmp/appropriate_body_import.log'.freeze

# One appropriate body record read from the legacy CSV, shaped so that
# #to_h yields an attribute hash suitable for a bulk insert.
Row = Struct.new(:legacy_id, :name, :dfe_sign_in_organisation_id, :local_authority_code, :establishment_number) do
  # Builds the attribute hash for this row.
  #
  # Note that the struct's own dfe_sign_in_organisation_id is ignored: a
  # freshly generated UUID is emitted in its place on every call.
  #
  # @return [Hash{Symbol => Object}]
  def to_h
    {
      name: name,
      legacy_id: legacy_id,
      establishment_number: establishment_number,
      local_authority_code: local_authority_code,
      dfe_sign_in_organisation_id: SecureRandom.uuid # FIXME: fix dfe_sign_in_organisation_id
    }
  end
end

def import
AppropriateBody.transaction do
@csv.each do |row|
Rails.logger.debug("attempting to import row: #{row.to_h}")
def initialize(filename, wanted_legacy_ids)
@csv = CSV.read(filename, headers: true)
@wanted_legacy_ids = wanted_legacy_ids

AppropriateBody.create!(**build(row))
end
end
File.open(IMPORT_ERROR_LOG, 'w') { |f| f.truncate(0) }
@import_error_log = Logger.new(IMPORT_ERROR_LOG, File::CREAT)
end

def rows
@csv.map { |row|
next unless row['id'].in?(@wanted_legacy_ids)

@csv.count
Row.new(**build(row))
}.compact
end

private
Expand Down Expand Up @@ -67,8 +75,15 @@ def extract_local_authority_code_and_establishment_number(row)
local_authority_code: local_authority_code[0..2],
establishment_number: local_authority_code[4..8]
}
when %r{\A\d{7}\z}
{
local_authority_code: local_authority_code[0..2],
establishment_number: local_authority_code[3..7]
}
else
Rails.logger.debug("Can't import #{local_authority_code} from #{row}")
@import_error_log.error "#########################"
@import_error_log.error "Invalid local authority code"
@import_error_log.error "Value: #{local_authority_code}"

{}
end
Expand Down
64 changes: 64 additions & 0 deletions lib/appropriate_bodies/importers/importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
module AppropriateBodies::Importers
  # Runs the legacy appropriate body data import.
  #
  # All of the work happens in the constructor: induction periods are read
  # first, and the TRNs and legacy appropriate body ids they reference decide
  # which teachers and appropriate bodies are loaded. Appropriate bodies and
  # teachers are bulk inserted, then induction periods are linked to the
  # inserted records and bulk inserted as well. Progress is written to stdout.
  class Importer
    # rubocop:disable Rails/Output

    # @param appropriate_body_csv passed straight to AppropriateBodyImporter.new
    # @param teachers_csv passed straight to TeacherImporter.new
    # @param induction_period_csv passed straight to InductionPeriodImporter.new
    def initialize(appropriate_body_csv, teachers_csv, induction_period_csv)
      induction_periods_grouped_by_trn = InductionPeriodImporter.new(induction_period_csv).periods_by_trn

      # Only teachers that have at least one induction period are of interest.
      active_teachers = induction_periods_grouped_by_trn.keys
      teacher_importer_rows = TeacherImporter.new(teachers_csv, active_teachers).rows

      # Likewise, only appropriate bodies referenced by an induction period.
      active_abs = induction_periods_grouped_by_trn.flat_map { |_trn, ips| ips.map(&:legacy_appropriate_body_id) }.uniq
      ab_importer_rows = AppropriateBodyImporter.new(appropriate_body_csv, active_abs).rows

      puts "Active appropriate bodies: #{active_abs.count}"
      # FIXME: use insert_all! here
      AppropriateBody.insert_all(ab_importer_rows.select { |r| r.legacy_id.in?(active_abs) }.map(&:to_h))
      puts "Appropriate bodies inserted: #{AppropriateBody.count}"

      puts "Active Teachers: #{teacher_importer_rows.count}"
      # FIXME: use insert_all! here
      Teacher.insert_all(teacher_importer_rows.map(&:to_h))
      puts "Teachers inserted: #{Teacher.count}"

      # Lookup tables from the legacy identifiers to the ids of the records
      # that now exist in the database.
      teacher_trn_to_id = Teacher.all.select(:id, :trn).each_with_object({}) do |t, h|
        h[t[:trn]] = t[:id]
      end

      ab_legacy_id_to_id = AppropriateBody.all.select(:id, :legacy_id).each_with_object({}) do |ab, h|
        h[ab[:legacy_id]] = ab[:id]
      end

      induction_period_rows = []

      induction_periods_grouped_by_trn.each do |trn, induction_periods|
        induction_periods.each do |ip|
          # A missing teacher or appropriate body is an expected condition
          # here, so it is reported and the period skipped rather than raised.
          unless teacher_trn_to_id.key?(trn)
            puts "No teacher found with trn: #{trn}"
            next
          end
          ip.teacher_id = teacher_trn_to_id[trn]

          unless ab_legacy_id_to_id.key?(ip.legacy_appropriate_body_id)
            puts "No appropriate body found with legacy_id: #{ip.legacy_appropriate_body_id}"
            next
          end
          ip.appropriate_body_id = ab_legacy_id_to_id[ip.legacy_appropriate_body_id]

          induction_period_rows << ip
        end
      end

      # FIXME: use insert_all! here
      InductionPeriod.insert_all(induction_period_rows.map(&:to_record))

      # TODO: insert extensions
      # TODO: insert events
    end
    # rubocop:enable Rails/Output
  end
end
Loading

0 comments on commit 1ec9e7c

Please sign in to comment.