From 739f2173c297b0afb33202b542ce04b6fa36a337 Mon Sep 17 00:00:00 2001
From: Peter Yates <peter.yates@graphia.co.uk>
Date: Thu, 23 Jan 2025 17:30:56 +0000
Subject: [PATCH 1/5] Add super_diff gem for testing

---
 Gemfile              | 1 +
 Gemfile.lock         | 9 +++++++++
 spec/rails_helper.rb | 1 +
 3 files changed, 11 insertions(+)

diff --git a/Gemfile b/Gemfile
index c8e7c744..c8cffdcf 100644
--- a/Gemfile
+++ b/Gemfile
@@ -49,6 +49,7 @@ group :test do
   gem "rspec"
   gem "rspec-rails"
   gem "shoulda-matchers"
+  gem "super_diff"
 end
 
 group :development, :test do
diff --git a/Gemfile.lock b/Gemfile.lock
index 48fdf49b..2ee3bf7d 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -97,6 +97,7 @@ GEM
       nokogiri
     asciidoctor (2.0.23)
     ast (2.4.2)
+    attr_extras (7.1.0)
     attr_required (1.0.2)
     base32 (0.3.4)
     base64 (0.2.0)
@@ -359,6 +360,7 @@ GEM
       tzinfo
       validate_url
       webfinger (~> 2.0)
+    optimist (3.2.0)
     pagy (9.2.1)
     parallel (1.26.3)
     parser (3.3.7.0)
@@ -366,6 +368,8 @@ GEM
       racc
     pastel (0.8.0)
       tty-color (~> 0.5)
+    patience_diff (1.2.0)
+      optimist (~> 3.0)
     pg (1.5.9)
     playwright-ruby-client (1.49.0)
       concurrent-ruby (>= 1.1.6)
@@ -536,6 +540,10 @@ GEM
       set (~> 1.0)
     stackprof (0.2.27)
     stringio (3.1.2)
+    super_diff (0.15.0)
+      attr_extras (>= 6.2.4)
+      diff-lcs
+      patience_diff
     swd (2.0.3)
       activesupport (>= 3)
       attr_required (>= 0.0.5)
@@ -634,6 +642,7 @@ DEPENDENCIES
   shoulda-matchers
   solid_queue
   stackprof
+  super_diff
   tzinfo-data
   webrick
 
diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb
index 5e69a297..ab7173fc 100644
--- a/spec/rails_helper.rb
+++ b/spec/rails_helper.rb
@@ -1,5 +1,6 @@
 require 'ostruct'
 require 'spec_helper'
+require "super_diff/rspec-rails"
 ENV['RAILS_ENV'] ||= 'test'
 require_relative '../config/environment'
 

From 3e251c50e968ec44560dffbef18199afc767812d Mon Sep 17 00:00:00 2001
From: Peter Yates <peter.yates@graphia.co.uk>
Date: Thu, 23 Jan 2025 17:32:46 +0000
Subject: [PATCH 2/5] Allow the rake task to take CSV location arguments

---
 lib/tasks/appropriate_body_import.rake | 32 +++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/lib/tasks/appropriate_body_import.rake b/lib/tasks/appropriate_body_import.rake
index a3201ec7..822f141b 100644
--- a/lib/tasks/appropriate_body_import.rake
+++ b/lib/tasks/appropriate_body_import.rake
@@ -1,19 +1,39 @@
+require 'optparse'
+
 namespace :appropriate_body do
   desc "Import Appropriate Body data from the old AB Portal"
   task import: :environment do
+    appropriate_bodies_csv = 'db/samples/appropriate-body-portal/appropriate-body.csv'
+    teachers_csv = 'db/samples/appropriate-body-portal/teachers.csv'
+    induction_periods_csv = 'db/samples/appropriate-body-portal/induction-periods.csv'
+
+    files = {}
+
+    opts = OptionParser.new
+    opts.banner = "Usage: rake appropriate_body:import --appropriate-body /tmp/ab.csv --teacher /tmp/teacher.csv --induction-period /tmp/ip.csv"
+    opts.on("-a", "--appropriate-body ARG", String) { |val| files[:appropriate_bodies_csv] = val }
+    opts.on("-t", "--teacher ARG", String) { |val| files[:teachers_csv] = val }
+    opts.on("-i", "--induction-period ARG", String) { |val| files[:induction_periods_csv] = val }
+    args = opts.order!(ARGV) {}
+    opts.parse!(args)
+
+    appropriate_bodies_csv = files[:appropriate_bodies_csv] if files[:appropriate_bodies_csv]
+    teachers_csv = files[:teachers_csv] if files[:teachers_csv]
+    induction_periods_csv = files[:induction_periods_csv] if files[:induction_periods_csv]
+
     logger = Logger.new($stdout)
     logger.info "Checking files exist"
 
-    logger.info "Importing Appropriate Body records"
-    imported_abs = AppropriateBodies::Importers::AppropriateBodyImporter.new.import
+    logger.info "Importing Appropriate Body records from #{appropriate_bodies_csv}"
+    imported_abs = AppropriateBodies::Importers::AppropriateBodyImporter.new(appropriate_bodies_csv).import
     logger.info "  #{imported_abs} Appropriate Body records imported ✅"
 
-    logger.info "Importing Teacher records"
-    imported_teachers, total_teachers = AppropriateBodies::Importers::TeacherImporter.new.import
+    logger.info "Importing Teacher records from #{teachers_csv}"
+    imported_teachers, total_teachers = AppropriateBodies::Importers::TeacherImporter.new(teachers_csv).import
     logger.info "  #{imported_teachers} Teacher records imported out of #{total_teachers} ✅"
 
-    logger.info "Importing Induction Periods"
-    imported_induction_periods, total_induction_periods = AppropriateBodies::Importers::InductionPeriodImporter.new.import
+    logger.info "Importing Induction Periods from #{induction_periods_csv}"
+    imported_induction_periods, total_induction_periods = AppropriateBodies::Importers::InductionPeriodImporter.new(induction_periods_csv).import
     logger.info "  #{imported_induction_periods} Induction period records imported out of #{total_induction_periods} ✅"
   end
 end

From 05e701afa4ef743d82bfb54d7a77ab6b8a16029e Mon Sep 17 00:00:00 2001
From: Peter Yates <peter.yates@graphia.co.uk>
Date: Thu, 23 Jan 2025 17:32:10 +0000
Subject: [PATCH 3/5] Add new importer class

---
 .../importers/appropriate_body_importer.rb    |  50 ++-
 lib/appropriate_bodies/importers/importer.rb  |  15 +
 .../importers/induction_period_importer.rb    | 144 +++++----
 .../importers/teacher_importer.rb             |  65 ++--
 .../induction_period_importer_spec.rb         | 285 +++++++++++++++---
 5 files changed, 418 insertions(+), 141 deletions(-)
 create mode 100644 lib/appropriate_bodies/importers/importer.rb

diff --git a/lib/appropriate_bodies/importers/appropriate_body_importer.rb b/lib/appropriate_bodies/importers/appropriate_body_importer.rb
index 395b85e6..8842ac8a 100644
--- a/lib/appropriate_bodies/importers/appropriate_body_importer.rb
+++ b/lib/appropriate_bodies/importers/appropriate_body_importer.rb
@@ -2,22 +2,41 @@
 
 module AppropriateBodies::Importers
   class AppropriateBodyImporter
-    def initialize(filename = Rails.root.join('db/samples/appropriate-body-portal/appropriate-body.csv'))
-      @csv = CSV.read(filename, headers: true)
-    end
+    IMPORT_ERROR_LOG = 'tmp/appropriate_body_import.log'.freeze
 
-    def import
-      AppropriateBody.transaction do
-        @csv.each do |row|
-          Rails.logger.debug("attempting to import row: #{row.to_h}")
+    Row = Struct.new(:legacy_id, :name, :dfe_sign_in_organisation_id, :local_authority_code, :establishment_number)
 
-          AppropriateBody.create!(**build(row))
-        end
-      end
+    def initialize(filename)
+      @csv = CSV.read(filename, headers: true)
+
+      File.open(IMPORT_ERROR_LOG, 'w') { |f| f.truncate(0) }
+      @import_error_log = Logger.new(IMPORT_ERROR_LOG, File::CREAT)
+    end
 
-      @csv.count
+    def rows
+      @csv.map { |row| Row.new(**build(row)) }
     end
 
+    # def import
+    #   AppropriateBody.transaction do
+    #     @csv.each.with_index(1) do |row, i|
+    #       Rails.logger.debug("attempting to import row: #{row.to_h}")
+    #
+    #       begin
+    #         AppropriateBody.create!(**build(row))
+    #       rescue ActiveRecord::RecordInvalid => e
+    #         @import_error_log.error "#########################"
+    #         @import_error_log.error "Failed to import Appropriate Body"
+    #         @import_error_log.error "Row number: #{i}"
+    #         @import_error_log.error "Message: #{e.message}"
+    #         @import_error_log.error "Row data: #{row}"
+    #       end
+    #     end
+    #   end
+    #
+    #   @csv.count
+    # end
+
   private
 
     def build(row)
@@ -67,8 +86,15 @@ def extract_local_authority_code_and_establishment_number(row)
                    local_authority_code: local_authority_code[0..2],
                    establishment_number: local_authority_code[4..8]
                  }
+               when %r{\A\d{7}\z}
+                 {
+                   local_authority_code: local_authority_code[0..2],
+                   establishment_number: local_authority_code[3..7]
+                 }
                else
-                 Rails.logger.debug("Can't import #{local_authority_code} from #{row}")
+                 @import_error_log.error "#########################"
+                 @import_error_log.error "Invalid local authority code"
+                 @import_error_log.error "Value: #{local_authority_code}"
 
                  {}
                end
diff --git a/lib/appropriate_bodies/importers/importer.rb b/lib/appropriate_bodies/importers/importer.rb
new file mode 100644
index 00000000..f50bd7a5
--- /dev/null
+++ b/lib/appropriate_bodies/importers/importer.rb
@@ -0,0 +1,15 @@
+module AppropriateBodies::Importers
+  class Importer
+    def initialize(appropriate_body_csv, teachers_csv, induction_period_csv)
+      # ab_importer_rows = AppropriateBodyImporter.new(appropriate_body_csv).rows
+      # teacher_importer_rows = TeacherImporter.new(teachers_csv).rows
+      induction_periods_grouped_by_trn = InductionPeriodImporter.new(induction_period_csv).periods_by_trn
+
+      true
+
+      binding.debugger
+      # get induction periods grouped by TRN
+      #
+    end
+  end
+end
diff --git a/lib/appropriate_bodies/importers/induction_period_importer.rb b/lib/appropriate_bodies/importers/induction_period_importer.rb
index 5232658b..3394afe4 100644
--- a/lib/appropriate_bodies/importers/induction_period_importer.rb
+++ b/lib/appropriate_bodies/importers/induction_period_importer.rb
@@ -2,23 +2,30 @@
 
 module AppropriateBodies::Importers
   class InductionPeriodImporter
+    IMPORT_ERROR_LOG = 'tmp/induction_period_import.log'.freeze
     LOGFILE = Rails.root.join("log/induction_period_import.log").freeze
 
     attr_accessor :csv, :data
 
-    Row = Struct.new(:appropriate_body_id, :started_on, :finished_on, :induction_programme, :number_of_terms, :trn, keyword_init: true) do
+    Row = Struct.new(:appropriate_body_id, :started_on, :finished_on, :induction_programme, :number_of_terms, :trn, :notes, keyword_init: true) do
       def range
         started_on...finished_on
       end
 
-      def to_hash
-        { appropriate_body_id:, started_on:, finished_on:, number_of_terms:, induction_programme: convert_induction_programme }
-      end
-
       def length
         (finished_on || Time.zone.today) - started_on
       end
 
+      # used in notes
+      def to_h
+        { appropriate_body_id:, started_on:, finished_on:, induction_programme:, number_of_terms: }
+      end
+
+      # used for comparisons in tests
+      def to_hash
+        { appropriate_body_id:, started_on:, finished_on:, number_of_terms:, induction_programme: convert_induction_programme }
+      end
+
     private
 
       def convert_induction_programme
@@ -30,67 +37,51 @@ def convert_induction_programme
       end
     end
 
-    def initialize(filename: Rails.root.join('db/samples/appropriate-body-portal/induction-periods.csv'), csv: nil)
-      File.delete(LOGFILE) if File.exist?(LOGFILE)
-
+    def initialize(filename, csv: nil)
       @csv = csv || CSV.read(filename, headers: true)
 
-      data = @csv.map do |row|
-        logger.debug("attempting to import row: #{row.to_hash}")
-
-        unless (appropriate_body_id = appropriate_bodies[row['appropriate_body_id']])
-          logger.warn("No AB found for ID #{row['appropriate_body_id']}")
-
-          next
-        end
-
-        Row.new(
-          appropriate_body_id:,
-          started_on: extract_date(row['started_on']),
-          finished_on: extract_date(row['finished_on']),
-          induction_programme: row['induction_programme_choice'],
-          number_of_terms: row['number_of_terms'].to_i,
-          trn: row['trn']
-        )
-      end
-
-      @data = data.compact
+      File.open(IMPORT_ERROR_LOG, 'w') { |f| f.truncate(0) }
+      @import_error_log = Logger.new(IMPORT_ERROR_LOG, File::CREAT)
     end
 
-    def import
-      count = 0
-
-      periods_by_trn.each do |trn, rows|
-        logger.debug("adding rows for #{trn}")
-        teacher_id = teachers[trn]
-
-        InductionPeriod.transaction do
-          rows.map do |row|
-            InductionPeriod.create!(**row, teacher_id:)
-            count += 1
-          end
-        end
-      end
+    def rows
+      @rows ||= @csv.map { |row| Row.new(**build(row)) }
+    end
 
-      [count, @csv.count]
+    def build(row)
+      {
+        appropriate_body_id: row['appropriate_body_id'],
+        started_on: extract_date(row['started_on']),
+        finished_on: extract_date(row['finished_on']),
+        induction_programme: row['induction_programme_choice'],
+        number_of_terms: row['number_of_terms'].to_i,
+        trn: row['trn'],
+        notes: []
+      }
     end
 
     def periods_by_trn
-      @data
-        .reject { |ip| ip.finished_on && ip.started_on >= ip.finished_on } # FIXME: log these
+      rows
+        .reject { |ip| ip.started_on.nil? }
+        .reject { |ip| ip.started_on == Date.new(1, 1, 1) }
+        .reject { |ip| ip.finished_on && ip.started_on >= ip.finished_on }
         .group_by(&:trn)
+        .select { |_trn, periods| periods.any? { |p| p.finished_on.nil? } }
         .transform_values { |periods| periods.sort_by { |p| [p.started_on, p.length, p.appropriate_body_id] } }
         .each_with_object({}) do |(trn, rows), h|
           keep = []
 
           rows.each do |current|
             keep << current and next if keep.empty?
-            keep << current and next if keep.none? { |sibling| current.range.overlap?(sibling.range) }
+            keep << current and next if keep.none? { |already_recorded| current.range.overlap?(already_recorded.range) }
 
             keep
               .select { |sibling| sibling.range.overlap?(current.range) }
               .each do |sibling|
-                if sibling.appropriate_body_id == current.appropriate_body_id
+                original_sibling = sibling.to_h
+                original_current = current.to_h
+
+                if sibling.appropriate_body_id == current.appropriate_body_id && sibling.induction_programme == current.induction_programme
                   case
                   when sibling.range.cover?(current.range)
                     #                  ┌─────────────────────────────┐
@@ -99,6 +90,12 @@ def periods_by_trn
                     #               ┌──────────────────────────────────────┐
                     #   Sibling     │                KEEP                  │
                     #               └──────────────────────────────────────┘
+                    sibling.number_of_terms = [sibling.number_of_terms, current.number_of_terms].max
+                    sibling.notes << {
+                      heading: "Imported from DQT",
+                      body: "DQT held 2 overlapping induction periods for this teacher/appropriate body combination. 1 was discarded",
+                      data: { originals: [original_sibling, original_current], combined: sibling.to_h }
+                    }
                     next
                   when current.range.cover?(sibling.range)
                     #               ┌──────────────────────────────────────┐
@@ -107,6 +104,12 @@ def periods_by_trn
                     #                  ┌─────────────────────────────┐
                     #   Sibling        │           DISCARD           │
                     #                  └─────────────────────────────┘
+                    current.number_of_terms = [sibling.number_of_terms, current.number_of_terms].max
+                    current.notes << {
+                      heading: "Imported from DQT",
+                      body: "DQT held 2 overlapping induction periods for this teacher/appropriate body combination. 1 was discarded",
+                      data: { originals: [original_sibling, original_current], combined: current.to_h }
+                    }
                     keep.delete(sibling)
                     keep << current
                   when sibling.range.cover?(current.started_on) && !sibling.range.cover?(current.finished_on)
@@ -117,7 +120,13 @@ def periods_by_trn
                     #               ┌─────────────────────────────────────────┬───┐
                     #   Sibling     │                EXTEND                   │╳╳╳│
                     #               └─────────────────────────────────────────┴───┘
+                    current.number_of_terms = [sibling.number_of_terms, current.number_of_terms].max
                     sibling.finished_on = current.finished_on
+                    sibling.notes << {
+                      heading: "Imported from DQT",
+                      body: "DQT held 2 overlapping induction periods for this teacher/appropriate body combination. 1 was extended to cover the full duration.",
+                      data: { originals: [original_sibling, original_current], combined: sibling.to_h }
+                    }
                   when !sibling.range.cover?(current.started_on) && sibling.range.cover(current.finished_on)
                     #               ┌──────────────────────────────────────┐
                     #   Current     │              DISCARD                 │
@@ -126,10 +135,34 @@ def periods_by_trn
                     #               ┌─────┬──────────────────────────────────────┐
                     #   Sibling     │╳╳╳╳╳│             EXTEND                   │
                     #               └─────┴──────────────────────────────────────┘
+                    sibling.number_of_terms = [sibling.number_of_terms, current.number_of_terms].max
                     sibling.started_on = current.started_on
+                    sibling.notes << {
+                      heading: "Imported from DQT",
+                      body: "DQT held 2 overlapping induction periods for this teacher/appropriate body combination. 1 was extended to cover the full duration.",
+                      data: { originals: [original_sibling, original_current], combined: sibling.to_h }
+                    }
                   else
                     fail
                   end
+                elsif sibling.appropriate_body_id == current.appropriate_body_id && sibling.induction_programme != current.induction_programme
+                  case
+                  when sibling.range.cover?(current.started_on) && !sibling.range.cover?(current.finished_on)
+                    #                         ┌─────────────────────────────────┐
+                    #   Current               │          KEEP                   │
+                    #                         └─────────────────────────────────┘
+                    #
+                    #               ┌─────────┬┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┐
+                    #   Sibling     │ SHRINK  │                    ┊
+                    #               └─────────┴┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┘
+                    sibling.finished_on = current.started_on
+                    sibling.notes << {
+                      heading: "Imported from DQT",
+                      body: "DQT held 2 overlapping induction periods for this teacher/appropriate body combination with different induction programmes. This record was cut off when the later one started to prevent overlaps.",
+                      data: { originals: [original_sibling, original_current] }
+                    }
+                    keep << current
+                  end
                 else
                   case
                   when sibling.range.cover?(current.range) || current.range.cover?(sibling.range)
@@ -148,6 +181,11 @@ def periods_by_trn
                     #   Sibling     │ SHRINK  │                    ┊
                     #               └─────────┴┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┘
                     sibling.finished_on = current.started_on
+                    sibling.notes << {
+                      heading: "Imported from DQT",
+                      body: "DQT held 2 overlapping induction periods for this teacher with different appropriate bodies. This record was cut off when the later one started to prevent overlaps.",
+                      data: { originals: [original_sibling, original_current] }
+                    }
                     keep << current
                   end
                 end
@@ -170,18 +208,6 @@ def logger
       end
     end
 
-    def appropriate_bodies
-      @appropriate_bodies ||= AppropriateBody
-        .select(:id, :legacy_id)
-        .each_with_object({}) { |t, h| h[t.legacy_id] = t.id }
-    end
-
-    def teachers
-      @teachers ||= Teacher
-        .select(:id, :trn)
-        .each_with_object({}) { |t, h| h[t.trn] = t.id }
-    end
-
     def extract_date(datetime)
       return if datetime.blank?
 
diff --git a/lib/appropriate_bodies/importers/teacher_importer.rb b/lib/appropriate_bodies/importers/teacher_importer.rb
index 4f891df9..e9d6fbb5 100644
--- a/lib/appropriate_bodies/importers/teacher_importer.rb
+++ b/lib/appropriate_bodies/importers/teacher_importer.rb
@@ -2,40 +2,59 @@
 
 module AppropriateBodies::Importers
   class TeacherImporter
-    def initialize(filename = Rails.root.join('db/samples/appropriate-body-portal/teachers.csv'))
-      @csv = CSV.read(filename, headers: true)
-    end
-
-    def import
-      count = 0
-
-      Teacher.transaction do
-        @csv.each do |row|
-          Rails.logger.debug("attempting to import row: #{row.to_h}")
+    IMPORT_ERROR_LOG = 'tmp/teacher_import.log'.freeze
 
-          next if row.values_at('trn', 'first_name', 'last_name').any?(&:blank?)
+    Row = Struct.new(:trn, :trs_first_name, :trs_last_name)
 
-          Teacher.create!(**(build(row))).tap do |teacher|
-            if (induction_extension_terms = convert_extension(row)) && induction_extension_terms.positive?
-              Rails.logger.warn("Importing extension: #{induction_extension_terms}")
-              teacher.induction_extensions.create!(number_of_terms: induction_extension_terms)
-            end
-          end
+    def initialize(filename)
+      @csv = CSV.read(filename, headers: true)
 
-          count += 1
-        end
-      end
+      File.open(IMPORT_ERROR_LOG, 'w') { |f| f.truncate(0) }
+      @import_error_log = Logger.new(IMPORT_ERROR_LOG, File::CREAT)
+    end
 
-      [count, @csv.count]
+    def rows
+      @csv.first(5000).map { |row| Row.new(**build(row)) }
     end
 
+    # def import
+    #   count = 0
+    #
+    #   Teacher.transaction do
+    #     @csv.each.with_index(1) do |row, i|
+    #       Rails.logger.debug("attempting to import row: #{row.to_h}")
+    #
+    #       next if row.values_at('trn', 'first_name', 'last_name').any?(&:blank?)
+    #
+    #       begin
+    #         Teacher.create!(**(build(row))).tap do |teacher|
+    #           if (induction_extension_terms = convert_extension(row)) && induction_extension_terms.positive?
+    #             Rails.logger.warn("Importing extension: #{induction_extension_terms}")
+    #             teacher.induction_extensions.create!(number_of_terms: induction_extension_terms)
+    #           end
+    #         end
+    #       rescue ActiveRecord::RecordInvalid => e
+    #         @import_error_log.error "#########################"
+    #         @import_error_log.error "Failed to import Teacher"
+    #         @import_error_log.error "Row number: #{i}"
+    #         @import_error_log.error "Message: #{e.message}"
+    #         @import_error_log.error "Row data: #{row}"
+    #       end
+    #
+    #       count += 1
+    #     end
+    #   end
+    #
+    #   [count, @csv.count]
+    # end
+
   private
 
     def build(row)
       {
         trn: row['trn'],
-        first_name: row['first_name'].strip,
-        last_name: row['last_name'].strip,
+        trs_first_name: row['first_name'].strip,
+        trs_last_name: row['last_name']&.strip,
       }
     end
 
diff --git a/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb b/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
index 800f682c..690de623 100644
--- a/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
+++ b/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
@@ -19,23 +19,23 @@
   end
 
   let(:sample_csv) { CSV.parse(sample_csv_data, headers: true) }
-  subject { AppropriateBodies::Importers::InductionPeriodImporter.new(csv: sample_csv) }
+  subject { AppropriateBodies::Importers::InductionPeriodImporter.new(nil, csv: sample_csv) }
 
   it 'converts csv rows to Row objects when initialized' do
-    expect(subject.data).to all(be_a(AppropriateBodies::Importers::InductionPeriodImporter::Row))
+    expect(subject.rows).to all(be_a(AppropriateBodies::Importers::InductionPeriodImporter::Row))
   end
 
   it 'converts all rows' do
-    expect(subject.data.size).to eql(4)
+    expect(subject.rows.size).to eql(4)
   end
 
   describe 'mapping induction programmes' do
     it 'converts names to codes properly' do
       mappings = {
-        subject.data.find { |r| r.appropriate_body_id == ab_1.id } => 'cip',
-        subject.data.find { |r| r.appropriate_body_id == ab_2.id } => 'diy',
-        subject.data.find { |r| r.appropriate_body_id == ab_3.id } => 'fip',
-        subject.data.find { |r| r.appropriate_body_id == ab_4.id } => 'fip' # defaults when missing
+        subject.rows.find { |r| r.appropriate_body_id == ab_1.legacy_id } => 'cip',
+        subject.rows.find { |r| r.appropriate_body_id == ab_2.legacy_id } => 'diy',
+        subject.rows.find { |r| r.appropriate_body_id == ab_3.legacy_id } => 'fip',
+        subject.rows.find { |r| r.appropriate_body_id == ab_4.legacy_id } => 'fip' # defaults when missing
       }
       mappings.each do |row, expected_induction_programme|
         expect(row.to_hash.fetch(:induction_programme)).to eql(expected_induction_programme)
@@ -44,7 +44,7 @@
   end
 
   describe 'rebuilding periods' do
-    context 'when an ECT has one induction period with one AB' do
+    context 'when an ECT has no open induction periods' do
       let(:sample_csv_data) do
         <<~CSV
           appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
@@ -52,7 +52,20 @@
         CSV
       end
 
-      subject { AppropriateBodies::Importers::InductionPeriodImporter.new(csv: sample_csv) }
+      it 'skips them' do
+        expect(subject.periods_as_hashes_by_trn).not_to have_key('2600071')
+      end
+    end
+
+    context 'when an ECT has one induction period with one AB' do
+      let(:sample_csv_data) do
+        <<~CSV
+          appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
+          025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,,,3,2600071
+        CSV
+      end
+
+      subject { AppropriateBodies::Importers::InductionPeriodImporter.new(nil, csv: sample_csv) }
 
       it 'contains the original row untouched' do
         expect(subject.periods_as_hashes_by_trn).to eql(
@@ -60,11 +73,12 @@
             '2600071' => [
               AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
                 started_on: Date.new(2012, 1, 1),
-                finished_on: Date.new(2012, 10, 31),
+                finished_on: nil,
                 induction_programme: nil,
-                appropriate_body_id: ab_1.id,
+                appropriate_body_id: ab_1.legacy_id,
                 trn: '2600071',
-                number_of_terms: 3
+                number_of_terms: 3,
+                notes: []
               ).to_hash
             ]
           }
@@ -72,17 +86,18 @@
       end
     end
 
-    context 'when an ECT has two induction period with one AB' do
+    context 'when an ECT has two induction periods that have the same programme type with one AB' do
       context 'and the first period ends after the second period has started' do
         let(:sample_csv_data) do
           <<~CSV
             appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
-            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,03/03/2012 00:00:00,Core Induction Programme,3,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,3,2600071
             025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,04/04/2012 00:00:00,Full Induction Programme,3,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2021 00:00:00,,Full Induction Programme,,2600071
           CSV
         end
 
-        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(csv: sample_csv) }
+        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(nil, csv: sample_csv) }
 
         it 'combines the two periods so the new period has the earliest start date and the latest finish date' do
           expect(subject.periods_as_hashes_by_trn).to eql(
@@ -91,10 +106,21 @@
                 AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
-                  induction_programme: 'Core Induction Programme',
-                  appropriate_body_id: ab_1.id,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 3,
+                  notes: []
+                ).to_hash,
+                # unrelated ongoing record so induction periods are included by the import
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2021, 2, 2),
+                  finished_on: nil,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
-                  number_of_terms: 3
+                  number_of_terms: 0,
+                  notes: []
                 ).to_hash
               ]
             }
@@ -106,12 +132,13 @@
         let(:sample_csv_data) do
           <<~CSV
             appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
-            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,04/04/2012 00:00:00,,3,2600071
-            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,03/03/2012 00:00:00,,3,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,04/04/2012 00:00:00,Full Induction Programme,3,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,4,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2021 00:00:00,,Full Induction Programme,,2600071
           CSV
         end
 
-        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(csv: sample_csv) }
+        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(nil, csv: sample_csv) }
 
         it 'combines the two periods so the new period has the earliest start date and the latest finish date' do
           expect(subject.periods_as_hashes_by_trn).to eql(
@@ -120,39 +147,66 @@
                 AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
-                  induction_programme: nil,
-                  appropriate_body_id: ab_1.id,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
-                  number_of_terms: 3
+                  number_of_terms: 4
+                ).to_hash,
+                # unrelated ongoing record so induction periods are included by the import
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2021, 1, 1),
+                  finished_on: nil,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 0
                 ).to_hash
               ]
             }
           )
         end
+
+        it 'keeps the higher number of terms' do
+          number_of_terms = subject.periods_as_hashes_by_trn.dig('2600071', 0, :number_of_terms)
+
+          expect(number_of_terms).to be(4)
+        end
       end
 
       context 'and the first period contains the second' do
         let(:sample_csv_data) do
           <<~CSV
             appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
-            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,04/04/2012 00:00:00,,2,2600071
-            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,03/03/2012 00:00:00,,3,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,04/04/2012 00:00:00,Full Induction Programme,2,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,3,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2021 00:00:00,,Full Induction Programme,3,2600071
           CSV
         end
 
-        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(csv: sample_csv) }
+        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(nil, csv: sample_csv) }
 
-        it 'only the first is kept' do
+        it 'keeps the dates from the first' do
           expect(subject.periods_as_hashes_by_trn).to eql(
             {
               '2600071' => [
                 AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
-                  induction_programme: nil,
-                  appropriate_body_id: ab_1.id,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
-                  number_of_terms: 2
+                  number_of_terms: 3,
+                  notes: []
+                ).to_hash,
+                # unrelated ongoing record so induction periods are included by the import
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2021, 2, 2),
+                  finished_on: nil,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 3,
+                  notes: []
                 ).to_hash
               ]
             }
@@ -164,12 +218,13 @@
         let(:sample_csv_data) do
           <<~CSV
             appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
-            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,03/03/2012 00:00:00,Core Induction Programme,1,2600071
-            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,04/04/2012 00:00:00,Full induction programme,4,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,1,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,04/04/2012 00:00:00,Full Induction Programme,4,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2021 00:00:00,,Full induction programme,0,2600071
           CSV
         end
 
-        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(csv: sample_csv) }
+        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(nil, csv: sample_csv) }
 
         it 'only the second is kept' do
           expect(subject.periods_as_hashes_by_trn).to eql(
@@ -179,9 +234,20 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.id,
+                  appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
-                  number_of_terms: 4
+                  number_of_terms: 4,
+                  notes: []
+                ).to_hash,
+                # unrelated ongoing record so induction periods are included by the import
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2021, 1, 1),
+                  finished_on: nil,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 0,
+                  notes: []
                 ).to_hash
               ]
             }
@@ -190,13 +256,16 @@
       end
     end
 
-    context 'when an ECT has two induction period with one AB' do
+    context 'when an ECT has two induction periods that have different programme types with one AB' do
+      xspecify 'when one contains the other'
+
       context 'when the periods do not overlap' do
         let(:sample_csv_data) do
           <<~CSV
             appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
-            1ddf3e82-c1ae-e311-b8ed-005056822391,01/01/2012 00:00:00,03/03/2012 00:00:00,Core Induction Programme,4,3600071
-            025e61e7-ec32-eb11-a813-000d3a228dfc,03/03/2012 00:00:00,05/05/2012 00:00:00,Full Induction Programme,2,3600071
+            #{ab_2.legacy_id},01/01/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,4,3600071
+            #{ab_2.legacy_id},03/03/2012 00:00:00,05/05/2012 00:00:00,Core Induction Programme,2,3600071
+            #{ab_2.legacy_id},03/03/2021 00:00:00,,Full Induction Programme,2,3600071
           CSV
         end
 
@@ -207,18 +276,130 @@
                 AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 3, 3),
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_2.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 4,
+                  notes: []
+                ).to_hash,
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2012, 3, 3),
+                  finished_on: Date.new(2012, 5, 5),
                   induction_programme: 'Core Induction Programme',
-                  appropriate_body_id: ab_2.id,
+                  appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
-                  number_of_terms: 4
+                  number_of_terms: 2,
+                  notes: []
+                ).to_hash,
+                # unrelated ongoing record so induction periods are included by the import
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2021, 3, 3),
+                  finished_on: nil,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_2.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 2,
+                  notes: []
+                ).to_hash
+              ]
+            }
+          )
+        end
+      end
+
+      context 'when the periods overlap' do
+        let(:sample_csv_data) do
+          <<~CSV
+            appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
+            #{ab_2.legacy_id},01/01/2012 00:00:00,05/05/2012 00:00:00,Full Induction Programme,4,3600071
+            #{ab_2.legacy_id},04/04/2012 00:00:00,06/06/2012 00:00:00,Core Induction Programme,2,3600071
+            #{ab_2.legacy_id},03/03/2021 00:00:00,,Full Induction Programme,2,3600071
+          CSV
+        end
+
+        it 'the earlier one is curtailed so it does not clash with the later one' do
+          expect(subject.periods_as_hashes_by_trn).to eql(
+            {
+              '3600071' => [
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2012, 1, 1),
+                  finished_on: Date.new(2012, 4, 4),
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_2.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 4,
+                  notes: []
+                ).to_hash,
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2012, 4, 4),
+                  finished_on: Date.new(2012, 6, 6),
+                  induction_programme: 'Core Induction Programme',
+                  appropriate_body_id: ab_2.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 2,
+                  notes: []
+                ).to_hash,
+                # unrelated ongoing record so induction periods are included by the import
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2021, 3, 3),
+                  finished_on: nil,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_2.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 2,
+                  notes: []
+                ).to_hash
+              ]
+            }
+          )
+        end
+      end
+    end
+
+    context 'when an ECT has two induction period with different ABs' do
+      xspecify 'when one contains the other'
+
+      context 'when the periods do not overlap' do
+        let(:sample_csv_data) do
+          <<~CSV
+            appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
+            1ddf3e82-c1ae-e311-b8ed-005056822391,01/01/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,4,3600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,03/03/2012 00:00:00,05/05/2012 00:00:00,Full Induction Programme,2,3600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,03/03/2021 00:00:00,,Full Induction Programme,2,3600071
+          CSV
+        end
+
+        it 'both are kept' do
+          expect(subject.periods_as_hashes_by_trn).to eql(
+            {
+              '3600071' => [
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2012, 1, 1),
+                  finished_on: Date.new(2012, 3, 3),
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_2.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 4,
+                  notes: []
                 ).to_hash,
                 AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
                   started_on: Date.new(2012, 3, 3),
                   finished_on: Date.new(2012, 5, 5),
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 2,
+                  notes: []
+                ).to_hash,
+                # unrelated ongoing record so induction periods are included by the import
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2021, 3, 3),
+                  finished_on: nil,
                   induction_programme: 'Full Induction Prorgramme',
-                  appropriate_body_id: ab_1.id,
+                  appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
-                  number_of_terms: 2
+                  number_of_terms: 2,
+                  notes: []
                 ).to_hash
               ]
             }
@@ -231,7 +412,8 @@
           <<~CSV
             appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
             1ddf3e82-c1ae-e311-b8ed-005056822391,01/01/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,1,3600071
-            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,05/05/2012 00:00:00,Core Induction Programme,4,3600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2012 00:00:00,05/05/2012 00:00:00,Full Induction Programme,4,3600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,02/02/2021 00:00:00,,Full Induction Programme,4,3600071
           CSV
         end
 
@@ -243,15 +425,24 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 2, 2),
                   induction_programme: 'Full Induction Progamme',
-                  appropriate_body_id: ab_2.id,
+                  appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 1
                 ).to_hash,
                 AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
                   started_on: Date.new(2012, 2, 2),
                   finished_on: Date.new(2012, 5, 5),
-                  induction_programme: 'Core Induction Programme',
-                  appropriate_body_id: ab_1.id,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 4
+                ).to_hash,
+                # unrelated ongoing record so induction periods are included by the import
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2021, 2, 2),
+                  finished_on: nil,
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 4
                 ).to_hash

From 950b8b02337a54a2c0704ab1d89b078c6984e4f2 Mon Sep 17 00:00:00 2001
From: Peter Yates <peter.yates@graphia.co.uk>
Date: Mon, 27 Jan 2025 12:35:03 +0000
Subject: [PATCH 4/5] Add special cases for periods that start on the same day

When one has an end date and number of terms it should be favoured over
the other (ongoing one) that doesn't.
---
 .../importers/induction_period_importer.rb    | 36 +++++++++++
 .../induction_period_importer_spec.rb         | 60 +++++++++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/lib/appropriate_bodies/importers/induction_period_importer.rb b/lib/appropriate_bodies/importers/induction_period_importer.rb
index 3394afe4..ffe4f4d6 100644
--- a/lib/appropriate_bodies/importers/induction_period_importer.rb
+++ b/lib/appropriate_bodies/importers/induction_period_importer.rb
@@ -16,6 +16,14 @@ def length
         (finished_on || Time.zone.today) - started_on
       end
 
+      def finished?
+        finished_on.present? && number_of_terms.present?
+      end
+
+      def ongoing?
+        !finished?
+      end
+
       # used in notes
       def to_h
         { appropriate_body_id:, started_on:, finished_on:, induction_programme:, number_of_terms: }
@@ -83,6 +91,33 @@ def periods_by_trn
 
                 if sibling.appropriate_body_id == current.appropriate_body_id && sibling.induction_programme == current.induction_programme
                   case
+                  when current.started_on == sibling.started_on && current.finished? && sibling.ongoing?
+                    #               ┌─────────────────────────────────────────┐
+                    #   Current     │                  KEEP                   │
+                    #               └─────────────────────────────────────────┘
+                    #               ┌──────────────────────────────────────────────────────────>
+                    #   Sibling     │              DISCARD
+                    #               └──────────────────────────────────────────────────────────>
+                    current.notes << {
+                      heading: "Imported from DQT",
+                      body: "DQT held 2 induction periods for this teacher/appropriate body combination with the same start date. The ongoing one was discarded.",
+                      data: { originals: [original_sibling, original_current], combined: current.to_h }
+                    }
+                    keep.delete(sibling)
+                    keep << current
+                  when current.started_on == sibling.started_on && sibling.finished? && current.ongoing?
+                    #               ┌──────────────────────────────────────────────────────────>
+                    #   Current     │                  DISCARD
+                    #               └──────────────────────────────────────────────────────────>
+                    #               ┌─────────────────────────────────────────┐
+                    #   Sibling     │              KEEP                       │
+                    #               └─────────────────────────────────────────┘
+                    sibling.notes << {
+                      heading: "Imported from DQT",
+                      body: "DQT held 2 induction periods for this teacher/appropriate body combination with the same start date. The ongoing one was discarded.",
+                      data: { originals: [original_sibling, original_current], combined: sibling.to_h }
+                    }
+                    next
                   when sibling.range.cover?(current.range)
                     #                  ┌─────────────────────────────┐
                     #   Current        │           DISCARD           │
@@ -120,6 +155,7 @@ def periods_by_trn
                     #               ┌─────────────────────────────────────────┬───┐
                     #   Sibling     │                EXTEND                   │╳╳╳│
                     #               └─────────────────────────────────────────┴───┘
+
                     current.number_of_terms = [sibling.number_of_terms, current.number_of_terms].max
                     sibling.finished_on = current.finished_on
                     sibling.notes << {
diff --git a/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb b/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
index 690de623..7ec3f084 100644
--- a/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
+++ b/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
@@ -128,6 +128,66 @@
         end
       end
 
+      context 'and the first period has an end date and terms but the second does not' do
+        let(:sample_csv_data) do
+          <<~CSV
+            appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,3,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,,Full Induction Programme,,2600071
+          CSV
+        end
+
+        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(nil, csv: sample_csv) }
+
+        it 'it keeps the finished (first) period' do
+          expect(subject.periods_as_hashes_by_trn).to eql(
+            {
+              '2600071' => [
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2012, 1, 1),
+                  finished_on: Date.new(2012, 3, 3),
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 3,
+                  notes: []
+                ).to_hash,
+              ]
+            }
+          )
+        end
+      end
+
+      context 'and the second period has an end date and terms but the first does not' do
+        let(:sample_csv_data) do
+          <<~CSV
+            appropriate_body_id,started_on,finished_on,induction_programme_choice,number_of_terms,trn
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,,Full Induction Programme,,2600071
+            025e61e7-ec32-eb11-a813-000d3a228dfc,01/01/2012 00:00:00,03/03/2012 00:00:00,Full Induction Programme,3,2600071
+          CSV
+        end
+
+        subject { AppropriateBodies::Importers::InductionPeriodImporter.new(nil, csv: sample_csv) }
+
+        it 'it keeps the finished (second) period' do
+          expect(subject.periods_as_hashes_by_trn).to eql(
+            {
+              '2600071' => [
+                AppropriateBodies::Importers::InductionPeriodImporter::Row.new(
+                  started_on: Date.new(2012, 1, 1),
+                  finished_on: Date.new(2012, 3, 3),
+                  induction_programme: 'Full Induction Programme',
+                  appropriate_body_id: ab_1.legacy_id,
+                  trn: '2600071',
+                  number_of_terms: 3,
+                  notes: []
+                ).to_hash,
+              ]
+            }
+          )
+        end
+      end
+
       context 'and the second period ends after the first period has started' do
         let(:sample_csv_data) do
           <<~CSV

From c3f8871cdc69f1bddc1b752e352752e53ed541da Mon Sep 17 00:00:00 2001
From: Peter Yates <peter.yates@graphia.co.uk>
Date: Fri, 24 Jan 2025 16:48:36 +0000
Subject: [PATCH 5/5] Rework the import process

This change makes the three importers (teacher, AB, IP) less independent
and puts the AppropriateBodies::Importers::Importer class in charge.

This means the three importers can depend on each other, which means we
can make the process *much faster*.

The raw teachers table has 1.8m records and parsing it into a Ruby CSV
file is slow, and we only want ~50k of those records so it's a pointless
exercise. Now we use the induction periods table to work out which
teachers we need, and loop through the CSV file looking for the rows we
want, before parsing.
---
 .../importers/appropriate_body_importer.rb    | 35 +++-----
 lib/appropriate_bodies/importers/importer.rb  | 59 +++++++++++--
 .../importers/induction_period_importer.rb    | 26 ++++--
 .../importers/teacher_importer.rb             | 82 ++++++++++---------
 .../induction_period_importer_spec.rb         | 54 ++++++------
 5 files changed, 153 insertions(+), 103 deletions(-)

diff --git a/lib/appropriate_bodies/importers/appropriate_body_importer.rb b/lib/appropriate_bodies/importers/appropriate_body_importer.rb
index 8842ac8a..8ffe2604 100644
--- a/lib/appropriate_bodies/importers/appropriate_body_importer.rb
+++ b/lib/appropriate_bodies/importers/appropriate_body_importer.rb
@@ -4,38 +4,27 @@ module AppropriateBodies::Importers
   class AppropriateBodyImporter
     IMPORT_ERROR_LOG = 'tmp/appropriate_body_import.log'.freeze
 
-    Row = Struct.new(:legacy_id, :name, :dfe_sign_in_organisation_id, :local_authority_code, :establishment_number)
+    Row = Struct.new(:legacy_id, :name, :dfe_sign_in_organisation_id, :local_authority_code, :establishment_number) do
+      def to_h
+        { name:, legacy_id:, establishment_number:, local_authority_code:, dfe_sign_in_organisation_id: SecureRandom.uuid } # FIXME: fix dfe_sign_in_organisation_id
+      end
+    end
 
-    def initialize(filename)
+    def initialize(filename, wanted_legacy_ids)
       @csv = CSV.read(filename, headers: true)
+      @wanted_legacy_ids = wanted_legacy_ids
 
       File.open(IMPORT_ERROR_LOG, 'w') { |f| f.truncate(0) }
       @import_error_log = Logger.new(IMPORT_ERROR_LOG, File::CREAT)
     end
 
     def rows
-      @csv.map { |row| Row.new(**build(row)) }
-    end
+      @csv.map { |row|
+        next unless row['id'].in?(@wanted_legacy_ids)
 
-    # def import
-    #   AppropriateBody.transaction do
-    #     @csv.each.with_index(1) do |row, i|
-    #       Rails.logger.debug("attempting to import row: #{row.to_h}")
-    #
-    #       begin
-    #         AppropriateBody.create!(**build(row))
-    #       rescue ActiveRecord::RecordInvalid => e
-    #         @import_error_log.error "#########################"
-    #         @import_error_log.error "Failed to import Appropriate Body"
-    #         @import_error_log.error "Row number: #{i}"
-    #         @import_error_log.error "Message: #{e.message}"
-    #         @import_error_log.error "Row data: #{row}"
-    #       end
-    #     end
-    #   end
-    #
-    #   @csv.count
-    # end
+        Row.new(**build(row))
+      }.compact
+    end
 
   private
 
diff --git a/lib/appropriate_bodies/importers/importer.rb b/lib/appropriate_bodies/importers/importer.rb
index f50bd7a5..8f20338e 100644
--- a/lib/appropriate_bodies/importers/importer.rb
+++ b/lib/appropriate_bodies/importers/importer.rb
@@ -1,15 +1,64 @@
 module AppropriateBodies::Importers
   class Importer
+    # rubocop:disable Rails/Output
     def initialize(appropriate_body_csv, teachers_csv, induction_period_csv)
-      # ab_importer_rows = AppropriateBodyImporter.new(appropriate_body_csv).rows
-      # teacher_importer_rows = TeacherImporter.new(teachers_csv).rows
       induction_periods_grouped_by_trn = InductionPeriodImporter.new(induction_period_csv).periods_by_trn
 
-      true
+      active_teachers = induction_periods_grouped_by_trn.keys
+      teacher_importer_rows = TeacherImporter.new(teachers_csv, active_teachers).rows
+
+      active_abs = induction_periods_grouped_by_trn.flat_map { |_trn, ips| ips.map(&:legacy_appropriate_body_id) }.uniq
+      ab_importer_rows = AppropriateBodyImporter.new(appropriate_body_csv, active_abs).rows
+
+      puts "Active appropriate bodies: #{active_abs.count}"
+      # FIXME: use insert_all! here
+      AppropriateBody.insert_all(ab_importer_rows.select { |r| r.legacy_id.in?(active_abs) }.map(&:to_h))
+      puts "Appropriate bodies inserted: #{AppropriateBody.count}"
+
+      puts "Active Teachers: #{teacher_importer_rows.count}"
+      # FIXME: use insert_all! here
+      Teacher.insert_all(teacher_importer_rows.map(&:to_h))
+      puts "Teachers inserted: #{Teacher.count}"
+
+      # TODO: insert induction periods
+      teacher_trn_to_id = Teacher.all.select(:id, :trn).each_with_object({}) do |t, h|
+        h[t[:trn]] = t[:id]
+      end
+
+      ab_legacy_id_to_id = AppropriateBody.all.select(:id, :legacy_id).each_with_object({}) do |ab, h|
+        h[ab[:legacy_id]] = ab[:id]
+      end
+
+      induction_period_rows = []
+
+      induction_periods_grouped_by_trn.each do |trn, induction_periods|
+        induction_periods.each do |ip|
+          begin
+            ip.teacher_id = teacher_trn_to_id.fetch(trn)
+          rescue KeyError
+            puts "No teacher found with trn: #{trn}"
+            next
+          end
+
+          begin
+            ip.appropriate_body_id = ab_legacy_id_to_id.fetch(ip.legacy_appropriate_body_id)
+          rescue KeyError
+            puts "No appropriate body found with legacy_id: #{ip.legacy_appropriate_body_id}"
+            next
+          end
+
+          induction_period_rows << ip
+        end
+      end
+
+      # FIXME: use insert_all! here
+      InductionPeriod.insert_all(induction_period_rows.map(&:to_record))
+
+      # TODO: insert extensions
+      # TODO: insert events
 
       binding.debugger
-      # get induction periods grouped by TRN
-      #
     end
+    # rubocop:enable Rails/Output
   end
 end
diff --git a/lib/appropriate_bodies/importers/induction_period_importer.rb b/lib/appropriate_bodies/importers/induction_period_importer.rb
index ffe4f4d6..a1a02f03 100644
--- a/lib/appropriate_bodies/importers/induction_period_importer.rb
+++ b/lib/appropriate_bodies/importers/induction_period_importer.rb
@@ -7,7 +7,7 @@ class InductionPeriodImporter
 
     attr_accessor :csv, :data
 
-    Row = Struct.new(:appropriate_body_id, :started_on, :finished_on, :induction_programme, :number_of_terms, :trn, :notes, keyword_init: true) do
+    Row = Struct.new(:legacy_appropriate_body_id, :started_on, :finished_on, :induction_programme, :number_of_terms, :trn, :notes, :teacher_id, :appropriate_body_id, keyword_init: true) do
       def range
         started_on...finished_on
       end
@@ -26,12 +26,16 @@ def ongoing?
 
       # used in notes
       def to_h
-        { appropriate_body_id:, started_on:, finished_on:, induction_programme:, number_of_terms: }
+        { appropriate_body_id: legacy_appropriate_body_id, started_on:, finished_on:, induction_programme:, number_of_terms: }
       end
 
       # used for comparisons in tests
       def to_hash
-        { appropriate_body_id:, started_on:, finished_on:, number_of_terms:, induction_programme: convert_induction_programme }
+        { appropriate_body_id:, started_on:, finished_on:, induction_programme: convert_induction_programme, number_of_terms: }
+      end
+
+      def to_record
+        { appropriate_body_id:, started_on:, finished_on:, induction_programme: convert_induction_programme, number_of_terms:, teacher_id: }
       end
 
     private
@@ -58,13 +62,15 @@ def rows
 
     def build(row)
       {
-        appropriate_body_id: row['appropriate_body_id'],
+        legacy_appropriate_body_id: row['appropriate_body_id'],
         started_on: extract_date(row['started_on']),
         finished_on: extract_date(row['finished_on']),
         induction_programme: row['induction_programme_choice'],
         number_of_terms: row['number_of_terms'].to_i,
         trn: row['trn'],
-        notes: []
+        notes: [],
+        appropriate_body_id: nil,
+        teacher_id: nil
       }
     end
 
@@ -89,7 +95,7 @@ def periods_by_trn
                 original_sibling = sibling.to_h
                 original_current = current.to_h
 
-                if sibling.appropriate_body_id == current.appropriate_body_id && sibling.induction_programme == current.induction_programme
+                if sibling.legacy_appropriate_body_id == current.legacy_appropriate_body_id && sibling.induction_programme == current.induction_programme
                   case
                   when current.started_on == sibling.started_on && current.finished? && sibling.ongoing?
                     #               ┌─────────────────────────────────────────┐
@@ -181,7 +187,7 @@ def periods_by_trn
                   else
                     fail
                   end
-                elsif sibling.appropriate_body_id == current.appropriate_body_id && sibling.induction_programme != current.induction_programme
+                elsif sibling.legacy_appropriate_body_id == current.legacy_appropriate_body_id && sibling.induction_programme != current.induction_programme
                   case
                   when sibling.range.cover?(current.started_on) && !sibling.range.cover?(current.finished_on)
                     #                         ┌─────────────────────────────────┐
@@ -199,10 +205,12 @@ def periods_by_trn
                     }
                     keep << current
                   end
-                else
+                else # different appropriate bodies
                   case
+                  when sibling.started_on == current.started_on
+                    # TODO: work out what to do here
                   when sibling.range.cover?(current.range) || current.range.cover?(sibling.range)
-                    # an indunction period from one AB entirely contains one from another,
+                    # an induction period from one AB entirely contains one from another,
                     # which do we keep?
                     #
                     # This might never happen in prod so let's ignore it for now
diff --git a/lib/appropriate_bodies/importers/teacher_importer.rb b/lib/appropriate_bodies/importers/teacher_importer.rb
index e9d6fbb5..e43eea52 100644
--- a/lib/appropriate_bodies/importers/teacher_importer.rb
+++ b/lib/appropriate_bodies/importers/teacher_importer.rb
@@ -4,57 +4,61 @@ module AppropriateBodies::Importers
   class TeacherImporter
     IMPORT_ERROR_LOG = 'tmp/teacher_import.log'.freeze
 
-    Row = Struct.new(:trn, :trs_first_name, :trs_last_name)
+    Row = Struct.new(:trn, :first_name, :last_name, :induction_status) do
+      def to_h
+        {
+          trn:,
+          trs_first_name: first_name_with_fallback,
+          trs_last_name: last_name_with_fallback,
+        }
+      end
 
-    def initialize(filename)
-      @csv = CSV.read(filename, headers: true)
+      def first_name_with_fallback
+        first_name || 'Unknown'
+      end
 
-      File.open(IMPORT_ERROR_LOG, 'w') { |f| f.truncate(0) }
-      @import_error_log = Logger.new(IMPORT_ERROR_LOG, File::CREAT)
+      def last_name_with_fallback
+        last_name || 'Unknown'
+      end
     end
 
-    def rows
-      @csv.first(5000).map { |row| Row.new(**build(row)) }
+    def initialize(filename, wanted_trns)
+      sorted_wanted_trns = wanted_trns.sort
+
+      file = File.readlines(filename)
+      file.delete_at(0)
+      sorted_lines = file.sort
+
+      wanted_lines = []
+
+      seek = sorted_wanted_trns.shift
+
+      sorted_lines.each do |line|
+        next unless line.start_with?(seek)
+
+        wanted_lines << line
+
+        break if sorted_wanted_trns.empty?
+
+        seek = sorted_wanted_trns.shift
+      end
+
+      @csv = CSV.parse(wanted_lines.join, headers: %w[trn first_name last_name induction_status])
     end
 
-    # def import
-    #   count = 0
-    #
-    #   Teacher.transaction do
-    #     @csv.each.with_index(1) do |row, i|
-    #       Rails.logger.debug("attempting to import row: #{row.to_h}")
-    #
-    #       next if row.values_at('trn', 'first_name', 'last_name').any?(&:blank?)
-    #
-    #       begin
-    #         Teacher.create!(**(build(row))).tap do |teacher|
-    #           if (induction_extension_terms = convert_extension(row)) && induction_extension_terms.positive?
-    #             Rails.logger.warn("Importing extension: #{induction_extension_terms}")
-    #             teacher.induction_extensions.create!(number_of_terms: induction_extension_terms)
-    #           end
-    #         end
-    #       rescue ActiveRecord::RecordInvalid => e
-    #         @import_error_log.error "#########################"
-    #         @import_error_log.error "Failed to import Teacher"
-    #         @import_error_log.error "Row number: #{i}"
-    #         @import_error_log.error "Message: #{e.message}"
-    #         @import_error_log.error "Row data: #{row}"
-    #       end
-    #
-    #       count += 1
-    #     end
-    #   end
-    #
-    #   [count, @csv.count]
-    # end
+    def rows
+      @rows ||= @csv.map { |row| Row.new(**build(row)) }
+    end
 
   private
 
     def build(row)
       {
         trn: row['trn'],
-        trs_first_name: row['first_name'].strip,
-        trs_last_name: row['last_name']&.strip,
+        first_name: row['first_name']&.strip,
+        last_name: row['last_name']&.strip,
+        induction_status: row['induction_status']
+        # FIXME: extensions
       }
     end
 
diff --git a/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb b/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
index 7ec3f084..85cdc790 100644
--- a/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
+++ b/spec/lib/appropriate_bodies/importers/induction_period_importer_spec.rb
@@ -32,10 +32,10 @@
   describe 'mapping induction programmes' do
     it 'converts names to codes properly' do
       mappings = {
-        subject.rows.find { |r| r.appropriate_body_id == ab_1.legacy_id } => 'cip',
-        subject.rows.find { |r| r.appropriate_body_id == ab_2.legacy_id } => 'diy',
-        subject.rows.find { |r| r.appropriate_body_id == ab_3.legacy_id } => 'fip',
-        subject.rows.find { |r| r.appropriate_body_id == ab_4.legacy_id } => 'fip' # defaults when missing
+        subject.rows.find { |r| r.legacy_appropriate_body_id == ab_1.legacy_id } => 'cip',
+        subject.rows.find { |r| r.legacy_appropriate_body_id == ab_2.legacy_id } => 'diy',
+        subject.rows.find { |r| r.legacy_appropriate_body_id == ab_3.legacy_id } => 'fip',
+        subject.rows.find { |r| r.legacy_appropriate_body_id == ab_4.legacy_id } => 'fip' # defaults when missing
       }
       mappings.each do |row, expected_induction_programme|
         expect(row.to_hash.fetch(:induction_programme)).to eql(expected_induction_programme)
@@ -75,7 +75,7 @@
                 started_on: Date.new(2012, 1, 1),
                 finished_on: nil,
                 induction_programme: nil,
-                appropriate_body_id: ab_1.legacy_id,
+                legacy_appropriate_body_id: ab_1.legacy_id,
                 trn: '2600071',
                 number_of_terms: 3,
                 notes: []
@@ -107,7 +107,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 3,
                   notes: []
@@ -117,7 +117,7 @@
                   started_on: Date.new(2021, 2, 2),
                   finished_on: nil,
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 0,
                   notes: []
@@ -147,7 +147,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 3, 3),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 3,
                   notes: []
@@ -177,7 +177,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 3, 3),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 3,
                   notes: []
@@ -208,7 +208,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 4
                 ).to_hash,
@@ -217,7 +217,7 @@
                   started_on: Date.new(2021, 1, 1),
                   finished_on: nil,
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 0
                 ).to_hash
@@ -253,7 +253,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 3,
                   notes: []
@@ -263,7 +263,7 @@
                   started_on: Date.new(2021, 2, 2),
                   finished_on: nil,
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 3,
                   notes: []
@@ -294,7 +294,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 4,
                   notes: []
@@ -304,7 +304,7 @@
                   started_on: Date.new(2021, 1, 1),
                   finished_on: nil,
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 0,
                   notes: []
@@ -337,7 +337,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 3, 3),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_2.legacy_id,
+                  legacy_appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 4,
                   notes: []
@@ -346,7 +346,7 @@
                   started_on: Date.new(2012, 3, 3),
                   finished_on: Date.new(2012, 5, 5),
                   induction_programme: 'Core Induction Programme',
-                  appropriate_body_id: ab_2.legacy_id,
+                  legacy_appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 2,
                   notes: []
@@ -356,7 +356,7 @@
                   started_on: Date.new(2021, 3, 3),
                   finished_on: nil,
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_2.legacy_id,
+                  legacy_appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 2,
                   notes: []
@@ -385,7 +385,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 4, 4),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_2.legacy_id,
+                  legacy_appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 4,
                   notes: []
@@ -394,7 +394,7 @@
                   started_on: Date.new(2012, 4, 4),
                   finished_on: Date.new(2012, 6, 6),
                   induction_programme: 'Core Induction Programme',
-                  appropriate_body_id: ab_2.legacy_id,
+                  legacy_appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 2,
                   notes: []
@@ -404,7 +404,7 @@
                   started_on: Date.new(2021, 3, 3),
                   finished_on: nil,
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_2.legacy_id,
+                  legacy_appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 2,
                   notes: []
@@ -437,7 +437,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 3, 3),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_2.legacy_id,
+                  legacy_appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 4,
                   notes: []
@@ -446,7 +446,7 @@
                   started_on: Date.new(2012, 3, 3),
                   finished_on: Date.new(2012, 5, 5),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 2,
                   notes: []
@@ -456,7 +456,7 @@
                   started_on: Date.new(2021, 3, 3),
                   finished_on: nil,
                   induction_programme: 'Full Induction Prorgramme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 2,
                   notes: []
@@ -485,7 +485,7 @@
                   started_on: Date.new(2012, 1, 1),
                   finished_on: Date.new(2012, 2, 2),
                   induction_programme: 'Full Induction Progamme',
-                  appropriate_body_id: ab_2.legacy_id,
+                  legacy_appropriate_body_id: ab_2.legacy_id,
                   trn: '2600071',
                   number_of_terms: 1
                 ).to_hash,
@@ -493,7 +493,7 @@
                   started_on: Date.new(2012, 2, 2),
                   finished_on: Date.new(2012, 5, 5),
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 4
                 ).to_hash,
@@ -502,7 +502,7 @@
                   started_on: Date.new(2021, 2, 2),
                   finished_on: nil,
                   induction_programme: 'Full Induction Programme',
-                  appropriate_body_id: ab_1.legacy_id,
+                  legacy_appropriate_body_id: ab_1.legacy_id,
                   trn: '2600071',
                   number_of_terms: 4
                 ).to_hash