Skip to content
This repository has been archived by the owner on Sep 2, 2024. It is now read-only.

Commit

Permalink
Generates index name from the versions listed in the file name (#66)
Browse files Browse the repository at this point in the history
* Generates index name from the versions listed in the file name

* Uncomments commented tests
  • Loading branch information
Mironor authored Apr 5, 2017
1 parent c92d3ea commit 48a2f5a
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 14 deletions.
6 changes: 4 additions & 2 deletions batch/src/main/scala/uk/gov/ons/addressindex/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,18 @@ For usage see below:
}

// each run of this application has a unique index name
val baseIndexName = config.getString("addressindex.elasticsearch.indices.hybrid")
val indexName = s"${baseIndexName}_${System.currentTimeMillis()}"
val indexName = generateIndexName()

// Print usage when help was requested; otherwise run the selected steps.
if (opts.help()) opts.printHelp()
else {
  // Throws IllegalArgumentException when the input file names disagree on epoch/date,
  // so nothing is posted or saved for an inconsistent set of files.
  AddressIndexFileReader.validateFileNames()

  if (opts.mapping()) postMapping(indexName)
  if (opts.hybrid()) saveHybridAddresses()
}

/**
 * Builds the index name for this run by delegating to
 * `AddressIndexFileReader.generateIndexNameFromFileName()`.
 *
 * @return the generated index name
 */
private def generateIndexName(): String = {
  AddressIndexFileReader.generateIndexNameFromFileName()
}

private def generateNagAddresses(): DataFrame = {
val blpu = AddressIndexFileReader.readBlpuCSV()
val lpi = AddressIndexFileReader.readLpiCSV()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,24 @@ import uk.gov.ons.addressindex.utils.SparkProvider
*/
object AddressIndexFileReader {

private lazy val config = ConfigFactory.load()
private lazy val pathToCsv = config.getString("addressindex.files.csv.delivery-point")
private lazy val pathToBlpuCSV = config.getString("addressindex.files.csv.blpu")
private lazy val pathToClassificationCSV = config.getString("addressindex.files.csv.classification")
private lazy val pathToCrossrefCSV = config.getString("addressindex.files.csv.crossref")
private lazy val pathToLpiCSV = config.getString("addressindex.files.csv.lpi")
private lazy val pathToOrganisationCSV = config.getString("addressindex.files.csv.organisation")
private lazy val pathToStreetCSV = config.getString("addressindex.files.csv.street")
private lazy val pathToStreetDescriptorCSV = config.getString("addressindex.files.csv.street-descriptor")
private lazy val pathToSuccessorCSV = config.getString("addressindex.files.csv.successor")
private lazy val pathToHierarchyCSV = config.getString("addressindex.files.csv.hierarchy")
// Application configuration; loaded on first access.
lazy val config = ConfigFactory.load()
// One configured string per input CSV, each read from an `addressindex.files.csv.*` key.
// All are lazy, so a missing key only surfaces when that particular value is first used.
lazy val pathToDeliveryPointCsv = config.getString("addressindex.files.csv.delivery-point")
lazy val pathToBlpuCSV = config.getString("addressindex.files.csv.blpu")
lazy val pathToClassificationCSV = config.getString("addressindex.files.csv.classification")
lazy val pathToCrossrefCSV = config.getString("addressindex.files.csv.crossref")
lazy val pathToLpiCSV = config.getString("addressindex.files.csv.lpi")
lazy val pathToOrganisationCSV = config.getString("addressindex.files.csv.organisation")
lazy val pathToStreetCSV = config.getString("addressindex.files.csv.street")
lazy val pathToStreetDescriptorCSV = config.getString("addressindex.files.csv.street-descriptor")
lazy val pathToSuccessorCSV = config.getString("addressindex.files.csv.successor")
lazy val pathToHierarchyCSV = config.getString("addressindex.files.csv.hierarchy")

/**
* Reads csv into a `DataFrame`
*
* @return `DataFrame` containing the delivery point data from CSV
*/
def readDeliveryPointCSV(): DataFrame = readCsv(pathToCsv, CSVSchemas.postcodeAddressFileSchema)
def readDeliveryPointCSV(): DataFrame = {
  // the delivery point file is parsed with the postcode address file schema
  val deliveryPointPath = pathToDeliveryPointCsv
  readCsv(deliveryPointPath, CSVSchemas.postcodeAddressFileSchema)
}

/**
* Reads csv into a 'DataFrame'
Expand Down Expand Up @@ -113,4 +113,51 @@ object AddressIndexFileReader {
}
}
}

/**
 * Checks that every configured CSV path carries the same epoch and date as the
 * delivery point file.
 *
 * The epoch and date are extracted from the delivery point path and each path
 * (the delivery point one included) is then passed to `validateFileName`.
 *
 * @return `true` when all paths pass validation
 * @throws IllegalArgumentException if the delivery point path has no epoch or date,
 *                                  or if any path fails `validateFileName`
 */
def validateFileNames(): Boolean = {
  val referenceEpoch = extractEpoch(pathToDeliveryPointCsv)
  val referenceDate = extractDate(pathToDeliveryPointCsv)

  val allPaths = Seq(
    pathToDeliveryPointCsv,
    pathToBlpuCSV,
    pathToClassificationCSV,
    pathToCrossrefCSV,
    pathToLpiCSV,
    pathToOrganisationCSV,
    pathToStreetCSV,
    pathToStreetDescriptorCSV,
    pathToSuccessorCSV,
    pathToHierarchyCSV
  )

  allPaths.forall(validateFileName(_, referenceEpoch, referenceDate))
}

/**
 * Checks that a file path carries the expected epoch and date: it must contain
 * `ABP_E<epoch>` followed by some text and must end with `_v<date>`.
 *
 * @param filePath path of the file to check
 * @param epoch    epoch number the file name must contain
 * @param date     date string the file name must end with, right after `_v`
 * @return `true` when the path matches (a mismatch throws instead of returning `false`)
 * @throws IllegalArgumentException if the path does not contain the given epoch and date
 */
def validateFileName(filePath: String, epoch: Int, date: String): Boolean = {
  // Quote the date so any regex metacharacters in it are matched literally.
  val quotedDate = java.util.regex.Pattern.quote(date)
  // `(?!\d)` makes the epoch match as a whole number: without it epoch 3 would
  // also accept a file named ABP_E39..., because `.+` swallows the remaining digit.
  val nameRegex = s"ABP_E$epoch(?!\\d).+_v${quotedDate}$$".r

  if (nameRegex.findFirstIn(filePath).isDefined) true
  else throw new IllegalArgumentException(s"file $filePath does not contain epoch $epoch and date $date")
}

/**
 * Extracts the epoch number from a file path containing `ABP_E<digits>` followed
 * by at least one more character.
 *
 * @param filePath path of the file to inspect
 * @return the epoch number found after the first `ABP_E`
 * @throws IllegalArgumentException if the path has no `ABP_E<digits>` followed by further
 *                                  text (a number too large for `Int` surfaces as
 *                                  `NumberFormatException`, which is a subclass)
 */
def extractEpoch(filePath: String): Int = {
  // `(?!\d)` forces the whole run of digits into the group. Without it the greedy
  // `\d+` may backtrack and give its last digit to `.+`, so "ABP_E39" would yield 3.
  val epochRegex = """ABP_E(\d+)(?!\d).+$""".r

  epochRegex
    .findFirstMatchIn(filePath)
    .map(_.group(1).toInt)
    .getOrElse(throw new IllegalArgumentException(s"file $filePath does not contain epoch number"))
}

/**
 * Extracts the six-digit date that ends a file path, i.e. the digits of the
 * trailing `_v<6 digits>` in a name starting with `ABP_E`.
 *
 * @param filePath path of the file to inspect
 * @return the six digits following the final `_v`
 * @throws IllegalArgumentException if the path does not end with `_v` and exactly six digits
 */
def extractDate(filePath: String): String = {
  // Anchor on the `_v` marker that `validateFileName` also requires; matching bare
  // trailing digits would accept names that can never validate, and would return
  // the tail of a longer digit run as if it were the date.
  val dateRegex = """ABP_E.+_v(\d{6})$""".r

  dateRegex
    .findFirstMatchIn(filePath)
    .map(_.group(1))
    .getOrElse(throw new IllegalArgumentException(s"file $filePath does not contain valid date"))
}

/**
 * Builds an index name of the form `<base>_<epoch>_<date>_<current millis>`.
 *
 * The epoch and date come from the delivery point file path; the base name is the
 * `addressindex.elasticsearch.indices.hybrid` configuration value.
 *
 * @return the generated index name
 * @throws IllegalArgumentException if the delivery point path has no epoch or date
 */
def generateIndexNameFromFileName(): String = {
  val fileEpoch = extractEpoch(pathToDeliveryPointCsv)
  val fileDate = extractDate(pathToDeliveryPointCsv)
  val prefix = config.getString("addressindex.elasticsearch.indices.hybrid")
  val timestamp = System.currentTimeMillis()

  Seq(prefix, fileEpoch.toString, fileDate, timestamp.toString).mkString("_")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -290,5 +290,62 @@ class AddressIndexFileReaderSpec extends WordSpec with Matchers {
line.getInt(4) shouldBe 3 // CURRENT_LAYER
line.getLong(2) shouldBe 2 // PARENT_UPRN
}

"extract epoch from the file path" in {
  // Given a path whose file name carries epoch 39
  val path = "hdfs://path/to/file/ABP_E39_BLPU_v040506"

  // When
  val epoch = AddressIndexFileReader.extractEpoch(path)

  // Then
  epoch shouldBe 39
}

"throw exception if no epoch could be extracted" in {
  // Given a path with no digits after `ABP_E`
  val path = "hdfs://path/to/file/ABP_E_BLPU_v040506"

  // When Then
  an [IllegalArgumentException] should be thrownBy {
    AddressIndexFileReader.extractEpoch(path)
  }
}

"extract date from the file path" in {
  // Given a path whose file name ends with the date 040506
  val path = "hdfs://path/to/file/ABP_E39_BLPU_v040506"

  // When
  val date = AddressIndexFileReader.extractDate(path)

  // Then
  date shouldBe "040506"
}

"throw exception if no date could be extracted" in {
  // Given a path that does not end with a date
  val path = "hdfs://path/to/file/ABP_E39_BLPU"

  // When Then
  an [IllegalArgumentException] should be thrownBy {
    AddressIndexFileReader.extractDate(path)
  }
}

"throw exception if file could not be validated" in {
  // Given a path with epoch 39 and no date, checked against epoch 40 and date 010203
  val path = "hdfs://path/to/file/ABP_E39_BLPU"
  val expectedEpoch = 40
  val expectedDate = "010203"

  // When Then
  an [IllegalArgumentException] should be thrownBy {
    AddressIndexFileReader.validateFileName(path, expectedEpoch, expectedDate)
  }
}
}
}

0 comments on commit 48a2f5a

Please sign in to comment.