diff --git a/src/main/scala/io/archivesunleashed/app/AudioInformationExtractor.scala b/src/main/scala/io/archivesunleashed/app/AudioInformationExtractor.scala index c162b51a..2d269082 100644 --- a/src/main/scala/io/archivesunleashed/app/AudioInformationExtractor.scala +++ b/src/main/scala/io/archivesunleashed/app/AudioInformationExtractor.scala @@ -34,6 +34,7 @@ object AudioInformationExtractor { import spark.implicits._ // scalastyle:on d.select( + $"crawl_date", $"url", $"filename", $"extension", diff --git a/src/main/scala/io/archivesunleashed/app/ImageInformationExtractor.scala b/src/main/scala/io/archivesunleashed/app/ImageInformationExtractor.scala index aa968459..d4ab80fa 100644 --- a/src/main/scala/io/archivesunleashed/app/ImageInformationExtractor.scala +++ b/src/main/scala/io/archivesunleashed/app/ImageInformationExtractor.scala @@ -34,6 +34,7 @@ object ImageInformationExtractor { import spark.implicits._ // scalastyle:on d.select( + $"crawl_date", $"url", $"filename", $"extension", diff --git a/src/main/scala/io/archivesunleashed/app/PDFInformationExtractor.scala b/src/main/scala/io/archivesunleashed/app/PDFInformationExtractor.scala index ea51374b..2d105c95 100644 --- a/src/main/scala/io/archivesunleashed/app/PDFInformationExtractor.scala +++ b/src/main/scala/io/archivesunleashed/app/PDFInformationExtractor.scala @@ -34,6 +34,7 @@ object PDFInformationExtractor { import spark.implicits._ // scalastyle:on d.select( + $"crawl_date", $"url", $"filename", $"extension", diff --git a/src/main/scala/io/archivesunleashed/app/PresentationProgramInformationExtractor.scala b/src/main/scala/io/archivesunleashed/app/PresentationProgramInformationExtractor.scala index 98594fd6..0db5868c 100644 --- a/src/main/scala/io/archivesunleashed/app/PresentationProgramInformationExtractor.scala +++ b/src/main/scala/io/archivesunleashed/app/PresentationProgramInformationExtractor.scala @@ -34,6 +34,7 @@ object PresentationProgramInformationExtractor { import spark.implicits._ // scalastyle:on d.select( + $"crawl_date", $"url", $"filename", $"extension", diff --git a/src/main/scala/io/archivesunleashed/app/SpreadsheetInformationExtractor.scala b/src/main/scala/io/archivesunleashed/app/SpreadsheetInformationExtractor.scala index e303add0..1ca25ac7 100644 --- a/src/main/scala/io/archivesunleashed/app/SpreadsheetInformationExtractor.scala +++ b/src/main/scala/io/archivesunleashed/app/SpreadsheetInformationExtractor.scala @@ -34,6 +34,7 @@ object SpreadsheetInformationExtractor { import spark.implicits._ // scalastyle:on d.select( + $"crawl_date", $"url", $"filename", $"extension", diff --git a/src/main/scala/io/archivesunleashed/app/VideoInformationExtractor.scala b/src/main/scala/io/archivesunleashed/app/VideoInformationExtractor.scala index 1cd3e392..f0839195 100644 --- a/src/main/scala/io/archivesunleashed/app/VideoInformationExtractor.scala +++ b/src/main/scala/io/archivesunleashed/app/VideoInformationExtractor.scala @@ -34,6 +34,7 @@ object VideoInformationExtractor { import spark.implicits._ // scalastyle:on d.select( + $"crawl_date", $"url", $"filename", $"extension", diff --git a/src/main/scala/io/archivesunleashed/app/WordProcessorInformationExtractor.scala b/src/main/scala/io/archivesunleashed/app/WordProcessorInformationExtractor.scala index a314a7cb..a8424741 100644 --- a/src/main/scala/io/archivesunleashed/app/WordProcessorInformationExtractor.scala +++ b/src/main/scala/io/archivesunleashed/app/WordProcessorInformationExtractor.scala @@ -34,6 +34,7 @@ object WordProcessorInformationExtractor { import spark.implicits._ // scalastyle:on d.select( + $"crawl_date", $"url", $"filename", $"extension", diff --git a/src/test/scala/io/archivesunleashed/app/AudioInformationExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/AudioInformationExtractorTest.scala index b579b905..b7dd22bf 100644 --- a/src/test/scala/io/archivesunleashed/app/AudioInformationExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/AudioInformationExtractorTest.scala @@ -45,13 +45,14 @@ class AudioInformationExtractorTest extends FunSuite with BeforeAndAfter { val RESULTSLENGTH = 1 assert(dfResults.length == RESULTSLENGTH) - assert(dfResults(0).get(0) == "https://ruebot.net/files/feniz.mp3") - assert(dfResults(0).get(1) == "feniz.mp3") - assert(dfResults(0).get(2) == "mp3") - assert(dfResults(0).get(3) == "audio/mpeg") + assert(dfResults(0).get(0) == "20190817") + assert(dfResults(0).get(1) == "https://ruebot.net/files/feniz.mp3") + assert(dfResults(0).get(2) == "feniz.mp3") + assert(dfResults(0).get(3) == "mp3") assert(dfResults(0).get(4) == "audio/mpeg") - assert(dfResults(0).get(5) == "f7e7ec84b12c294e19af1ba41732c733") - assert(dfResults(0).get(6) == "a3eb95dbbea76460529d0d9ebdde5faabaff544a") + assert(dfResults(0).get(5) == "audio/mpeg") + assert(dfResults(0).get(6) == "f7e7ec84b12c294e19af1ba41732c733") + assert(dfResults(0).get(7) == "a3eb95dbbea76460529d0d9ebdde5faabaff544a") } after { diff --git a/src/test/scala/io/archivesunleashed/app/ImageInformationExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/ImageInformationExtractorTest.scala index 829cbc5d..a414cf20 100644 --- a/src/test/scala/io/archivesunleashed/app/ImageInformationExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/ImageInformationExtractorTest.scala @@ -44,15 +44,16 @@ class ImageInformationExtractorTest extends FunSuite with BeforeAndAfter { val RESULTSLENGTH = 55 assert(dfResults.length == RESULTSLENGTH) - assert(dfResults(0).get(0) == "http://www.archive.org/images/logoc.jpg") - assert(dfResults(0).get(1) == "logoc.jpg") - assert(dfResults(0).get(2) == "jpg") - assert(dfResults(0).get(3) == "image/jpeg") + assert(dfResults(0).get(0) == "20080430") + assert(dfResults(0).get(1) == "http://www.archive.org/images/logoc.jpg") + assert(dfResults(0).get(2) == "logoc.jpg") + assert(dfResults(0).get(3) == "jpg") assert(dfResults(0).get(4) == "image/jpeg") - assert(dfResults(0).get(5) == 70) - assert(dfResults(0).get(6) == 56) - assert(dfResults(0).get(7) == "8211d1fbb9b03d8522a1ae378f9d1b24") - assert(dfResults(0).get(8) == "a671e68fc211ee4996a91e99297f246b2c5faa1a") + assert(dfResults(0).get(5) == "image/jpeg") + assert(dfResults(0).get(6) == 70) + assert(dfResults(0).get(7) == 56) + assert(dfResults(0).get(8) == "8211d1fbb9b03d8522a1ae378f9d1b24") + assert(dfResults(0).get(9) == "a671e68fc211ee4996a91e99297f246b2c5faa1a") } after { diff --git a/src/test/scala/io/archivesunleashed/app/PDFInformationExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/PDFInformationExtractorTest.scala index ab88280c..3690c8a0 100644 --- a/src/test/scala/io/archivesunleashed/app/PDFInformationExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/PDFInformationExtractorTest.scala @@ -45,17 +45,18 @@ class PDFInformationExtractorTest extends FunSuite with BeforeAndAfter { val RESULTSLENGTH = 2 assert(dfResults.length == RESULTSLENGTH) + assert(dfResults(0).get(0) == "20190812") assert( dfResults(0).get( - 0 + 1 ) == "https://yorkspace.library.yorku.ca/xmlui/bitstream/handle/10315/36158/cost-analysis.pdf?sequence=1&isAllowed=y" ) - assert(dfResults(0).get(1) == "cost-analysis.pdf") - assert(dfResults(0).get(2) == "pdf") - assert(dfResults(0).get(3) == "application/pdf") + assert(dfResults(0).get(2) == "cost-analysis.pdf") + assert(dfResults(0).get(3) == "pdf") assert(dfResults(0).get(4) == "application/pdf") - assert(dfResults(0).get(5) == "aaba59d2287afd40c996488a39bbc0dd") - assert(dfResults(0).get(6) == "569c28e0e8faa6945d6ca88fcd9e195825052c71") + assert(dfResults(0).get(5) == "application/pdf") + assert(dfResults(0).get(6) == "aaba59d2287afd40c996488a39bbc0dd") + assert(dfResults(0).get(7) == "569c28e0e8faa6945d6ca88fcd9e195825052c71") } after { diff --git a/src/test/scala/io/archivesunleashed/app/PresentationProgramInformationExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/PresentationProgramInformationExtractorTest.scala index 6bdfee35..5daab598 100644 --- a/src/test/scala/io/archivesunleashed/app/PresentationProgramInformationExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/PresentationProgramInformationExtractorTest.scala @@ -47,25 +47,26 @@ class PresentationProgramInformationExtractorTest val RESULTSLENGTH = 2 assert(dfResults.length == RESULTSLENGTH) + assert(dfResults(0).get(0) == "20190815") assert( dfResults(0).get( - 0 + 1 ) == "https://ruebot.net/files/aut-test-fixtures/aut-test-fixtures.pptx" ) - assert(dfResults(0).get(1) == "aut-test-fixtures.pptx") - assert(dfResults(0).get(2) == "pptx") + assert(dfResults(0).get(2) == "aut-test-fixtures.pptx") + assert(dfResults(0).get(3) == "pptx") assert( dfResults(0).get( - 3 + 4 ) == "application/vnd.openxmlformats-officedocument.presentationml.presentation" ) assert( dfResults(0).get( - 4 + 5 ) == "application/vnd.openxmlformats-officedocument.presentationml.presentation" ) - assert(dfResults(0).get(5) == "7a7b1fe4b6d311376eaced9de3b682ee") - assert(dfResults(0).get(6) == "86fadca47b134b68247ccde62da4ce3f62b4d2ec") + assert(dfResults(0).get(6) == "7a7b1fe4b6d311376eaced9de3b682ee") + assert(dfResults(0).get(7) == "86fadca47b134b68247ccde62da4ce3f62b4d2ec") } after { diff --git a/src/test/scala/io/archivesunleashed/app/SpreadsheetInformationExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/SpreadsheetInformationExtractorTest.scala index 79b8c781..50df3719 100644 --- a/src/test/scala/io/archivesunleashed/app/SpreadsheetInformationExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/SpreadsheetInformationExtractorTest.scala @@ -45,21 +45,22 @@ class SpreadsheetInformationExtractorTest extends FunSuite with BeforeAndAfter { val RESULTSLENGTH = 4 assert(dfResults.length == RESULTSLENGTH) + assert(dfResults(0).get(0) == "20190815") assert( dfResults(0).get( - 0 + 1 ) == "https://ruebot.net/files/aut-test-fixtures/test-aut-fixture.ods" ) - assert(dfResults(0).get(1) == "test-aut-fixture.ods") - assert(dfResults(0).get(2) == "ods") + assert(dfResults(0).get(2) == "test-aut-fixture.ods") + assert(dfResults(0).get(3) == "ods") assert( - dfResults(0).get(3) == "application/vnd.oasis.opendocument.spreadsheet" + dfResults(0).get(4) == "application/vnd.oasis.opendocument.spreadsheet" ) assert( - dfResults(0).get(4) == "application/vnd.oasis.opendocument.spreadsheet" + dfResults(0).get(5) == "application/vnd.oasis.opendocument.spreadsheet" ) - assert(dfResults(0).get(5) == "7f70280757d8beb2d1bfd6fb1b6ae6e9") - assert(dfResults(0).get(6) == "448c357e78317877a98a399448031a89f1dda6fb") + assert(dfResults(0).get(6) == "7f70280757d8beb2d1bfd6fb1b6ae6e9") + assert(dfResults(0).get(7) == "448c357e78317877a98a399448031a89f1dda6fb") } after { diff --git a/src/test/scala/io/archivesunleashed/app/VideoInformationExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/VideoInformationExtractorTest.scala index ac525428..6cb8ceb3 100644 --- a/src/test/scala/io/archivesunleashed/app/VideoInformationExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/VideoInformationExtractorTest.scala @@ -45,15 +45,16 @@ class VideoInformationExtractorTest extends FunSuite with BeforeAndAfter { val RESULTSLENGTH = 1 assert(dfResults.length == RESULTSLENGTH) + assert(dfResults(0).get(0) == "20190817") assert( - dfResults(0).get(0) == "https://ruebot.net/2018-11-12%2016.14.11.mp4" + dfResults(0).get(1) == "https://ruebot.net/2018-11-12%2016.14.11.mp4" ) - assert(dfResults(0).get(1) == "2018-11-12%2016.14.11.mp4") - assert(dfResults(0).get(2) == "mp4") - assert(dfResults(0).get(3) == "video/mp4") + assert(dfResults(0).get(2) == "2018-11-12%2016.14.11.mp4") + assert(dfResults(0).get(3) == "mp4") assert(dfResults(0).get(4) == "video/mp4") - assert(dfResults(0).get(5) == "2cde7de3213a87269957033f6315fce2") - assert(dfResults(0).get(6) == "f28c72fa4c0464a1a2b81fdc539b28cf574ac4c2") + assert(dfResults(0).get(5) == "video/mp4") + assert(dfResults(0).get(6) == "2cde7de3213a87269957033f6315fce2") + assert(dfResults(0).get(7) == "f28c72fa4c0464a1a2b81fdc539b28cf574ac4c2") } after { diff --git a/src/test/scala/io/archivesunleashed/app/WordProcessorInformationExtractorTest.scala b/src/test/scala/io/archivesunleashed/app/WordProcessorInformationExtractorTest.scala index 8ea033d3..dca8350c 100644 --- a/src/test/scala/io/archivesunleashed/app/WordProcessorInformationExtractorTest.scala +++ b/src/test/scala/io/archivesunleashed/app/WordProcessorInformationExtractorTest.scala @@ -47,17 +47,18 @@ class WordProcessorInformationExtractorTest val RESULTSLENGTH = 3 assert(dfResults.length == RESULTSLENGTH) + assert(dfResults(0).get(0) == "20190815") assert( dfResults(0).get( - 0 + 1 ) == "https://ruebot.net/files/aut-test-fixtures/test-aut-fixtures.rtf" ) - assert(dfResults(0).get(1) == "test-aut-fixtures.rtf") - assert(dfResults(0).get(2) == "rtf") - assert(dfResults(0).get(3) == "application/rtf") + assert(dfResults(0).get(2) == "test-aut-fixtures.rtf") + assert(dfResults(0).get(3) == "rtf") assert(dfResults(0).get(4) == "application/rtf") - assert(dfResults(0).get(5) == "e483512b65ba44d71e843c57de2adeb7") - assert(dfResults(0).get(6) == "8cf3066421f0a07fcd6e7a3e86ebd447edf7cfcb") + assert(dfResults(0).get(5) == "application/rtf") + assert(dfResults(0).get(6) == "e483512b65ba44d71e843c57de2adeb7") + assert(dfResults(0).get(7) == "8cf3066421f0a07fcd6e7a3e86ebd447edf7cfcb") } after {