From b6a11204042e4e8df3c1eef89e16aa1ba00a1596 Mon Sep 17 00:00:00 2001 From: Antonin Delpeuch Date: Tue, 1 Oct 2024 15:57:17 +0200 Subject: [PATCH] Fix fetching of empty categories (#125) Closes #124 --- .../commons/importer/CommonsImporter.java | 47 ++++++++++--------- .../commons/importer/CommonsImporterTest.java | 37 +++++++++++++++ 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/openrefine/extensions/commons/importer/CommonsImporter.java b/src/main/java/org/openrefine/extensions/commons/importer/CommonsImporter.java index 14cc663..004d62b 100644 --- a/src/main/java/org/openrefine/extensions/commons/importer/CommonsImporter.java +++ b/src/main/java/org/openrefine/extensions/commons/importer/CommonsImporter.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.Iterators; import com.google.refine.ProjectMetadata; +import com.google.refine.importers.ImporterUtilities; import com.google.refine.importers.TabularImportingParserBase; import com.google.refine.importing.ImportingJob; import com.google.refine.model.Column; @@ -76,6 +77,30 @@ static public void parse( // initializes progress reporting with the name of the first category setProgress(job, categoriesWithDepth.get(0).categoryName, 0); + + // pre-allocate columns + List columnNames = new ArrayList<>(); + Column col = ImporterUtilities.getOrAllocateColumn(project, columnNames, 0, false); + StandardReconConfig cfg = new StandardReconConfig( + service, + "https://commons.wikimedia.org/entity/", + "http://www.wikidata.org/prop/direct/", + "", + "entity", + true, + 1, + new ArrayList()); + col.setReconStats(ReconStats.create(project, 0)); + col.setReconConfig(cfg); + col.setName("File"); + if (mIdsColumn) { + ImporterUtilities.getOrAllocateColumn(project, columnNames, 1, false).setName("M-ids"); + if (categoriesColumn) { + ImporterUtilities.getOrAllocateColumn(project, columnNames, 2, false).setName("Categories"); + } + } else 
if (categoriesColumn) { + ImporterUtilities.getOrAllocateColumn(project, columnNames, 1, false).setName("Categories"); + } for(CategoryWithDepth categoryWithDepth: categoriesWithDepth) { fetchedFiles = Iterators.concat(fetchedFiles, @@ -96,28 +121,6 @@ static public void parse( exceptions ); - Column col = project.columnModel.columns.get(0); - StandardReconConfig cfg = new StandardReconConfig( - service, - "https://commons.wikimedia.org/entity/", - "http://www.wikidata.org/prop/direct/", - "", - "entity", - true, - 1, - new ArrayList()); - col.setReconStats(ReconStats.create(project, 0)); - col.setReconConfig(cfg); - col.setName("File"); - if (mIdsColumn) { - project.columnModel.columns.get(1).setName("M-ids"); - if (categoriesColumn) { - project.columnModel.columns.get(2).setName("Categories"); - } - } else if (categoriesColumn) { - project.columnModel.columns.get(1).setName("Categories"); - } - setProgress(job, categoriesWithDepth.get(categoriesWithDepth.size()-1).categoryName, 100); } diff --git a/src/test/java/org/openrefine/extensions/commons/importer/CommonsImporterTest.java b/src/test/java/org/openrefine/extensions/commons/importer/CommonsImporterTest.java index 46b8514..1f40039 100644 --- a/src/test/java/org/openrefine/extensions/commons/importer/CommonsImporterTest.java +++ b/src/test/java/org/openrefine/extensions/commons/importer/CommonsImporterTest.java @@ -70,4 +70,41 @@ public void testParse() throws Exception { } } + + /** + * Test that importing an empty category yields no rows but still creates the named columns + */ + @Test + public void testParseEmptyCategory() throws Exception { + + try (MockWebServer server = new MockWebServer()) { + server.start(); + HttpUrl url = server.url("/w/api.php"); + String jsonResponse = "{\"batchcomplete\":\"\",\"query\":{\"categorymembers\":[]}}"; + server.enqueue(new MockResponse().setBody(jsonResponse)); + servlet = new RefineServlet(); + ImportingManager.initialize(servlet); + ProjectManager.singleton = 
Mockito.mock(ProjectManager.class); + project = new Project(); + metadata = new ProjectMetadata(); + metadata.setName("Commons Import Test Project"); + job = Mockito.mock(ImportingJob.class); + ObjectNode options = ParsingUtilities.evaluateJsonStringToObjectNode( + "{\"categoryJsonValue\":[{\"category\":\"Category:Costa Rica\",\"depth\":\"0\"}],\"skipDataLines\":0," + + "\"limit\":-1,\"disableAutoPreview\":false,\"categoriesColumn\":true,\"mIdsColumn\":true}"); + List exceptions = new ArrayList(); + CommonsImporter importer = new CommonsImporter(); + + importer.setApiUrl(url.toString()); + CommonsImporter.parse(project, metadata, job, 0, options, exceptions); + project.update(); + + Assert.assertEquals(project.rows.size(), 0); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "File"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "M-ids"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "Categories"); + server.close(); + + } + } }