Skip to content

Commit

Permalink
Fix fetching of empty categories (#125)
Browse files Browse the repository at this point in the history
Closes #124
  • Loading branch information
wetneb authored Oct 1, 2024
1 parent 8b202dd commit b6a1120
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.Iterators;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Column;
Expand Down Expand Up @@ -76,6 +77,30 @@ static public void parse(

// initializes progress reporting with the name of the first category
setProgress(job, categoriesWithDepth.get(0).categoryName, 0);

// pre-allocate columns
List<String> columnNames = new ArrayList<>();
Column col = ImporterUtilities.getOrAllocateColumn(project, columnNames, 0, false);
StandardReconConfig cfg = new StandardReconConfig(
service,
"https://commons.wikimedia.org/entity/",
"http://www.wikidata.org/prop/direct/",
"",
"entity",
true,
1,
new ArrayList<ColumnDetail>());
col.setReconStats(ReconStats.create(project, 0));
col.setReconConfig(cfg);
col.setName("File");
if (mIdsColumn) {
ImporterUtilities.getOrAllocateColumn(project, columnNames, 1, false).setName("M-ids");
if (categoriesColumn) {
ImporterUtilities.getOrAllocateColumn(project, columnNames, 2, false).setName("Categories");
}
} else if (categoriesColumn) {
ImporterUtilities.getOrAllocateColumn(project, columnNames, 1, false).setName("Categories");
}

for(CategoryWithDepth categoryWithDepth: categoriesWithDepth) {
fetchedFiles = Iterators.concat(fetchedFiles,
Expand All @@ -96,28 +121,6 @@ static public void parse(
exceptions
);

Column col = project.columnModel.columns.get(0);
StandardReconConfig cfg = new StandardReconConfig(
service,
"https://commons.wikimedia.org/entity/",
"http://www.wikidata.org/prop/direct/",
"",
"entity",
true,
1,
new ArrayList<ColumnDetail>());
col.setReconStats(ReconStats.create(project, 0));
col.setReconConfig(cfg);
col.setName("File");
if (mIdsColumn) {
project.columnModel.columns.get(1).setName("M-ids");
if (categoriesColumn) {
project.columnModel.columns.get(2).setName("Categories");
}
} else if (categoriesColumn) {
project.columnModel.columns.get(1).setName("Categories");
}

setProgress(job, categoriesWithDepth.get(categoriesWithDepth.size()-1).categoryName, 100);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,41 @@ public void testParse() throws Exception {

}
}

/**
 * Test that importing a category without any members yields a project with no rows,
 * whose requested columns are nevertheless created and named
 */
@Test
public void testParseEmptyCategory() throws Exception {

    // try-with-resources closes the mock server, including when an assertion fails
    try (MockWebServer server = new MockWebServer()) {
        server.start();
        HttpUrl url = server.url("/w/api.php");
        // API response listing no members for the requested category
        String jsonResponse = "{\"batchcomplete\":\"\",\"query\":{\"categorymembers\":[]}}";
        server.enqueue(new MockResponse().setBody(jsonResponse));
        servlet = new RefineServlet();
        ImportingManager.initialize(servlet);
        ProjectManager.singleton = Mockito.mock(ProjectManager.class);
        project = new Project();
        metadata = new ProjectMetadata();
        metadata.setName("Commons Import Test Project");
        job = Mockito.mock(ImportingJob.class);
        // both optional columns (M-ids and Categories) are requested
        ObjectNode options = ParsingUtilities.evaluateJsonStringToObjectNode(
                "{\"categoryJsonValue\":[{\"category\":\"Category:Costa Rica\",\"depth\":\"0\"}],\"skipDataLines\":0,"
                + "\"limit\":-1,\"disableAutoPreview\":false,\"categoriesColumn\":true,\"mIdsColumn\":true}");
        List<Exception> exceptions = new ArrayList<>();
        CommonsImporter importer = new CommonsImporter();

        importer.setApiUrl(url.toString());
        CommonsImporter.parse(project, metadata, job, 0, options, exceptions);
        project.update();

        // no file was fetched, so no row is created...
        Assert.assertEquals(project.rows.size(), 0);
        // ...but the three columns exist and carry their expected names
        Assert.assertEquals(project.columnModel.columns.get(0).getName(), "File");
        Assert.assertEquals(project.columnModel.columns.get(1).getName(), "M-ids");
        Assert.assertEquals(project.columnModel.columns.get(2).getName(), "Categories");
    }
}
}

0 comments on commit b6a1120

Please sign in to comment.