Skip to content

Commit

Permalink
Fix tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
ccl-core committed Sep 30, 2024
1 parent 911b7bd commit 2a614aa
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 31 deletions.
8 changes: 3 additions & 5 deletions datasets/1.0/huggingface-prism-alignment/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,9 @@
"dataType": "sc:Text"
}
],
"data": [
{
"conversations_splits/split_name": "train"
}
]
"data": {
"conversations_splits/split_name": "train"
}
},
{
"@type": "cr:RecordSet",
Expand Down
16 changes: 8 additions & 8 deletions datasets/1.0/pass-mini/output/images.jsonl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{"images/creator_uname": "PaperBird+Photography%3C3", "images/latitude": null, "images/longitude": null, "images/date_taken": "2007-05-06 06:11:48", "images/hash": "75f7305b1fd94044e14bdcdde469dbb2", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>"}
{"images/creator_uname": "Chiara+Marra", "images/latitude": 38.23818, "images/longitude": 13.183593, "images/date_taken": "2007-05-04 15:46:43", "images/hash": "dd571a41a015354d92a859f7ef31201", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>"}
{"images/creator_uname": "maplesbranch", "images/latitude": null, "images/longitude": null, "images/date_taken": "2006-05-01 07:34:13", "images/hash": "598ad3bc7e6e876e61af116693c7ad9", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>"}
{"images/creator_uname": "maplesbranch", "images/latitude": null, "images/longitude": null, "images/date_taken": "2006-04-23 19:20:40", "images/hash": "e48d6d552465c5728585b82a53d6e02c", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>"}
{"images/creator_uname": "quinnums", "images/latitude": null, "images/longitude": null, "images/date_taken": "2004-05-17 00:44:29", "images/hash": "ffd3eb12a16cb83138f26e6f36dec967", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>"}
{"images/creator_uname": "striatic", "images/latitude": 53.535233, "images/longitude": -113.565075, "images/date_taken": "2004-05-11 02:00:33", "images/hash": "fff0eece99cc71c2e91fe716051599", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>"}
{"images/creator_uname": "striatic", "images/latitude": null, "images/longitude": null, "images/date_taken": "2004-05-27 10:34:28", "images/hash": "fedefe9f11bf2a749a749bfca8bf28", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>"}
{"images/creator_uname": "quinnums", "images/latitude": null, "images/longitude": null, "images/date_taken": "2004-05-29 02:14:36", "images/hash": "ff379727f52bcec4dfb237ace41627", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>"}
{"images/hash": "75f7305b1fd94044e14bdcdde469dbb2", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/creator_uname": "PaperBird+Photography%3C3", "images/gps_coordinates": {"images/latitude": null, "images/longitude": null}, "images/date_taken": "2007-05-06 06:11:48"}
{"images/hash": "dd571a41a015354d92a859f7ef31201", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/creator_uname": "Chiara+Marra", "images/gps_coordinates": {"images/latitude": 38.23818, "images/longitude": 13.183593}, "images/date_taken": "2007-05-04 15:46:43"}
{"images/hash": "598ad3bc7e6e876e61af116693c7ad9", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/creator_uname": "maplesbranch", "images/gps_coordinates": {"images/latitude": null, "images/longitude": null}, "images/date_taken": "2006-05-01 07:34:13"}
{"images/hash": "e48d6d552465c5728585b82a53d6e02c", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/creator_uname": "maplesbranch", "images/gps_coordinates": {"images/latitude": null, "images/longitude": null}, "images/date_taken": "2006-04-23 19:20:40"}
{"images/hash": "ffd3eb12a16cb83138f26e6f36dec967", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/creator_uname": "quinnums", "images/gps_coordinates": {"images/latitude": null, "images/longitude": null}, "images/date_taken": "2004-05-17 00:44:29"}
{"images/hash": "fff0eece99cc71c2e91fe716051599", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/creator_uname": "striatic", "images/gps_coordinates": {"images/latitude": 53.535233, "images/longitude": -113.565075}, "images/date_taken": "2004-05-11 02:00:33"}
{"images/hash": "fedefe9f11bf2a749a749bfca8bf28", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/creator_uname": "striatic", "images/gps_coordinates": {"images/latitude": null, "images/longitude": null}, "images/date_taken": "2004-05-27 10:34:28"}
{"images/hash": "ff379727f52bcec4dfb237ace41627", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/creator_uname": "quinnums", "images/gps_coordinates": {"images/latitude": null, "images/longitude": null}, "images/date_taken": "2004-05-29 02:14:36"}
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def __call__(self, set_output_in_memory: bool = False) -> OutputT:
inputs = self.inputs
output = self.call() if inputs is None else self.call(*inputs)
if isinstance(output, types.GeneratorType) and set_output_in_memory:
output = pd.DataFrame(output)
output = pd.DataFrame(output) # type:ignore
self.set_output(output)
return output

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def _parse_jsonpath(json_path: str):
return jsonpath_rw.parse(json_path)


def _is_repeated_field(field: Field) -> bool:
return hasattr(field, "repeated") and field.repeated
def _is_repeated_field(field: Field | None) -> bool:
    """Returns True if `field` is a Field whose `repeated` flag is truthy.

    Args:
        field: The candidate field, or None (e.g. a missing parent).

    Returns:
        A strict boolean. None, non-Field objects and fields whose `repeated`
        value is falsy all yield False.
    """
    # `and` returns its right operand unchanged, so coerce it to keep this
    # predicate from leaking a None `repeated` value to callers.
    return isinstance(field, Field) and bool(field.repeated)


def _apply_transform_fn(value: Any, transform: Transform, field: Field) -> Any:
Expand Down Expand Up @@ -176,6 +176,7 @@ def call(self, df: pd.DataFrame) -> Iterator[dict[str, Any]]:
df = _extract_value(df, field)

def _get_result(row):
"""Returns a record parsed as a dictionary of fields."""
result: dict[str, Any] = {}
for field in fields:
source = field.source
Expand All @@ -202,23 +203,30 @@ def _get_result(row):
result[field.id] = value
else:
# Repeated nested sub-fields render as a list of dictionaries.
if _is_repeated_field(field.parent):
if field.parent.id not in result:
result[field.parent.id] = [{field.id: v} for v in value]
if field.parent:
if _is_repeated_field(field.parent):
if field.parent.id not in result:
result[field.parent.id] = [
{field.id: v} for v in value
]
else:
if len(value) != len(result[field.parent.id]):
raise ValueError(
f"Lenghts of {field.id} doesn't match"
" already stored items for"
f" {field.parent.id}"
)
for i, v in enumerate(value):
result[field.parent.id][i][field.id] = v
# Non-repeated subfields render as a single dictionary.
else:
if len(value) != len(result[field.parent.id]):
raise ValueError(
f"Lenghts of {field.id} doesn't match already"
f" stored items for {field.parent.id}"
)
for i, v in enumerate(value):
result[field.parent.id][i][field.id] = v
# Non-repeated subfields renders as a single dictionary.
if field.parent.id not in result:
result[field.parent.id] = {}
result[field.parent.id][field.id] = value
else:
if field.parent.id not in result:
result[field.parent.id] = {}
result[field.parent.id][field.id] = value

raise ValueError(
f"The field {field.id} is a SubField but has no parent."
)
return result

chunk_size = 100
Expand Down

0 comments on commit 2a614aa

Please sign in to comment.