Skip to content

Commit

Permalink
Update metadata and output of the datasets.
Browse files Browse the repository at this point in the history
  • Loading branch information
ccl-core committed Sep 23, 2024
1 parent 4da67de commit 5618d4b
Show file tree
Hide file tree
Showing 59 changed files with 1,816 additions and 1,772 deletions.
2 changes: 1 addition & 1 deletion datasets/1.0/audio_test/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
{
"@type": "cr:Field",
"@id": "records/audio",
"name": "audio",
"name": "records/audio",
"description": "These are the sounds.",
"dataType": "sc:AudioObject",
"source": {
Expand Down
4 changes: 2 additions & 2 deletions datasets/1.0/audio_test/output/records.jsonl
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"audio": "(array([-2.8619270e-13, -1.7014803e-13, 2.7065091e-14, ...,\n -6.4091455e-06, -3.7976279e-06, 2.7510678e-06], dtype=float32), 22050)"}
{"audio": "(array([5.8726583e-14, 1.3397688e-13, 2.2199205e-13, ..., 4.2678180e-04,\n 1.9029720e-04, 2.7079385e-04], dtype=float32), 22050)"}
{"records/audio": "(array([-2.8619270e-13, -1.7014803e-13, 2.7065091e-14, ...,\n -6.4091455e-06, -3.7976279e-06, 2.7510678e-06], dtype=float32), 22050)"}
{"records/audio": "(array([5.8726583e-14, 1.3397688e-13, 2.2199205e-13, ..., 4.2678180e-04,\n 1.9029720e-04, 2.7079385e-04], dtype=float32), 22050)"}
52 changes: 26 additions & 26 deletions datasets/1.0/bigcode-the-stack/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
{
"@type": "cr:Field",
"@id": "default/content",
"name": "content",
"name": "default/content",
"description": "The content of the file.",
"dataType": "sc:Text",
"source": {
Expand All @@ -136,7 +136,7 @@
{
"@type": "cr:Field",
"@id": "default/language",
"name": "language",
"name": "default/language",
"description": "Programming language of the file.",
"dataType": "sc:Text",
"source": {
Expand All @@ -151,7 +151,7 @@
{
"@type": "cr:Field",
"@id": "default/hexsha",
"name": "hexsha",
"name": "default/hexsha",
"description": "Unique git hash of file.",
"dataType": "sc:Text",
"source": {
Expand All @@ -166,7 +166,7 @@
{
"@type": "cr:Field",
"@id": "default/size",
"name": "size",
"name": "default/size",
"description": "Size of the uncompressed file..",
"dataType": "sc:Integer",
"source": {
Expand All @@ -181,7 +181,7 @@
{
"@type": "cr:Field",
"@id": "default/ext",
"name": "ext",
"name": "default/ext",
"description": "File extension.",
"dataType": "sc:Text",
"source": {
Expand All @@ -196,7 +196,7 @@
{
"@type": "cr:Field",
"@id": "default/lang",
"name": "lang",
"name": "default/lang",
"description": "Hash of the file.",
"dataType": "sc:Text",
"source": {
Expand All @@ -211,7 +211,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_path",
"name": "max_stars_repo_path",
"name": "default/max_stars_repo_path",
"description": "Path to file in repo containing this file with maximum number of stars.",
"dataType": "sc:Text",
"source": {
Expand All @@ -226,7 +226,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_name",
"name": "max_stars_repo_name",
"name": "default/max_stars_repo_name",
"description": "Name of repo containing this file with maximum number of stars.",
"dataType": "sc:Text",
"source": {
Expand All @@ -241,7 +241,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_head_hexsha",
"name": "max_stars_repo_head_hexsha",
"name": "default/max_stars_repo_head_hexsha",
"description": "Hexsha of repository head with the maximum of stars.",
"dataType": "sc:Text",
"source": {
Expand All @@ -256,7 +256,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_count",
"name": "max_stars_count",
"name": "default/max_stars_count",
"description": "Number of stars in repository.",
"dataType": "sc:Text",
"source": {
Expand All @@ -271,7 +271,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_stars_event_min_datetime",
"name": "max_stars_repo_stars_event_min_datetime",
"name": "default/max_stars_repo_stars_event_min_datetime",
"description": "First timestamp of a stars event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -286,7 +286,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_stars_event_max_datetime",
"name": "max_stars_repo_stars_event_max_datetime",
"name": "default/max_stars_repo_stars_event_max_datetime",
"description": "Last timestamp of a stars event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -301,7 +301,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_repo_path",
"name": "max_issues_repo_path",
"name": "default/max_issues_repo_path",
"description": "Path to file in repo containing this file with maximum number of issues.",
"dataType": "sc:Text",
"source": {
Expand All @@ -316,7 +316,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_repo_head_hexsha",
"name": "max_issues_repo_head_hexsha",
"name": "default/max_issues_repo_head_hexsha",
"description": "Hexsha of repository head with the maximum of issues.",
"dataType": "sc:Text",
"source": {
Expand All @@ -331,7 +331,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_count",
"name": "max_issues_count",
"name": "default/max_issues_count",
"description": "Number of issues in repository.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -346,7 +346,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_repo_issues_event_min_datetime",
"name": "max_issues_repo_issues_event_min_datetime",
"name": "default/max_issues_repo_issues_event_min_datetime",
"description": "First timestamp of an issues event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -361,7 +361,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_repo_issues_event_max_datetime",
"name": "max_issues_repo_issues_event_max_datetime",
"name": "default/max_issues_repo_issues_event_max_datetime",
"description": "Last timestamp of an issues event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -376,7 +376,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_path",
"name": "max_forks_repo_path",
"name": "default/max_forks_repo_path",
"description": "Path to file in repo containing this file with maximum number of forks.",
"dataType": "sc:Text",
"source": {
Expand All @@ -391,7 +391,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_name",
"name": "max_forks_repo_name",
"name": "default/max_forks_repo_name",
"description": "Name of repo containing this file with maximum number of forks.",
"dataType": "sc:Text",
"source": {
Expand All @@ -406,7 +406,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_head_hexsha",
"name": "max_forks_repo_head_hexsha",
"name": "default/max_forks_repo_head_hexsha",
"description": "Hexsha of repository head with the maximum of forks.",
"dataType": "sc:Text",
"source": {
Expand All @@ -421,7 +421,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_count",
"name": "max_forks_count",
"name": "default/max_forks_count",
"description": "Number of forks in repository.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -436,7 +436,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_forks_event_min_datetime",
"name": "max_forks_repo_forks_event_min_datetime",
"name": "default/max_forks_repo_forks_event_min_datetime",
"description": "First timestamp of a forks event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -451,7 +451,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_forks_event_max_datetime",
"name": "max_forks_repo_forks_event_max_datetime",
"name": "default/max_forks_repo_forks_event_max_datetime",
"description": "Last timestamp of a forks event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -466,7 +466,7 @@
{
"@type": "cr:Field",
"@id": "default/avg_line_length",
"name": "avg_line_length",
"name": "default/avg_line_length",
"description": "The average line-length of the file.",
"dataType": "sc:Float",
"source": {
Expand All @@ -481,7 +481,7 @@
{
"@type": "cr:Field",
"@id": "default/max_line_length",
"name": "max_line_length",
"name": "default/max_line_length",
"description": "The maximum line-length of the file.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -496,7 +496,7 @@
{
"@type": "cr:Field",
"@id": "default/alphanum_fraction",
"name": "alphanum_fraction",
"name": "default/alphanum_fraction",
"description": "The fraction of characters in the file that are alphabetical or numerical characters.",
"dataType": "sc:Float",
"source": {
Expand Down
38 changes: 19 additions & 19 deletions datasets/1.0/coco2014-mini/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,14 @@
{
"@type": "cr:Field",
"@id": "split_enums/name",
"name": "name",
"name": "split_enums/name",
"description": "One of: train, val, test.",
"dataType": "sc:Text"
},
{
"@type": "cr:Field",
"@id": "split_enums/url",
"name": "url",
"name": "split_enums/url",
"description": "Corresponding mlcommons.org definition URL",
"dataType": [
"wd:Q3985153",
Expand All @@ -121,16 +121,16 @@
],
"data": [
{
"name": "train",
"url": "https://mlcommons.org/definitions/training_split"
"split_enums/name": "train",
"split_enums/url": "https://mlcommons.org/definitions/training_split"
},
{
"name": "val",
"url": "https://mlcommons.org/definitions/validation_split"
"split_enums/name": "val",
"split_enums/url": "https://mlcommons.org/definitions/validation_split"
},
{
"name": "test",
"url": "https://mlcommons.org/definitions/test_split"
"split_enums/name": "test",
"split_enums/url": "https://mlcommons.org/definitions/test_split"
}
]
},
Expand All @@ -145,7 +145,7 @@
{
"@type": "cr:Field",
"@id": "images/image_filename",
"name": "image_filename",
"name": "images/image_filename",
"description": "The filename of the image. eg: COCO_train2014_000000000003.jpg",
"dataType": "sc:Text",
"source": {
Expand All @@ -160,7 +160,7 @@
{
"@type": "cr:Field",
"@id": "images/image_content",
"name": "image_content",
"name": "images/image_content",
"description": "The content of the image.",
"dataType": "sc:ImageObject",
"source": {
Expand All @@ -175,7 +175,7 @@
{
"@type": "cr:Field",
"@id": "images/split",
"name": "split",
"name": "images/split",
"dataType": [
"wd:Q3985153",
"sc:Text"
Expand Down Expand Up @@ -210,7 +210,7 @@
{
"@type": "cr:Field",
"@id": "captions/id",
"name": "id",
"name": "captions/id",
"description": "The ID of the caption",
"dataType": "sc:Integer",
"source": {
Expand All @@ -225,7 +225,7 @@
{
"@type": "cr:Field",
"@id": "captions/image_id",
"name": "image_id",
"name": "captions/image_id",
"description": "The ID of the image",
"dataType": "sc:Integer",
"source": {
Expand All @@ -240,7 +240,7 @@
{
"@type": "cr:Field",
"@id": "captions/caption",
"name": "caption",
"name": "captions/caption",
"description": "The caption",
"dataType": [
"wd:Q18585177",
Expand All @@ -258,7 +258,7 @@
{
"@type": "cr:Field",
"@id": "captions/split",
"name": "split",
"name": "captions/split",
"dataType": [
"wd:Q3985153",
"sc:Text"
Expand Down Expand Up @@ -290,7 +290,7 @@
{
"@type": "cr:Field",
"@id": "bounding_boxes/id",
"name": "id",
"name": "bounding_boxes/id",
"description": "The ID of the annotation.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -305,7 +305,7 @@
{
"@type": "cr:Field",
"@id": "bounding_boxes/image_id",
"name": "image_id",
"name": "bounding_boxes/image_id",
"description": "The ID of the image.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -320,7 +320,7 @@
{
"@type": "cr:Field",
"@id": "bounding_boxes/bbox",
"name": "bbox",
"name": "bounding_boxes/bbox",
"description": "The bounding box on the image.",
"dataType": "cr:BoundingBox",
"source": {
Expand All @@ -335,7 +335,7 @@
{
"@type": "cr:Field",
"@id": "bounding_boxes/area",
"name": "area",
"name": "bounding_boxes/area",
"description": "The area of the bounding box.",
"dataType": "sc:Integer",
"source": {
Expand Down
8 changes: 4 additions & 4 deletions datasets/1.0/coco2014-mini/output/bounding_boxes.jsonl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"id": 86, "image_id": 318556, "bbox": "[116.95, 305.86, 285.3, 266.03]", "area": 54652}
{"id": 89, "image_id": 116100, "bbox": "[245.54, 208.17, 40.14, 19.1]", "area": 421}
{"id": 93, "image_id": 318556, "bbox": "[288.4, 18.07, 211.6, 331.33]", "area": 53535}
{"id": 113, "image_id": 116100, "bbox": "[126.5, 475.24, 77.68, 76.73]", "area": 3892}
{"bounding_boxes/id": 86, "bounding_boxes/image_id": 318556, "bounding_boxes/bbox": "[116.95, 305.86, 285.3, 266.03]", "bounding_boxes/area": 54652}
{"bounding_boxes/id": 89, "bounding_boxes/image_id": 116100, "bounding_boxes/bbox": "[245.54, 208.17, 40.14, 19.1]", "bounding_boxes/area": 421}
{"bounding_boxes/id": 93, "bounding_boxes/image_id": 318556, "bounding_boxes/bbox": "[288.4, 18.07, 211.6, 331.33]", "bounding_boxes/area": 53535}
{"bounding_boxes/id": 113, "bounding_boxes/image_id": 116100, "bounding_boxes/bbox": "[126.5, 475.24, 77.68, 76.73]", "bounding_boxes/area": 3892}
Loading

0 comments on commit 5618d4b

Please sign in to comment.