Skip to content

Commit

Permalink
support metadata in dataloader (#197)
Browse files Browse the repository at this point in the history
  • Loading branch information
sainivedh authored Oct 16, 2023
1 parent d672ed4 commit 35b1c31
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ def __getitem__(self, index):
return VisualClassificationFeatures(
image_path=os.path.join(os.path.dirname(__file__), item[0]),
label=item[1],
id=os.path.basename(item[0]).split(".")[0])
id=os.path.basename(item[0]).split(".")[0],
metadata={
"split": self.split,
"image_path": item[0]
})

# Dataset size: reports how many entries are currently held in `self.data`.
def __len__(self):
return len(self.data)
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@ def load_data(self):
reader = csv.reader(_file)
next(reader, None) # skip header
for review in reader:
self.data.append({"text": review[0], "labels": review[1], "id": None})
self.data.append({
"text": review[0],
"labels": review[1],
"id": None,
"metadata": dict(split=self.split)
})

def __getitem__(self, idx):
item = self.data[idx]
Expand Down
4 changes: 4 additions & 0 deletions clarifai/datasets/upload/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class TextFeatures:
text: str
labels: List[Union[str, int]] # List[str or int] to cater for multi-class tasks
id: Optional[int] = None # text_id
metadata: Optional[dict] = None


@dataclass
Expand All @@ -18,6 +19,7 @@ class VisualClassificationFeatures:
label: Union[str, int]
geo_info: Optional[List[float]] = None #[Longitude, Latitude]
id: Optional[int] = None # image_id
metadata: Optional[dict] = None


@dataclass
Expand All @@ -28,6 +30,7 @@ class VisualDetectionFeatures:
bboxes: List[List[float]]
geo_info: Optional[List[float]] = None #[Longitude, Latitude]
id: Optional[int] = None # image_id
metadata: Optional[dict] = None


@dataclass
Expand All @@ -38,3 +41,4 @@ class VisualSegmentationFeatures:
polygons: List[List[List[float]]]
geo_info: Optional[List[float]] = None #[Longitude, Latitude]
id: Optional[int] = None # image_id
metadata: Optional[dict] = None
15 changes: 12 additions & 3 deletions clarifai/datasets/upload/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ def process_datagen_item(id):
list) else [datagen_item.label] # clarifai concept
input_id = f"{self.dataset_id}-{self.split}-{id}" if datagen_item.id is None else f"{self.dataset_id}-{self.split}-{str(datagen_item.id)}"
geo_info = datagen_item.geo_info
metadata.update({"filename": os.path.basename(image_path), "split": self.split})
if datagen_item.metadata is not None:
metadata.update(datagen_item.metadata)
else:
metadata.update({"filename": os.path.basename(image_path), "split": self.split})

self.all_input_ids[id] = input_id
input_protos.append(
Expand Down Expand Up @@ -76,7 +79,10 @@ def process_datagen_item(id):
labels = datagen_item.classes # list:[l1,...,ln]
bboxes = datagen_item.bboxes # [[xmin,ymin,xmax,ymax],...,[xmin,ymin,xmax,ymax]]
input_id = f"{self.dataset_id}-{self.split}-{id}" if datagen_item.id is None else f"{self.dataset_id}-{self.split}-{str(datagen_item.id)}"
metadata.update({"filename": os.path.basename(image), "split": self.split})
if datagen_item.metadata is not None:
metadata.update(datagen_item.metadata)
else:
metadata.update({"filename": os.path.basename(image), "split": self.split})
geo_info = datagen_item.geo_info

self.all_input_ids[id] = input_id
Expand Down Expand Up @@ -126,7 +132,10 @@ def process_datagen_item(id):
labels = datagen_item.classes
_polygons = datagen_item.polygons # list of polygons: [[[x,y],...,[x,y]],...]
input_id = f"{self.dataset_id}-{self.split}-{id}" if datagen_item.id is None else f"{self.dataset_id}-{self.split}-{str(datagen_item.id)}"
metadata.update({"filename": os.path.basename(image), "split": self.split})
if datagen_item.metadata is not None:
metadata.update(datagen_item.metadata)
else:
metadata.update({"filename": os.path.basename(image), "split": self.split})
geo_info = datagen_item.geo_info

self.all_input_ids[id] = input_id
Expand Down
5 changes: 4 additions & 1 deletion clarifai/datasets/upload/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ def process_datagen_item(id):
labels = datagen_item.labels if isinstance(
datagen_item.labels, list) else [datagen_item.labels] # clarifai concept
input_id = f"{self.dataset_id}-{self.split}-{id}" if datagen_item.id is None else f"{self.dataset_id}-{self.split}-{str(datagen_item.id)}"
metadata.update({"split": self.split})
if datagen_item.metadata is not None:
metadata.update(datagen_item.metadata)
else:
metadata.update({"split": self.split})

self.all_input_ids[id] = input_id
input_protos.append(
Expand Down

0 comments on commit 35b1c31

Please sign in to comment.