Skip to content

Commit

Permalink
feat: Normalized video coordinate formula
Browse files Browse the repository at this point in the history
  • Loading branch information
insung3511 committed Sep 3, 2023
1 parent a6927b1 commit 02ad3e2
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 22 deletions.
Binary file modified dummy.mp4
Binary file not shown.
30 changes: 20 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
EXTRACTOR_THRESHOLD = 0.85

app = FastAPI()
extractor = SkeletonExtractor(pretrained_bool=True, number_of_keypoints=17, device='cuda')
extractor = SkeletonExtractor(pretrained_bool=True, number_of_keypoints=17, device='mps')
preprocessor = DataPreprocessing()
metrics = Metrics()

Expand Down Expand Up @@ -64,10 +64,10 @@ async def registerVideo(
print(f"[INFO/REGISTER] Video register request has been received.")
print(f"[INFO/REGISTER] Extractor threshold: {EXTRACTOR_THRESHOLD}")

video_tensor = preprocessor.processing(video_file=video_file, temp_video_file_path=DUMMY_VIDEO_FILE_NAME)
video_tensor, video_heigth, video_width = preprocessor.processing(video_file=video_file, temp_video_file_path=DUMMY_VIDEO_FILE_NAME)
skeletons, video_length = extractor.extract(video_tensor=video_tensor, score_threshold=EXTRACTOR_THRESHOLD, video_length=None)

return {"skeletons": skeletons, "video_length": video_length}
return {"skeletons": skeletons, "video_length": video_length, "video_heigth": video_heigth, "video_width": video_width}

@app.post("/getMetricsConsumer")
async def getMetricsConsumer(
Expand Down Expand Up @@ -100,22 +100,32 @@ async def getMetricsConsumer(

# Below code will be also used in the database query.
# JSON URL is the 8th column of the table. VNO is the user selected video number.

json_url = result[vno, 7]
response = requests.get(json_url)
guide_skeleton = json.loads(response.text)

guide_video_height = result[vno, -2]
guide_video_width = result[vno, -1]
video_cut_point = result[vno, 8]
# video_cut_point = 10

# Extract consumer's skeleton.
video_tensor = preprocessor.processing(video_file, temp_video_file_path=DUMMY_VIDEO_FILE_NAME)
video_tensor, video_height, video_width = preprocessor.processing(video_file, temp_video_file_path=DUMMY_VIDEO_FILE_NAME)
skeletons, _ = extractor.extract(video_tensor=video_tensor, score_threshold=EXTRACTOR_THRESHOLD, video_length=video_cut_point)

# Cutting the skeleton
for key in skeletons.keys(): skeletons[key] = skeletons[key][:video_cut_point]
for key in guide_skeleton.keys(): guide_skeleton[key] = guide_skeleton[key][:video_cut_point]
# for key in skeletons.keys(): skeletons[key] = skeletons[key][:video_cut_point]
# for key in guide_skeleton.keys(): guide_skeleton[key] = guide_skeleton[key][:video_cut_point]

# Calculate metrics
score = metrics.score(
y_true=guide_skeleton,
true_video_height=guide_video_height,
true_video_width=guide_video_width,
true_cut_point=video_cut_point,
y_pred=skeletons,
pred_video_height=video_height,
pred_video_width=video_width
)

# Calculate metrics (Jaccard score)
score = metrics.score(y_true=guide_skeleton, y_pred=skeletons)

return {"metrics": score}
45 changes: 33 additions & 12 deletions models.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,24 @@ def processing(self, video_file, temp_video_file_path: str = "temp.webm"):
file_ext = video_file.filename.split(".")[-1]
file_ext = temp_video_file_path.split(".")[0] + "." + file_ext
video = self.__save_and_read_video_file(video_file, file_ext)
return video
video_height, video_width = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(video.get(cv2.CAP_PROP_FRAME_WIDTH))

return video, video_height, video_width

class Metrics:
def __video_normalize(self, skeleton: dict, video_height: int, video_width: int, cut_point: int) -> dict:
    """Return a frame-size-relative copy of ``skeleton`` trimmed to ``cut_point`` entries.

    Every entry of ``skeleton[key]`` is read as an ``(x, y)`` pair: ``x`` is
    divided by ``video_width`` and ``y`` by ``video_height``, so skeletons taken
    from videos of different resolutions become comparable.

    The input mapping and its lists are left untouched. Rewriting them in place
    would scale the coordinates a second time if the same skeleton were ever
    normalized again.

    Args:
        skeleton: Mapping from a keypoint name to a sequence of ``(x, y)`` entries.
        video_height: Height of the source video; must be non-zero.
        video_width: Width of the source video; must be non-zero.
        cut_point: Maximum number of entries kept per key. ``None`` keeps
            everything, following normal slice semantics.

    Returns:
        dict: A new mapping with the same keys, each holding a list of
        ``(x / video_width, y / video_height)`` tuples.
    """
    # Trim first so entries beyond the cut point are never scaled needlessly.
    return {
        key: [
            (point[0] / video_width, point[1] / video_height)
            for point in points[:cut_point]
        ]
        for key, points in skeleton.items()
    }

def __jaccard_score(self, y_true: list, y_pred: list) -> float:
"""Returns the jaccard score of the two arrays.
The jaccard score is calculated as follows:
Expand Down Expand Up @@ -276,23 +291,29 @@ def __normalized_mean_squared_error(self, y_true: list, y_pred: list) -> float:
metrics = np.sum((y_true - y_pred) ** 2) / np.sum((y_true - y_true.mean()) ** 2)
return metrics

def score(self,
          y_true: dict, true_video_height: int, true_video_width: int, true_cut_point: int,
          y_pred: dict, pred_video_height: int, pred_video_width: int) -> float:
    """Return the mean per-keypoint Jaccard score between two skeletons.

    Both skeletons are first normalized by their own video dimensions and
    trimmed to ``true_cut_point`` entries, so videos of different resolutions
    are compared on the same scale. The score is then calculated as follows:
        score = mean(jaccard_score(y_true[key], y_pred[key]) for key in y_true)

    Args:
        y_true (dict): The ground truth skeleton, keyed by keypoint name.
        true_video_height (int): The height of the video that the ground truth skeleton is extracted from.
        true_video_width (int): The width of the video that the ground truth skeleton is extracted from.
        true_cut_point (int): Number of entries kept per keypoint; applied to both skeletons.
        y_pred (dict): The predicted skeleton, keyed by keypoint name.
        pred_video_height (int): The height of the video that the predicted skeleton is extracted from.
        pred_video_width (int): The width of the video that the predicted skeleton is extracted from.

    Returns:
        float: The score of the two skeletons, or 0.0 when ``y_true`` has no keypoints.

    Raises:
        KeyError: If a keypoint of ``y_true`` is missing from ``y_pred``.
    """
    y_true = self.__video_normalize(y_true, true_video_height, true_video_width, true_cut_point)
    # The prediction is cut at the guide's cut point so both cover the same span.
    y_pred = self.__video_normalize(y_pred, pred_video_height, pred_video_width, true_cut_point)

    # Nothing to compare: avoid dividing by zero below.
    if not y_true:
        return 0.0

    total = sum(
        (self.__jaccard_score(y_true[key], y_pred[key]) for key in y_true),
        0.0,
    )
    return total / len(y_true)

0 comments on commit 02ad3e2

Please sign in to comment.