From 3dc3482c925346924ba911dd1add1998a045bbda Mon Sep 17 00:00:00 2001
From: Mithun Das <53312122+das-mithun@users.noreply.github.com>
Date: Mon, 17 Jun 2024 17:05:26 +0530
Subject: [PATCH] Update README.md

---
 Codes/README.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/Codes/README.md b/Codes/README.md
index f72eed0..597d734 100644
--- a/Codes/README.md
+++ b/Codes/README.md
@@ -22,7 +22,7 @@
 # To Run the unimodal Vision Based models
 
 6.Vision+lstm_foldWise.py
-7. 3DCNN_withFolds.py
+7.3DCNN_withFolds.py
 
 # To Run the Multimodal Model
 
@@ -30,3 +30,58 @@
 # To extract all the video frames.
 
 frameExtract.py
+
+# To extract the transcripts
+
+The file 'all__video_vosk_audioMap.p' has to be generated with the Vosk speech recognition toolkit (https://alphacephei.com/vosk/). It maps each video name to its transcript, in JSON format as shown below:
+
+{
+ "video_name1": "transcript1",
+ "video_name2": "transcript2",
+ ...
+ "video_nameN": "transcriptN"
+}
+
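+For reference, here is a minimal sketch of how this mapping could be produced with the Vosk Python API. The model directory, the audio folder layout (one 16 kHz mono PCM WAV per video, e.g. extracted with ffmpeg), and the use of pickle for the '.p' file are assumptions and may need to be adapted to the actual pipeline:
+
+```python
+# Sketch: build all__video_vosk_audioMap.p by transcribing one WAV file per video.
+# Assumes the audio has already been extracted from each video (e.g. with ffmpeg)
+# and a Vosk model has been downloaded from https://alphacephei.com/vosk/models.
+import json
+import os
+import pickle
+import wave
+
+from vosk import Model, KaldiRecognizer
+
+MODEL_DIR = "vosk-model-small-en-us-0.15"  # assumed model directory
+AUDIO_DIR = "audio_wav"                    # assumed: one <video_name>.wav per video
+
+model = Model(MODEL_DIR)
+video_transcript_map = {}
+
+for wav_name in sorted(os.listdir(AUDIO_DIR)):
+    if not wav_name.endswith(".wav"):
+        continue
+    video_name = os.path.splitext(wav_name)[0]
+    wf = wave.open(os.path.join(AUDIO_DIR, wav_name), "rb")
+    rec = KaldiRecognizer(model, wf.getframerate())
+
+    # Feed the audio in chunks and collect the recognised text segments.
+    segments = []
+    while True:
+        data = wf.readframes(4000)
+        if len(data) == 0:
+            break
+        if rec.AcceptWaveform(data):
+            segments.append(json.loads(rec.Result()).get("text", ""))
+    segments.append(json.loads(rec.FinalResult()).get("text", ""))
+    wf.close()
+
+    video_transcript_map[video_name] = " ".join(s for s in segments if s)
+
+# The '.p' extension suggests pickle; use json.dump instead if a plain JSON file is expected.
+with open("all__video_vosk_audioMap.p", "wb") as f:
+    pickle.dump(video_transcript_map, f)
+```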