fix some typos
rbler1234 committed Dec 18, 2024
1 parent 8a96384 commit e81e4ca
Showing 18 changed files with 69 additions and 82 deletions.
25 changes: 4 additions & 21 deletions README.md
@@ -81,13 +81,13 @@ existing benchmarks and in-the-wild evaluation.

1. Download the Embodiedscan and MMScan annotation. (Fill in the [form](https://docs.google.com/forms/d/e/1FAIpQLScUXEDTksGiqHZp31j7Zp7zlCNV7p_08uViwP_Nbzfn3g6hhw/viewform) to apply for downloading)

- Create a folder `mmscan_data/` and then unzip the files. For the first zip file, put `embodiedscan` under `mmscan_data/embodiedscan-split` and rename it to `embodiedscan-v1`. For the second zip file, put `MMScan-beta-release` under `mmscan_data/MMScan-beta-release` and `embodiedscan-v2` under `mmscan_data/embodiedscan-split`.
+ Create a folder `mmscan_data/` and then unzip the files. For the first zip file, put `embodiedscan` under `mmscan_data/embodiedscan_split` and rename it to `embodiedscan-v1`. For the second zip file, put `MMScan-beta-release` under `mmscan_data/MMScan-beta-release` and `embodiedscan-v2` under `mmscan_data/embodiedscan_split`.

The directory structure should be as below:

```
mmscan_data
-├── embodiedscan-split
+├── embodiedscan_split
│ ├──embodiedscan-v1/ # EmbodiedScan v1 data in 'embodiedscan.zip'
│ ├──embodiedscan-v2/ # EmbodiedScan v2 data in 'embodiedscan-v2-beta.zip'
├── MMScan-beta-release # MMScan beta data in 'embodiedscan-v2-beta.zip'
@@ -146,37 +146,21 @@ Each dataset item is a dictionary containing key elements:
(2) Language Modality

- **"sub_class"**: Sample category.

- **"ID"**: Unique sample ID.

- **"scan_id"**: Corresponding scan ID.

- - **--------------For Visual Grounding Task**
+ **VG Task:**
- **"target_id"** (list\[int\]): IDs of target objects.

- **"text"** (str): Grounding text.

- **"target"** (list\[str\]): Types of target objects.

- **"anchors"** (list\[str\]): Types of anchor objects.

- **"anchor_ids"** (list\[int\]): IDs of anchor objects.

- **"tokens_positive"** (dict): Position indices of mentioned objects in the text.

- - **--------------ForQuestion Answering Task**
+ **QA Task:**
- **"question"** (str): The question text.

- **"answers"** (list\[str\]): List of possible answers.

- **"object_ids"** (list\[int\]): Object IDs referenced in the question.

- **"object_names"** (list\[str\]): Types of referenced objects.

- **"input_bboxes_id"** (list\[int\]): IDs of input bounding boxes.

- **"input_bboxes"** (list\[np.ndarray\]): Input bounding boxes, 9 DoF.

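Since each dataset item is a plain dictionary, the fields above can be read directly. A minimal sketch (the values below are hypothetical, not taken from the dataset):

```python
# Hypothetical QA sample shaped like the fields documented above.
sample = {
    'sub_class': 'QA_Single_Attribute',   # hypothetical category name
    'ID': 'qa_000001',
    'scan_id': 'scannet/scene0000_00',
    'question': 'What color is the chair near the window?',
    'answers': ['brown'],
    'object_ids': [12],
    'object_names': ['chair'],
    'input_bboxes_id': [],
    'input_bboxes': [],
}

# QA items carry a 'question'; VG items carry 'text' and 'target_id'.
if 'question' in sample:
    print(sample['question'], '->', sample['answers'])
else:
    print(sample['text'], '->', sample['target_id'])
```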
(3) 2D Modality
@@ -324,5 +308,4 @@ We have adapted the MMScan API for some [models](./models/README.md).
## 📝 TODO List

- [ ] More Visual Grounding baselines and Question Answering baselines.
-- [ ] Captioning benchmark update
- [ ] Full release and further updates.
10 changes: 5 additions & 5 deletions data_preparation/README.md
@@ -9,10 +9,10 @@ Detailed steps are shown as follows.

3. Download Matterport3D data [HERE](https://github.com/niessner/Matterport). Link or move the folder to this level of directory.

- 4. Organize the file structure. You are recommended to create a soft link to the raw data folder under `mmscan_data/embodiedscan-split/data`.
+ 4. Organize the file structure. You are recommended to create a soft link to the raw data folder under `mmscan_data/embodiedscan_split/data`.

```
-mmscan_data/embodiedscan-split/data/
+mmscan_data/embodiedscan_split/data/
├── scannet/
│ ├── scans
│ │ ├── <scene_id>
@@ -25,16 +25,16 @@ Detailed steps are shown as follows.
│ ├── ...
```

- Additionally, create a `process_pcd` folder under `mmscan_data/embodiedscan-split` to store the results. Similarly, we recommend using a symbolic link, as the total file size might be a little large (approximately 21GB).
+ Additionally, create a `process_pcd` folder under `mmscan_data/embodiedscan_split` to store the results. Similarly, we recommend using a symbolic link, as the total file size might be a little large (approximately 21GB).

PS: If you have followed the embodiedscan tutorial to organize the data, you can skip these steps and link or copy the `data` folder to
- `mmscan_data/embodiedscan-split`.
+ `mmscan_data/embodiedscan_split`.

After all the raw data is organized, the directory structure should be as below:

```
mmscan_data
-├── embodiedscan-split/
+├── embodiedscan_split/
│ ├── data/
│ ├── process_pcd/
│ ├── embodiedscan-v1/
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions data_preparation/process_all_scan.py
@@ -157,9 +157,9 @@ def process_one_scan(


if __name__ == '__main__':
-    path_of_version1 = '../mmscan_data/embodiedscan-split/embodiedscan-v1'
+    path_of_version1 = '../mmscan_data/embodiedscan_split/embodiedscan-v1'
    parser = ArgumentParser()
-    parser.add_argument('--meta_path', type=str, default='./meta-data')
+    parser.add_argument('--meta_path', type=str, default='./meta_data')
    parser.add_argument(
        '--data_root',
        type=str,
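Given the renamed default above, the script would presumably be invoked as `python process_all_scan.py --meta_path ./meta_data --data_root <data_root>`; the `--data_root` value is not shown in this hunk, so treat the invocation as an assumption based on the two visible flags.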
25 changes: 13 additions & 12 deletions mmscan/evaluator/gpt_evaluation.py
@@ -19,7 +19,7 @@ class GPTEvaluator:
        api_key (str) : The openai key.
        model (str) : The GPT model to use.
            Defaults to "gpt-4o-mini".
-        verbose (bool) : Whether to print the evaluation results or not.
+        show_progress (bool) : Whether to print the evaluation results or not.
            Defaults to False.
    """

@@ -74,12 +74,12 @@ def normal_query(self,
        response = json.loads(response.choices[0].message.content)
        return response

-    def qa_evaluation(self, QA_sample_dict: List[dict], thread_index: int,
+    def qa_evaluation(self, all_samples: dict, thread_index: int,
                      tmp_path: str) -> None:
        """Employ the GPT evaluator.

        Args:
-            QA_sample_dict (List[dict]) : The QA sample dict with
+            all_samples (dict) : The QA sample dict with QA_ID as keys and
                [gt, pred, question] as values.
            thread_index (int) : The index of the thread.
            tmp_path (str) : The path to store the
@@ -92,11 +92,11 @@ def qa_evaluation(self, QA_sample_dict: List[dict], thread_index: int,
        MAXTRY = 3
        gpt_eval_results = {}

-        for QA_ID in tqdm(QA_sample_dict):
+        for sample_id in tqdm(all_samples):
            GPT_INTPUT = {
-                'Question': QA_sample_dict[QA_ID]['question'],
-                'Model Answer': QA_sample_dict[QA_ID]['pred'],
-                'Human Answer': QA_sample_dict[QA_ID]['gt'][0],
+                'Question': all_samples[sample_id]['question'],
+                'Model Answer': all_samples[sample_id]['pred'],
+                'Human Answer': all_samples[sample_id]['gt'][0],
            }

            for _ in range(MAXTRY):
@@ -116,10 +116,10 @@ def qa_evaluation(self, QA_sample_dict: List[dict], thread_index: int,

                    FLAG = True
                except Exception:
-                    # print("error!")
                    continue
            if FLAG:
-                gpt_eval_results[QA_ID] = GPT_OUTPUT
+                gpt_eval_results[sample_id] = GPT_OUTPUT

            with open(
                    tmp_path.replace('.json',
@@ -210,16 +210,17 @@ def load_and_eval(self,

        for thread_index in range(num_threads):
            # Create a sub-dictionary for each thread
-            QA_sample_dict = {
+            partial_samples = {
                ID_: batch_result[ID_]
                for ID_ in IDs_divide_index[thread_index]
            }
            if self.show_progress:
                print(
-                    f'Thread {thread_index} processing {len(QA_sample_dict)}')
+                    f'Thread {thread_index} processing {len(partial_samples)}')
            thread = threading.Thread(
                target=self.qa_evaluation,
-                args=(QA_sample_dict, thread_index, tmp_path + '/gpt_QA.json'),
+                args=(partial_samples, thread_index,
+                      tmp_path + '/gpt_QA.json'),
            )
            threads.append(thread)

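For context, the fan-out above amounts to the following sketch (`divide_ids` and `run_threads` are assumed helper names, not the repository's exact code):

```python
# Sketch: split sample IDs into num_threads chunks so each thread
# evaluates its own slice and writes a per-thread gpt_QA_{i}.json file.
import threading

def divide_ids(ids, num_threads):
    """Split `ids` into num_threads roughly equal chunks."""
    chunk = (len(ids) + num_threads - 1) // num_threads
    return [ids[i * chunk:(i + 1) * chunk] for i in range(num_threads)]

def run_threads(evaluator, batch_result, num_threads, tmp_path):
    ids_divide_index = divide_ids(list(batch_result), num_threads)
    threads = []
    for thread_index in range(num_threads):
        partial_samples = {
            id_: batch_result[id_] for id_ in ids_divide_index[thread_index]
        }
        t = threading.Thread(
            target=evaluator.qa_evaluation,
            args=(partial_samples, thread_index, tmp_path + '/gpt_QA.json'))
        threads.append(t)
        t.start()
    for t in threads:      # wait for all slices before merging results
        t.join()
```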
20 changes: 10 additions & 10 deletions mmscan/evaluator/metrics/box_metric.py
@@ -222,22 +222,22 @@ def get_average_precision(iou_array: np.ndarray, iou_threshold: float) \
    return average_precision(recall, precision), np.max(recall)


-def get_multi_topk_scores(iou_array: Union[np.ndarray, torch.tensor],
-                          iou_threshold: float,
-                          mode: str = 'sigma') -> Dict[str, float]:
-    """
-    Compute the multi-topk metric, we provide two modes.
-    "simple": 1/N * Hit(min(N*k,len(pred)))
-    "sigma": 1/N * Sigma [Hit(min(n*k,len(pred)))>=n] n = 1~N
-    Hit(M) return the number of ground truths hit by the first
-    M predictions.
-    N = the number of ground truths
+def get_general_topk_scores(iou_array: Union[np.ndarray, torch.tensor],
+                            iou_threshold: float,
+                            mode: str = 'sigma') -> Dict[str, float]:
+    """Compute the multi-topk metric, we provide two modes.

    Args:
        iou_array (ndarray/tensor):
            the iou matrix of the predictions and ground truths
            (shape len(preds)*len(gts))
        iou_threshold (float): 0.25/0.5
        mode (str): 'sigma'/'simple'
+            "simple": 1/N * Hit(min(N*k,len(pred)))
+            "sigma": 1/N * Sigma [Hit(min(n*k,len(pred)))>=n] n = 1~N
+            Hit(M) return the number of ground truths hit by
+            the first M predictions.
+            N = the number of ground truths
            Default to 'sigma'.

    Returns:
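To make the "sigma" formula concrete, a standalone sketch (assumes the rows of `iou_array` are predictions already sorted by confidence and columns are ground truths; the function names here are illustrative):

```python
import numpy as np

def hit(iou_array: np.ndarray, m: int, iou_threshold: float) -> int:
    """Number of ground truths matched by any of the first m predictions."""
    return int((iou_array[:m].max(axis=0) >= iou_threshold).sum())

def topk_sigma(iou_array: np.ndarray, iou_threshold: float, k: int) -> float:
    """1/N * Sigma_{n=1..N} [ Hit(min(n*k, len(pred))) >= n ]."""
    num_preds, num_gts = iou_array.shape
    score = 0
    for n in range(1, num_gts + 1):
        m = min(n * k, num_preds)
        score += int(hit(iou_array, m, iou_threshold) >= n)
    return score / num_gts
```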
25 changes: 14 additions & 11 deletions mmscan/evaluator/metrics/lang_metric.py
@@ -25,7 +25,7 @@ def to_coco(kvs, keys):
    return res


-def coco_evaluate(batch_input: List[dict]) -> Tuple[dict, dict]:
+def coco_evaluation(batch_input: List[dict]) -> Tuple[dict, dict]:
    """Calculate the extract matching score for each item.

    Args:
        batch_input(list[dict]):
@@ -60,6 +60,7 @@ def coco_evaluate(batch_input: List[dict]) -> Tuple[dict, dict]:
    final_list = {}
    ref_coco = tokenizer.tokenize(to_coco(ref_sent, ref_sent.keys()))
    hypo_coco = tokenizer.tokenize(to_coco(hypo_sent, ref_sent.keys()))

    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref_coco, hypo_coco)
        if type(score) == list:
@@ -119,17 +120,18 @@ def em_evaluation(batch_input: List[dict]) -> Tuple[list, list]:
    return em_result, refine_em_result


-class simcse_evaluator:
-    """A class for calculating the simcse similarity score.
+class SimCSEEvaluator:
+    """A class for calculating the simcse similarity score. Using Sentence
+    Embeddings to calculate similarity between pred/gt.

    Args:
        model_path: path to the simcse pretrained model.
    """

    def __init__(self, model_path: str, eval_bs: int = 500) -> None:
-        self.eval_bs = eval_bs
        if len(model_path) == 0:
            model_path = 'princeton-nlp/sup-simcse-roberta-large'
+        self.eval_bs = eval_bs
        self.simcse_tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.simcse_model = AutoModel.from_pretrained(model_path).to('cuda')

@@ -139,10 +141,10 @@ def __batch_evaluation__(self, all_pred: List[str], all_gt: List[str],
        a batch.

        Args:
-            gt_count(list[int]):
-                stores number of possible answers to a question
            all_pred(list[str]): all prediction
            all_gt(list[str]): all ground truth
+            gt_count(list[int]):
+                stores number of possible answers to a question
            tips: len(all_gt)>=len(all_pred)
                there may be multiple gt answers for a question.
@@ -214,17 +216,18 @@ def evaluation(self, batch_input: List[dict]) -> List[float]:
        return all_simcse_similarity

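The similarity itself follows standard SimCSE usage (pooler output plus cosine similarity); a minimal sketch outside the class:

```python
# Standard SimCSE similarity between two sentences; batching and the
# multi-ground-truth handling of the class above are omitted.
import torch
from transformers import AutoModel, AutoTokenizer

name = 'princeton-nlp/sup-simcse-roberta-large'   # default used above
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name)

texts = ['the chair is brown', 'a brown chair']
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
with torch.no_grad():
    embeddings = model(**inputs, output_hidden_states=True,
                       return_dict=True).pooler_output
similarity = torch.nn.functional.cosine_similarity(
    embeddings[0], embeddings[1], dim=0)
print(float(similarity))
```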

-class sbert_evaluator:
-    """A class for calculating the sbert similarity score.
+class SBERTEvaluator:
+    """A class for calculating the sbert similarity score. Using Sentence-BERT
+    to calculate similarity between pred/gt.

    Args:
        model_path: path to the sbert pretrained model.
    """

    def __init__(self, model_path: str, eval_bs: int = 500) -> None:
-        self.eval_bs = eval_bs
        if len(model_path) == 0:
            model_path = 'all-mpnet-base-v2'
+        self.eval_bs = eval_bs
        self.sbert_model = SentenceTransformer(model_path, device='cuda')

    def __batch_evaluation__(self, all_pred: List[str], all_gt: List[str],
@@ -233,10 +236,10 @@ def __batch_evaluation__(self, all_pred: List[str], all_gt: List[str],
        batch.

        Args:
-            gt_count(list[int]): stores number of possible
-                answers to a question
            all_pred(list[str]): all prediction
            all_gt(list[str]): all ground truth
+            gt_count(list[int]): stores number of possible
+                answers to a question
            tips: len(all_gt)>=len(all_pred) because there may be multiple
                gt answers for a question.
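The Sentence-BERT path is analogous via the sentence-transformers API; a minimal sketch using the default checkpoint named above:

```python
# Standard sentence-transformers usage; 'all-mpnet-base-v2' is the
# fallback model used by the class above.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-mpnet-base-v2')
emb = model.encode(['the chair is brown', 'a brown chair'],
                   convert_to_tensor=True)
print(float(util.cos_sim(emb[0], emb[1])))
```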
17 changes: 9 additions & 8 deletions mmscan/evaluator/qa_evaluation.py
@@ -2,9 +2,10 @@

import torch

-from mmscan.evaluator.metrics.lang_metric import (coco_evaluate, em_evaluation,
-                                                  sbert_evaluator,
-                                                  simcse_evaluator)
+from mmscan.evaluator.metrics.lang_metric import (SBERTEvaluator,
+                                                  SimCSEEvaluator,
+                                                  coco_evaluation,
+                                                  em_evaluation)
from mmscan.utils.lang_utils import special_token_filter


@@ -39,12 +40,12 @@ def __init__(self,
        self.special_metric = []
        if 'simcse' in model_config and torch.cuda.is_available():
            self.special_metric.append('simcse')
-            self.simcse_evaluator = simcse_evaluator(model_config['simcse'],
-                                                     eval_bs=self.eval_bs)
+            self.simcse_evaluator = SimCSEEvaluator(model_config['simcse'],
+                                                    eval_bs=self.eval_bs)
        if 'sbert' in model_config and torch.cuda.is_available():
            self.special_metric.append('sbert')
-            self.sbert_evaluator = sbert_evaluator(model_config['sbert'],
-                                                   eval_bs=self.eval_bs)
+            self.sbert_evaluator = SBERTEvaluator(model_config['sbert'],
+                                                  eval_bs=self.eval_bs)

        self.eval_metric = [
            'EM',
@@ -125,7 +126,7 @@ def start_evaluation(self) -> dict:
        EM_, refine_EM_ = em_evaluation(self.save_buffer)

        # (2) coco metric evaluation
-        coco_scores, coco_scores_list = coco_evaluate(self.save_buffer)
+        coco_scores, coco_scores_list = coco_evaluation(self.save_buffer)

        # (3) special metric evaluation, forward one time each batch
        if 'simcse' in self.special_metric:
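For reference, plain EM compares a cleaned prediction against every ground-truth answer; a rough sketch (the actual cleaning lives in `special_token_filter`, and the exact "refined" rule may differ):

```python
# Hedged sketch of the EM / refined-EM pair consumed above.
def exact_match(pred: str, gts: list) -> int:
    """1 if the cleaned prediction equals any ground-truth answer."""
    pred = pred.strip().lower()
    return int(any(pred == gt.strip().lower() for gt in gts))

def refined_match(pred: str, gts: list) -> int:
    """Looser variant: counts containment in either direction."""
    pred = pred.strip().lower()
    return int(any(pred in gt.strip().lower() or gt.strip().lower() in pred
                   for gt in gts))
```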
5 changes: 3 additions & 2 deletions mmscan/evaluator/vg_evaluation.py
@@ -6,7 +6,7 @@
from tqdm import tqdm

from mmscan.evaluator.metrics.box_metric import (get_average_precision,
-                                                 get_multi_topk_scores,
+                                                 get_general_topk_scores,
                                                 subset_get_average_precision)
from mmscan.utils.box_utils import index_box, to_9dof_box

@@ -115,7 +115,7 @@ def start_evaluation(self) -> dict:

            # topk metric
            metric_for_single.update(
-                get_multi_topk_scores(iou_array, iou_thr))
+                get_general_topk_scores(iou_array, iou_thr))

            data_item['num_gts'] = iou_array.shape[1]
            data_item.update(metric_for_single)
@@ -268,6 +268,7 @@ def __calculate_iou_array_(
        Args:
            data_item (dict): The subclass name in the original samples.

        Returns:
            np.ndarray, np.ndarray :
                The iou array sorted by the confidence and the
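A compact sketch of how an AP value can be derived from a confidence-sorted IoU array (greedy one-to-one matching and trapezoidal integration are simplifications; the repository's `average_precision` may interpolate differently):

```python
import numpy as np

def average_precision_from_iou(iou_array: np.ndarray,
                               iou_threshold: float) -> float:
    """Rows of iou_array: predictions sorted by confidence; columns: gts."""
    num_preds, num_gts = iou_array.shape
    matched = np.zeros(num_gts, dtype=bool)
    tp = np.zeros(num_preds)
    for i in range(num_preds):
        j = int(np.argmax(iou_array[i]))          # best-overlapping gt
        if iou_array[i, j] >= iou_threshold and not matched[j]:
            matched[j] = True                     # greedy 1-to-1 matching
            tp[i] = 1.0
    cum_tp = np.cumsum(tp)
    recall = cum_tp / num_gts
    precision = cum_tp / np.arange(1, num_preds + 1)
    # simple trapezoidal integration of the PR samples
    return float(np.trapz(precision, recall))
```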
8 changes: 4 additions & 4 deletions mmscan/mmscan.py
@@ -73,17 +73,17 @@ def __init__(
        self.check_mode = check_mode
        if self.check_mode:
            print("embodiedscan's checking mode!!!")
-        self.pkl_name = f'{self.dataroot}/embodiedscan-split' +\
+        self.pkl_name = f'{self.dataroot}/embodiedscan_split' +\
            f'/embodiedscan-{self.version}' +\
            f'/embodiedscan_infos_{split}.pkl'
-        self.data_path = '{}/embodiedscan-split/data'.format(self.dataroot)
+        self.data_path = '{}/embodiedscan_split/data'.format(self.dataroot)
        self.lang_anno_path = '{}/MMScan-beta-release'.format(self.dataroot)

-        self.pcd_path = '{}/embodiedscan-split/process_pcd'.format(
+        self.pcd_path = '{}/embodiedscan_split/process_pcd'.format(
            self.dataroot)

        self.mapping_json_path = (
-            '{}/../data_preparation/meta-data/mp3d_mapping.json'.format(
+            '{}/../data_preparation/meta_data/mp3d_mapping.json'.format(
                self.dataroot))
        self.id_mapping = id_mapping(self.mapping_json_path)
        self.table_names = [
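Since several directory names change in this commit, a small sanity check against the paths constructed above can help after migrating (an assumed helper, not part of the repository):

```python
import os

def check_layout(dataroot: str, version: str = 'v1', split: str = 'train'):
    """Print OK/MISS for each path the renamed code expects to find."""
    expected = [
        f'{dataroot}/embodiedscan_split/embodiedscan-{version}/'
        f'embodiedscan_infos_{split}.pkl',
        f'{dataroot}/embodiedscan_split/data',
        f'{dataroot}/embodiedscan_split/process_pcd',
        f'{dataroot}/../data_preparation/meta_data/mp3d_mapping.json',
    ]
    for path in expected:
        print(('OK  ' if os.path.exists(path) else 'MISS'), path)
```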