From e81e4ca3f2c98d53dc68c5abfb9a7cd22263de29 Mon Sep 17 00:00:00 2001
From: rbler1234
Date: Wed, 18 Dec 2024 14:58:29 +0800
Subject: [PATCH] fix some typos

---
 README.md                                          | 25 +++---------------
 data_preparation/README.md                         | 10 +++----
 .../3rscan_matrix.npy                              | Bin
 .../{meta-data => meta_data}/all_scan.json         |  0
 .../mp3d_mapping.json                              |  0
 .../{meta-data => meta_data}/mp3d_matrix.npy       | Bin
 .../scans_axis_alignment_matrices.json             |  0
 data_preparation/process_all_scan.py               |  4 +--
 mmscan/evaluator/gpt_evaluation.py                 | 25 +++++++++---------
 mmscan/evaluator/metrics/box_metric.py             | 20 +++++++-------
 mmscan/evaluator/metrics/lang_metric.py            | 25 ++++++++++--------
 mmscan/evaluator/qa_evaluation.py                  | 17 ++++++------
 mmscan/evaluator/vg_evaluation.py                  |  5 ++--
 mmscan/mmscan.py                                   |  8 +++---
 mmscan/utils/box_utils.py                          |  2 ++
 mmscan/utils/data_io.py                            |  5 ----
 mmscan/utils/lang_utils.py                         |  3 ++-
 setup.py                                           |  2 +-
 18 files changed, 69 insertions(+), 82 deletions(-)
 rename data_preparation/{meta-data => meta_data}/3rscan_matrix.npy (100%)
 rename data_preparation/{meta-data => meta_data}/all_scan.json (100%)
 rename data_preparation/{meta-data => meta_data}/mp3d_mapping.json (100%)
 rename data_preparation/{meta-data => meta_data}/mp3d_matrix.npy (100%)
 rename data_preparation/{meta-data => meta_data}/scans_axis_alignment_matrices.json (100%)

diff --git a/README.md b/README.md
index e53d4fc..a852d5b 100644
--- a/README.md
+++ b/README.md
@@ -81,13 +81,13 @@ existing benchmarks and in-the-wild evaluation.
 
 1. Download the Embodiedscan and MMScan annotation. (Fill in the [form](https://docs.google.com/forms/d/e/1FAIpQLScUXEDTksGiqHZp31j7Zp7zlCNV7p_08uViwP_Nbzfn3g6hhw/viewform) to apply for downloading)
 
-   Create a folder `mmscan_data/` and then unzip the files. For the first zip file, put `embodiedscan` under `mmscan_data/embodiedscan-split` and rename it to `embodiedscan-v1`. For the second zip file, put `MMScan-beta-release` under `mmscan_data/MMScan-beta-release` and `embodiedscan-v2` under `mmscan_data/embodiedscan-split`.
+   Create a folder `mmscan_data/` and then unzip the files. For the first zip file, put `embodiedscan` under `mmscan_data/embodiedscan_split` and rename it to `embodiedscan-v1`. For the second zip file, put `MMScan-beta-release` under `mmscan_data/MMScan-beta-release` and `embodiedscan-v2` under `mmscan_data/embodiedscan_split`.
 
    The directory structure should be as below:
 
    ```
    mmscan_data
-   ├── embodiedscan-split
+   ├── embodiedscan_split
    │   ├──embodiedscan-v1/   # EmbodiedScan v1 data in 'embodiedscan.zip'
    │   ├──embodiedscan-v2/   # EmbodiedScan v2 data in 'embodiedscan-v2-beta.zip'
    ├── MMScan-beta-release   # MMScan veta data in 'embodiedscan-v2-beta.zip'
@@ -146,37 +146,21 @@ Each dataset item is a dictionary containing key elements:
 (2) Language Modality
 
 - **"sub_class"**: Sample category.
-
 - **"ID"**: Unique sample ID.
-
 - **"scan_id"**: Corresponding scan ID.
-
-- **VG Task:**
-
+- **--------------For Visual Grounding Task**
 - **"target_id"** (list\[int\]): IDs of target objects.
-
 - **"text"** (str): Grounding text.
-
 - **"target"** (list\[str\]): Types of target objects.
-
 - **"anchors"** (list\[str\]): Types of anchor objects.
-
 - **"anchor_ids"** (list\[int\]): IDs of anchor objects.
-
 - **"tokens_positive"** (dict): Position indices of mentioned objects in the text.
-
-- **QA Task:**
-
+- **--------------For Question Answering Task**
 - **"question"** (str): The question text.
-
 - **"answers"** (list\[str\]): List of possible answers.
-
 - **"object_ids"** (list\[int\]): Object IDs referenced in the question.
-
 - **"object_names"** (list\[str\]): Types of referenced objects.
-
 - **"input_bboxes_id"** (list\[int\]): IDs of input bounding boxes.
-
 - **"input_bboxes"** (list\[np.ndarray\]): Input bounding boxes, 9 DoF.
 
 (3) 2D Modality
 
@@ -324,5 +308,4 @@ We have adapted the MMScan API for some [models](./models/README.md).
 ## 📝 TODO List
 
 - \[ \] More Visual Grounding baselines and Question Answering baselines.
-- \[ \] Captioning benchmark update
 - \[ \] Full release and further updates.
diff --git a/data_preparation/README.md b/data_preparation/README.md
index dbbf9ae..4273267 100644
--- a/data_preparation/README.md
+++ b/data_preparation/README.md
@@ -9,10 +9,10 @@ Detailed steps are shown as follows.
 
 3. Download Matterport3D data [HERE](https://github.com/niessner/Matterport). Link or move the folder to this level of directory.
 
-4. Organize the file structure. You are recommanded to create a soft link to the raw data folder under `mmscan_data/embodiedscan-split/data`.
+4. Organize the file structure. You are recommended to create a soft link to the raw data folder under `mmscan_data/embodiedscan_split/data`.
 
    ```
-   mmscan_data/embodiedscan-split/data/
+   mmscan_data/embodiedscan_split/data/
   ├── scannet/
   │   ├── scans
   │   │   ├── 
   │   │   ├── ...
   ├── 3rscan/
   │   ├── 
   │   ├── ...
   ├── matterport3d/
   │   ├── 
   │   ├── ...
   ```
 
@@ -25,16 +25,16 @@ Detailed steps are shown as follows.
   │   ├── ...
   ```
 
-   Additionally, create a `process_pcd` folder under `mmscan_data/embodiedscan-split` to store the results. Similarly, we recommend using a symbolic link, as the total file size might be a little large (approximately 21GB)
+   Additionally, create a `process_pcd` folder under `mmscan_data/embodiedscan_split` to store the results. Similarly, we recommend using a symbolic link, as the total file size might be a little large (approximately 21GB).
 
   PS: If you have followed the embodiedscan tutorial to organize the data,
   you can skip these steps and link or copy the `data` folder to
-  `mmscan_data/embodiedscan-split`.
+  `mmscan_data/embodiedscan_split`.
 After all the raw data is organized, the directory structure should be as below:
 
  ```
  mmscan_data
- ├── embodiedscan-split/
+ ├── embodiedscan_split/
  │   ├── data/
  │   ├── process_pcd/
  │   ├── embodiedscan-v1/
diff --git a/data_preparation/meta-data/3rscan_matrix.npy b/data_preparation/meta_data/3rscan_matrix.npy
similarity index 100%
rename from data_preparation/meta-data/3rscan_matrix.npy
rename to data_preparation/meta_data/3rscan_matrix.npy
diff --git a/data_preparation/meta-data/all_scan.json b/data_preparation/meta_data/all_scan.json
similarity index 100%
rename from data_preparation/meta-data/all_scan.json
rename to data_preparation/meta_data/all_scan.json
diff --git a/data_preparation/meta-data/mp3d_mapping.json b/data_preparation/meta_data/mp3d_mapping.json
similarity index 100%
rename from data_preparation/meta-data/mp3d_mapping.json
rename to data_preparation/meta_data/mp3d_mapping.json
diff --git a/data_preparation/meta-data/mp3d_matrix.npy b/data_preparation/meta_data/mp3d_matrix.npy
similarity index 100%
rename from data_preparation/meta-data/mp3d_matrix.npy
rename to data_preparation/meta_data/mp3d_matrix.npy
diff --git a/data_preparation/meta-data/scans_axis_alignment_matrices.json b/data_preparation/meta_data/scans_axis_alignment_matrices.json
similarity index 100%
rename from data_preparation/meta-data/scans_axis_alignment_matrices.json
rename to data_preparation/meta_data/scans_axis_alignment_matrices.json
diff --git a/data_preparation/process_all_scan.py b/data_preparation/process_all_scan.py
index 2eef49b..7509110 100644
--- a/data_preparation/process_all_scan.py
+++ b/data_preparation/process_all_scan.py
@@ -157,9 +157,9 @@ def process_one_scan(
 
 if __name__ == '__main__':
 
-    path_of_version1 = '../mmscan_data/embodiedscan-split/embodiedscan-v1'
+    path_of_version1 = '../mmscan_data/embodiedscan_split/embodiedscan-v1'
     parser = ArgumentParser()
-    parser.add_argument('--meta_path', type=str, default='./meta-data')
+    parser.add_argument('--meta_path', type=str, default='./meta_data')
     parser.add_argument(
         '--data_root',
         type=str,
diff --git a/mmscan/evaluator/gpt_evaluation.py b/mmscan/evaluator/gpt_evaluation.py
index 9fd0f08..8033f8b 100644
--- a/mmscan/evaluator/gpt_evaluation.py
+++ b/mmscan/evaluator/gpt_evaluation.py
@@ -19,7 +19,7 @@ class GPTEvaluator:
         api_key (str) : The openai key.
         model (str) : The GPT model to use, default we use "gpt-4o-mini".
             Defaults to "gpt-4o-mini".
-        verbose (bool) : Whether to print the evaluation results or not.
+        show_progress (bool) : Whether to print progress information during evaluation.
             Defaults to False.
     """
 
@@ -74,12 +74,12 @@ def normal_query(self,
         response = json.loads(response.choices[0].message.content)
         return response
 
-    def qa_evaluation(self, QA_sample_dict: List[dict], thread_index: int,
+    def qa_evaluation(self, all_samples: dict, thread_index: int,
                       tmp_path: str) -> None:
         """Employ the GPT evaluator.
 
         Args:
-            QA_sample_dict (List[dict]) : The QA sample dict with
+            all_samples (dict) : The QA sample dict with
                 QA_ID as keys and [gt, pred, question] as values.
             thread_index (int) : The index of the thread.
            tmp_path (str) : The path to store the
@@ -92,11 +92,11 @@ def qa_evaluation(self, QA_sample_dict: List[dict], thread_index: int,
 
         MAXTRY = 3
         gpt_eval_results = {}
-        for QA_ID in tqdm(QA_sample_dict):
+        for sample_id in tqdm(all_samples):
             GPT_INTPUT = {
-                'Question': QA_sample_dict[QA_ID]['question'],
-                'Model Answer': QA_sample_dict[QA_ID]['pred'],
-                'Human Answer': QA_sample_dict[QA_ID]['gt'][0],
+                'Question': all_samples[sample_id]['question'],
+                'Model Answer': all_samples[sample_id]['pred'],
+                'Human Answer': all_samples[sample_id]['gt'][0],
             }
 
             for _ in range(MAXTRY):
@@ -116,10 +116,10 @@ def qa_evaluation(self, QA_sample_dict: List[dict], thread_index: int,
                     FLAG = True
 
                 except Exception:
-                    # print("error!")
+                    continue
 
             if FLAG:
-                gpt_eval_results[QA_ID] = GPT_OUTPUT
+                gpt_eval_results[sample_id] = GPT_OUTPUT
 
         with open(
                 tmp_path.replace('.json',
@@ -210,16 +210,17 @@ def load_and_eval(self,
 
         for thread_index in range(num_threads):
             # Create a sub-dictionary for each thread
-            QA_sample_dict = {
+            partial_samples = {
                 ID_: batch_result[ID_]
                 for ID_ in IDs_divide_index[thread_index]
             }
             if self.show_progress:
                 print(
-                    f'Thread {thread_index} processing {len(QA_sample_dict)}')
+                    f'Thread {thread_index} processing {len(partial_samples)}')
             thread = threading.Thread(
                 target=self.qa_evaluation,
-                args=(QA_sample_dict, thread_index, tmp_path + '/gpt_QA.json'),
+                args=(partial_samples, thread_index,
+                      tmp_path + '/gpt_QA.json'),
             )
             threads.append(thread)
diff --git a/mmscan/evaluator/metrics/box_metric.py b/mmscan/evaluator/metrics/box_metric.py
index ceb9b00..b09aaa8 100644
--- a/mmscan/evaluator/metrics/box_metric.py
+++ b/mmscan/evaluator/metrics/box_metric.py
@@ -222,22 +222,22 @@ def get_average_precision(iou_array: np.ndarray, iou_threshold: float) \
     return average_precision(recall, precision), np.max(recall)
 
 
-def get_multi_topk_scores(iou_array: Union[np.ndarray, torch.tensor],
-                          iou_threshold: float,
-                          mode: str = 'sigma') -> Dict[str, float]:
-    """
-    Compute the multi-topk metric, we provide two modes.
-    "simple": 1/N *Hit(min(N*k,len(pred)))
-    "sigma": 1/N * Sigma [Hit(min(n*k,len(pred)))>=n] n = 1~N
-        Hit(M) return the number of gtound truths hitted by the first
-        M predictions.
-        N = the number of gtound truths
+def get_general_topk_scores(iou_array: Union[np.ndarray, torch.tensor],
+                            iou_threshold: float,
+                            mode: str = 'sigma') -> Dict[str, float]:
+    """Compute the multi-topk metric. We provide two modes.
 
+    Args:
         iou_array (ndarray/tensor): the iou matrix of
             the predictions and ground truths (shape len(preds)*len(gts))
         iou_threshold (float): 0.25/0.5
         mode (str): 'sigma'/'simple'
+            "simple": 1/N * Hit(min(N*k,len(pred)))
+            "sigma": 1/N * Sigma [Hit(min(n*k,len(pred)))>=n] n = 1~N
+            Hit(M) returns the number of ground truths hit by
+            the first M predictions.
+            N = the number of ground truths
             Default to 'sigma'.
 
     Returns:
diff --git a/mmscan/evaluator/metrics/lang_metric.py b/mmscan/evaluator/metrics/lang_metric.py
index fdbd1dc..341ed55 100644
--- a/mmscan/evaluator/metrics/lang_metric.py
+++ b/mmscan/evaluator/metrics/lang_metric.py
@@ -25,7 +25,7 @@ def to_coco(kvs, keys):
     return res
 
 
-def coco_evaluate(batch_input: List[dict]) -> Tuple[dict, dict]:
+def coco_evaluation(batch_input: List[dict]) -> Tuple[dict, dict]:
     """Calculate the extract matching score for each item.
     Args:
         batch_input(list[dict]):
@@ -60,6 +60,7 @@ def coco_evaluate(batch_input: List[dict]) -> Tuple[dict, dict]:
     final_list = {}
     ref_coco = tokenizer.tokenize(to_coco(ref_sent, ref_sent.keys()))
     hypo_coco = tokenizer.tokenize(to_coco(hypo_sent, ref_sent.keys()))
+
     for scorer, method in scorers:
         score, scores = scorer.compute_score(ref_coco, hypo_coco)
         if type(score) == list:
@@ -119,17 +120,18 @@ def em_evaluation(batch_input: List[dict]) -> Tuple[list, list]:
     return em_result, refine_em_result
 
 
-class simcse_evaluator:
-    """A class for calculating the simcse similarity score.
+class SimCSEEvaluator:
+    """A class for calculating the simcse similarity score. Using Sentence
+    Embeddings to calculate similarity between pred/gt.
 
     Args:
         model_path: path to the simcse pretrained model.
     """
 
     def __init__(self, model_path: str, eval_bs: int = 500) -> None:
-        self.eval_bs = eval_bs
         if len(model_path) == 0:
             model_path = 'princeton-nlp/sup-simcse-roberta-large'
+        self.eval_bs = eval_bs
         self.simcse_tokenizer = AutoTokenizer.from_pretrained(model_path)
         self.simcse_model = AutoModel.from_pretrained(model_path).to('cuda')
 
@@ -139,10 +141,10 @@ def __batch_evaluation__(self, all_pred: List[str], all_gt: List[str],
         a batch.
 
         Args:
-            gt_count(list[int]):
-                stores number of possible answers to a question
             all_pred(list[str]): all prediction
             all_gt(list[str]): all ground truth
+            gt_count(list[int]):
+                stores number of possible answers to a question
 
         tips: len(all_gt)>=len(all_pred)
             there may be multiple gt answers for a question.
@@ -214,17 +216,18 @@ def evaluation(self, batch_input: List[dict]) -> List[float]:
 
         return all_simcse_similarity
 
 
-class sbert_evaluator:
-    """A class for calculating the sbert similarity score.
+class SBERTEvaluator:
+    """A class for calculating the sbert similarity score. Using Sentence-BERT
+    to calculate similarity between pred/gt.
 
     Args:
         model_path: path to the sbert pretrained model.
     """
 
     def __init__(self, model_path: str, eval_bs: int = 500) -> None:
-        self.eval_bs = eval_bs
         if len(model_path) == 0:
             model_path = 'all-mpnet-base-v2'
+        self.eval_bs = eval_bs
         self.sbert_model = SentenceTransformer(model_path, device='cuda')
 
     def __batch_evaluation__(self, all_pred: List[str], all_gt: List[str],
@@ -233,10 +236,10 @@ def __batch_evaluation__(self, all_pred: List[str], all_gt: List[str],
         batch.
 
         Args:
-            gt_count(list[int]): stores number of possible
-                answers to a question
             all_pred(list[str]): all prediction
             all_gt(list[str]): all ground truth
+            gt_count(list[int]): stores number of possible
+                answers to a question
 
         tips: len(all_gt)>=len(all_pred) because
             there may be multiple gt answers for a question.
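For reference, a minimal sketch of how the renamed language metrics might be driven together after this patch. The import path, function names, and constructor signature come from the hunks above and below; the batch item fields ('ID', 'pred', 'gt') and their exact types are assumptions about the QA evaluator's buffer format, so treat this as illustrative only.

```python
# Illustrative sketch, assuming each batch item carries 'ID', 'pred' and
# 'gt' fields; check QAEvaluator.save_buffer for the exact format.
from mmscan.evaluator.metrics.lang_metric import (SimCSEEvaluator,
                                                  coco_evaluation,
                                                  em_evaluation)

batch = [{
    'ID': 'demo_0',  # hypothetical sample ID
    'pred': ['There is a wooden chair next to the table.'],
    'gt': ['A wooden chair stands beside the table.'],
}]

# Exact-match and refined exact-match scores, one entry per item.
em_scores, refined_em_scores = em_evaluation(batch)

# Caption-style scores (the module tokenizes refs/hypos in COCO format).
coco_overall, coco_per_item = coco_evaluation(batch)

# SimCSE similarity needs a GPU; an empty model path falls back to
# 'princeton-nlp/sup-simcse-roberta-large' (see __init__ above).
simcse = SimCSEEvaluator('', eval_bs=500)
simcse_scores = simcse.evaluation(batch)
```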
diff --git a/mmscan/evaluator/qa_evaluation.py b/mmscan/evaluator/qa_evaluation.py
index 2babc3c..d67e640 100644
--- a/mmscan/evaluator/qa_evaluation.py
+++ b/mmscan/evaluator/qa_evaluation.py
@@ -2,9 +2,10 @@
 
 import torch
 
-from mmscan.evaluator.metrics.lang_metric import (coco_evaluate, em_evaluation,
-                                                  sbert_evaluator,
-                                                  simcse_evaluator)
+from mmscan.evaluator.metrics.lang_metric import (SBERTEvaluator,
+                                                  SimCSEEvaluator,
+                                                  coco_evaluation,
+                                                  em_evaluation)
 from mmscan.utils.lang_utils import special_token_filter
 
 
@@ -39,12 +40,12 @@ def __init__(self,
         self.special_metric = []
         if 'simcse' in model_config and torch.cuda.is_available():
             self.special_metric.append('simcse')
-            self.simcse_evaluator = simcse_evaluator(model_config['simcse'],
-                                                     eval_bs=self.eval_bs)
+            self.simcse_evaluator = SimCSEEvaluator(model_config['simcse'],
+                                                    eval_bs=self.eval_bs)
         if 'sbert' in model_config and torch.cuda.is_available():
             self.special_metric.append('sbert')
-            self.sbert_evaluator = sbert_evaluator(model_config['sbert'],
-                                                   eval_bs=self.eval_bs)
+            self.sbert_evaluator = SBERTEvaluator(model_config['sbert'],
+                                                  eval_bs=self.eval_bs)
 
         self.eval_metric = [
             'EM',
@@ -125,7 +126,7 @@ def start_evaluation(self) -> dict:
         EM_, refine_EM_ = em_evaluation(self.save_buffer)
 
         # (2) coco metric evaluation
-        coco_scores, coco_scores_list = coco_evaluate(self.save_buffer)
+        coco_scores, coco_scores_list = coco_evaluation(self.save_buffer)
 
         # (3) special metric evaluation, forward one time each batch
         if 'simcse' in self.special_metric:
diff --git a/mmscan/evaluator/vg_evaluation.py b/mmscan/evaluator/vg_evaluation.py
index bd2bda9..958d9e9 100644
--- a/mmscan/evaluator/vg_evaluation.py
+++ b/mmscan/evaluator/vg_evaluation.py
@@ -6,7 +6,7 @@
 from tqdm import tqdm
 
 from mmscan.evaluator.metrics.box_metric import (get_average_precision,
-                                                 get_multi_topk_scores,
+                                                 get_general_topk_scores,
                                                  subset_get_average_precision)
 from mmscan.utils.box_utils import index_box, to_9dof_box
 
@@ -115,7 +115,7 @@ def start_evaluation(self) -> dict:
 
                 # topk metric
                 metric_for_single.update(
-                    get_multi_topk_scores(iou_array, iou_thr))
+                    get_general_topk_scores(iou_array, iou_thr))
 
             data_item['num_gts'] = iou_array.shape[1]
             data_item.update(metric_for_single)
@@ -268,6 +268,7 @@ def __calculate_iou_array_(
 
         Args:
             data_item (dict): The subclass name in the original samples.
+
         Returns:
             np.ndarray, np.ndarray :
                 The iou array sorted by the confidence and the
diff --git a/mmscan/mmscan.py b/mmscan/mmscan.py
index eff2f05..85a6005 100644
--- a/mmscan/mmscan.py
+++ b/mmscan/mmscan.py
@@ -73,17 +73,17 @@ def __init__(
         self.check_mode = check_mode
         if self.check_mode:
             print("embodiedscan's checking mode!!!")
-        self.pkl_name = f'{self.dataroot}/embodiedscan-split' +\
+        self.pkl_name = f'{self.dataroot}/embodiedscan_split' +\
             f'/embodiedscan-{self.version}' +\
             f'/embodiedscan_infos_{split}.pkl'
 
-        self.data_path = '{}/embodiedscan-split/data'.format(self.dataroot)
+        self.data_path = '{}/embodiedscan_split/data'.format(self.dataroot)
         self.lang_anno_path = '{}/MMScan-beta-release'.format(self.dataroot)
-        self.pcd_path = '{}/embodiedscan-split/process_pcd'.format(
+        self.pcd_path = '{}/embodiedscan_split/process_pcd'.format(
             self.dataroot)
         self.mapping_json_path = (
-            '{}/../data_preparation/meta-data/mp3d_mapping.json'.format(
+            '{}/../data_preparation/meta_data/mp3d_mapping.json'.format(
                 self.dataroot))
         self.id_mapping = id_mapping(self.mapping_json_path)
         self.table_names = [
diff --git a/mmscan/utils/box_utils.py b/mmscan/utils/box_utils.py
index 97556f9..8fe7284 100644
--- a/mmscan/utils/box_utils.py
+++ b/mmscan/utils/box_utils.py
@@ -75,6 +75,7 @@ def euler_to_matrix_np(euler: np.ndarray) -> np.ndarray:
 
     Args:
         euler (np.ndarray) : (..., 3)
+
     Returns:
         np.ndarray : (..., 3, 3)
     """
@@ -93,6 +94,7 @@ def is_inside_box(points, center, size, rotation_mat):
         size: size of the box, numpy array of shape (3, ).
         rotation_mat: rotation matrix of the box,
             numpy array of shape (3, 3).
+
     Returns:
         Boolean array of shape (n, ) indicating if each point is
             inside the box.
diff --git a/mmscan/utils/data_io.py b/mmscan/utils/data_io.py
index 87ea13b..6c08c1e 100644
--- a/mmscan/utils/data_io.py
+++ b/mmscan/utils/data_io.py
@@ -59,12 +59,7 @@ def read_annotation_pickle(path: str, show_progress: bool = True):
     pbar = (tqdm(range(len(datalist))) if show_progress else range(
         len(datalist)))
     for scene_idx in pbar:
-        # print(datalist[scene_idx]['sample_idx'])
-        # if "matterport3d" not in datalist[scene_idx]['sample_idx']:
-        #     continue
-        # print(datalist[scene_idx].keys())
         images = datalist[scene_idx]['images']
-        # print(images[0].keys())
         intrinsic = datalist[scene_idx].get('cam2img', None)  # a 4x4 matrix
         missing_intrinsic = False
diff --git a/mmscan/utils/lang_utils.py b/mmscan/utils/lang_utils.py
index cc10b4c..840706f 100644
--- a/mmscan/utils/lang_utils.py
+++ b/mmscan/utils/lang_utils.py
@@ -78,6 +78,7 @@ def normalize_answer(s):
 
     Args:
         s (str): the raw answer.
+
     Returns:
         str : the processed sentence.
     """
@@ -106,7 +107,6 @@ def exact_match_score(prediction, ground_truth):
         prediction (str): thr predicted answer.
         ground_truth (str): the gt answer.
 
-
     Returns:
         float : the exact match score
     """
@@ -221,6 +221,7 @@ def qa_metric_map(eval_type):
 
     Args:
         eval_type (str): the class name.
+
    Returns:
        str : the corresponding Abbrev.
    """
diff --git a/setup.py b/setup.py
index 71749c7..c2c2a10 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 if __name__ == '__main__':
     my_packages = find_packages()
-    print(my_packages)
+
     setup(
         name='mmscan',
         version='0.0.0',
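The two modes documented for the renamed `get_general_topk_scores` in box_metric.py above are compact, so here is a standalone NumPy sketch of the "simple" and "sigma" formulas. The function and variable names are illustrative and not the repository's, the returned dictionary keys of the real function are not reproduced, and predictions are assumed to be sorted by confidence (rows of the IoU matrix).

```python
# Standalone restatement of the docstring formulas; not the repo implementation.
import numpy as np


def hit(iou: np.ndarray, iou_thr: float, m: int) -> int:
    """Hit(M): number of ground truths matched (IoU >= iou_thr) by the
    first M confidence-sorted predictions."""
    if m <= 0:
        return 0
    return int((iou[:m].max(axis=0) >= iou_thr).sum())


def general_topk_score(iou: np.ndarray, iou_thr: float, k: int,
                       mode: str = 'sigma') -> float:
    """iou has shape (num_preds, num_gts); N below is the number of gts."""
    num_preds, num_gts = iou.shape
    if mode == 'simple':
        # "simple": 1/N * Hit(min(N*k, len(pred)))
        return hit(iou, iou_thr, min(num_gts * k, num_preds)) / num_gts
    # "sigma": 1/N * sum_{n=1..N} [Hit(min(n*k, len(pred))) >= n]
    return sum(
        hit(iou, iou_thr, min(n * k, num_preds)) >= n
        for n in range(1, num_gts + 1)) / num_gts


# Toy example: 3 confidence-sorted predictions vs. 2 ground truths.
iou = np.array([[0.6, 0.1],
                [0.2, 0.4],
                [0.1, 0.55]])
print(general_topk_score(iou, 0.5, k=1, mode='simple'))  # 0.5
print(general_topk_score(iou, 0.5, k=1, mode='sigma'))   # 0.5
```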