From ba472cf8ebcfee3048e847d36276c5e78300deb3 Mon Sep 17 00:00:00 2001 From: dohyun Date: Thu, 18 Jan 2024 11:11:18 -0500 Subject: [PATCH 1/7] Initial implementation of mmif rewinder --- mmif/utils/mmif_rewinder.py | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 mmif/utils/mmif_rewinder.py diff --git a/mmif/utils/mmif_rewinder.py b/mmif/utils/mmif_rewinder.py new file mode 100644 index 00000000..aef16340 --- /dev/null +++ b/mmif/utils/mmif_rewinder.py @@ -0,0 +1,83 @@ +import argparse +import mmif +import json + +def read_mmif(mmif_file): + """ + Function to read mmif file as a json file and return it as a dictionary. + (Would it be better to be a mmif object?) + + :param mmif_file: file path to the mmif. + :return: dictionary with mmif data + """ + try: + with open(mmif_file, 'r') as file: + mmif_data = json.load(file) + + print(f"\nSuccessfully loaded MMIF file: {mmif_file}") + + except FileNotFoundError: + print(f"Error: MMIF file '{mmif_file}' not found.") + except json.JSONDecodeError: + print(f"Error: Invalid JSON format in MMIF file '{mmif_file}'.") + except Exception as e: + print(f"Error: An unexpected error occurred - {e}") + + return mmif_data + + +def user_choice(mmif_data): + """ + Function to ask user to choose the rewind range. + + :param mmif_data: dictionary + :return: int option number + """ + + ## Give a user options (#, "app", "timestamp") - time order + n = len(mmif_data["views"]) + i = 0 # option number + # header + print("\n"+"{:<4} {:<30} {:<100}".format("num", "timestamp", "app")) + for view in mmif_data["views"]: + if "timestamp" in view["metadata"]: + option = "{:<4} {:<30} {:<100}".format(i, view["metadata"]["timestamp"], view["metadata"]["app"]) + else: + option = "{:<4} {:<30} {:<100}".format(i, "-", view["metadata"]["app"]) + print(option) + i += 1 + + ## User input + while True: + try: + choice = int(input("\nEnter the number to delete from that point by rewinding: ")) + if 0 <= choice <= n-1: + return choice + else: + print(f"\nInvalid choice. Please enter a number between 0 and {n-1}") + except ValueError: + print("\nInvalid input. Please enter a valid number.") + + +def process_mmif_from_user_choice(mmif_data, choice): + """ + Process rewinding of mmif data from user choice and save it in as a json file. + + :param mmif_data: + :param choice: + :return: Output.mmif + """ + mmif_data["views"] = mmif_data["views"][:choice] + file_name = str(input("\nEnter the file name for the rewound mmif: ")) + with open(file_name, 'w') as json_file: + json.dump(mmif_data, json_file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Process MMIF file.") + parser.add_argument("mmif_file", help="Path to the MMIF file") + args = parser.parse_args() + + mmif_data = read_mmif(args.mmif_file) + choice = user_choice(mmif_data) + process_mmif_from_user_choice(mmif_data, choice) \ No newline at end of file From 7171d74d7573a2f13be9996ba31a63a557602531 Mon Sep 17 00:00:00 2001 From: dohyun Date: Thu, 18 Jan 2024 16:29:12 -0500 Subject: [PATCH 2/7] New function __delete_last__(self, n: int) is added to the class ViewsList --- mmif/serialize/mmif.py | 5 +++++ mmif/utils/__init__.py | 1 + 2 files changed, 6 insertions(+) diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py index 299402c7..9c4ae788 100644 --- a/mmif/serialize/mmif.py +++ b/mmif/serialize/mmif.py @@ -607,3 +607,8 @@ def get_last(self) -> Optional[View]: for view in reversed(self._items.values()): if 'error' not in view.metadata and 'warning' not in view.metadata: return view + + def __delete_last__(self, n: int) -> None: + list_reversed = list(reversed(self._items.keys()))[:n] + for key in list_reversed: + self._items.pop(key) diff --git a/mmif/utils/__init__.py b/mmif/utils/__init__.py index 5605a32d..5892ea76 100644 --- a/mmif/utils/__init__.py +++ b/mmif/utils/__init__.py @@ -1 +1,2 @@ from mmif.utils import video_document_helper +from mmif.utils import mmif_rewinder \ No newline at end of file From 2c5ec90b9b756010f88d091e7937d4919a0bc982 Mon Sep 17 00:00:00 2001 From: dohyun Date: Mon, 22 Jan 2024 11:50:40 -0500 Subject: [PATCH 3/7] Function name has been changed __delete_last__(self, n: int) -> __delete_last(self, n: int) --- mmif/serialize/mmif.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py index 9c4ae788..99994e8a 100644 --- a/mmif/serialize/mmif.py +++ b/mmif/serialize/mmif.py @@ -608,7 +608,7 @@ def get_last(self) -> Optional[View]: if 'error' not in view.metadata and 'warning' not in view.metadata: return view - def __delete_last__(self, n: int) -> None: + def __delete_last(self, n: int) -> None: list_reversed = list(reversed(self._items.keys()))[:n] for key in list_reversed: self._items.pop(key) From a2b75beb2c73d61eeb3bfbd1f1cbe7757c00b1d6 Mon Sep 17 00:00:00 2001 From: dohyun Date: Tue, 23 Jan 2024 10:35:50 -0500 Subject: [PATCH 4/7] Options are added: -o for output file name and -p for pretty printing. --- mmif/utils/mmif_rewinder.py | 70 ++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/mmif/utils/mmif_rewinder.py b/mmif/utils/mmif_rewinder.py index aef16340..86d3ac49 100644 --- a/mmif/utils/mmif_rewinder.py +++ b/mmif/utils/mmif_rewinder.py @@ -1,14 +1,15 @@ import argparse import mmif import json +import os -def read_mmif(mmif_file): +def read_mmif(mmif_file)->mmif.Mmif: """ - Function to read mmif file as a json file and return it as a dictionary. + Function to read mmif file and return the mmif object. (Would it be better to be a mmif object?) :param mmif_file: file path to the mmif. - :return: dictionary with mmif data + :return: mmif object """ try: with open(mmif_file, 'r') as file: @@ -16,34 +17,32 @@ def read_mmif(mmif_file): print(f"\nSuccessfully loaded MMIF file: {mmif_file}") + mmif_obj = mmif.Mmif(mmif_data) + + except FileNotFoundError: print(f"Error: MMIF file '{mmif_file}' not found.") - except json.JSONDecodeError: - print(f"Error: Invalid JSON format in MMIF file '{mmif_file}'.") except Exception as e: print(f"Error: An unexpected error occurred - {e}") - return mmif_data + return mmif_obj -def user_choice(mmif_data): +def user_choice(mmif_obj:mmif.Mmif) -> int: """ Function to ask user to choose the rewind range. - :param mmif_data: dictionary + :param mmif_obj: mmif object :return: int option number """ ## Give a user options (#, "app", "timestamp") - time order - n = len(mmif_data["views"]) - i = 0 # option number + n = len(mmif_obj.views) + i = 0 # option number # header - print("\n"+"{:<4} {:<30} {:<100}".format("num", "timestamp", "app")) - for view in mmif_data["views"]: - if "timestamp" in view["metadata"]: - option = "{:<4} {:<30} {:<100}".format(i, view["metadata"]["timestamp"], view["metadata"]["app"]) - else: - option = "{:<4} {:<30} {:<100}".format(i, "-", view["metadata"]["app"]) + print("\n" + "{:<4} {:<30} {:<100}".format("num", "timestamp", "app")) + for view in mmif_obj.views: + option = "{:<4} {:<30} {:<100}".format(i, str(view.metadata.timestamp), str(view.metadata.app)) print(option) i += 1 @@ -51,33 +50,46 @@ def user_choice(mmif_data): while True: try: choice = int(input("\nEnter the number to delete from that point by rewinding: ")) - if 0 <= choice <= n-1: + if 0 <= choice <= n - 1: return choice else: - print(f"\nInvalid choice. Please enter a number between 0 and {n-1}") + print(f"\nInvalid choice. Please enter a number between 0 and {n - 1}") except ValueError: print("\nInvalid input. Please enter a valid number.") -def process_mmif_from_user_choice(mmif_data, choice): +def process_mmif_from_user_choice(mmif_obj, choice: int, output_fp = "rewound.mmif", p=True) -> None: """ Process rewinding of mmif data from user choice and save it in as a json file. - :param mmif_data: - :param choice: - :return: Output.mmif + :param mmif_obj: mmif object + :param choice: integer to rewind from + :param output_fp: path to save the rewound output file + :return: rewound.mmif saved """ - mmif_data["views"] = mmif_data["views"][:choice] - file_name = str(input("\nEnter the file name for the rewound mmif: ")) - with open(file_name, 'w') as json_file: - json.dump(mmif_data, json_file) + n = len(mmif_obj.views) - choice + mmif_obj.views.__delete_last(n) + mmif_serialized = mmif_obj.serialize(pretty=p) + + # Check if the same file name exist in the path and avoid overwriting. + if os.path.exists(output_fp): + file_name, file_extension = os.path.splitext(output_fp) + count = 1 + while os.path.exists(f"{file_name}_{count}.mmif"): + count += 1 + output_fp = f"{file_name}_{count}.mmif" + + with open(output_fp, 'w') as mmif_file: + mmif_file.write(mmif_serialized) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Process MMIF file.") parser.add_argument("mmif_file", help="Path to the MMIF file") + parser.add_argument("-o", '--output', type=str, help="Path to the rewound MMIF output file (default: rewound.mmif)") + parser.add_argument("-p", '--pretty', help="Pretty print (default: pretty=True)") args = parser.parse_args() - mmif_data = read_mmif(args.mmif_file) - choice = user_choice(mmif_data) - process_mmif_from_user_choice(mmif_data, choice) \ No newline at end of file + mmif_obj = read_mmif(args.mmif_file) + choice = user_choice(mmif_obj) + process_mmif_from_user_choice(mmif_obj, choice, args.output, args.pretty) \ No newline at end of file From f173a378808bc7359a25a1694098d6fed65592df Mon Sep 17 00:00:00 2001 From: dohyun Date: Mon, 22 Jan 2024 17:32:04 -0500 Subject: [PATCH 5/7] Review comments reflected: - changed rewinder file name taking off mmif_. - changed _delete_last function name to be single underscored. - added CLI argument for number of views to rewind. - changed read_mmif from json.load() -> file.read() --- mmif/serialize/mmif.py | 2 +- mmif/utils/__init__.py | 2 +- mmif/utils/{mmif_rewinder.py => rewinder.py} | 47 ++++++++++++-------- 3 files changed, 30 insertions(+), 21 deletions(-) rename mmif/utils/{mmif_rewinder.py => rewinder.py} (57%) diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py index 99994e8a..0d62fc03 100644 --- a/mmif/serialize/mmif.py +++ b/mmif/serialize/mmif.py @@ -608,7 +608,7 @@ def get_last(self) -> Optional[View]: if 'error' not in view.metadata and 'warning' not in view.metadata: return view - def __delete_last(self, n: int) -> None: + def _delete_last(self, n: int) -> None: list_reversed = list(reversed(self._items.keys()))[:n] for key in list_reversed: self._items.pop(key) diff --git a/mmif/utils/__init__.py b/mmif/utils/__init__.py index 5892ea76..840a67cd 100644 --- a/mmif/utils/__init__.py +++ b/mmif/utils/__init__.py @@ -1,2 +1,2 @@ from mmif.utils import video_document_helper -from mmif.utils import mmif_rewinder \ No newline at end of file +from mmif.utils import rewinder \ No newline at end of file diff --git a/mmif/utils/mmif_rewinder.py b/mmif/utils/rewinder.py similarity index 57% rename from mmif/utils/mmif_rewinder.py rename to mmif/utils/rewinder.py index 86d3ac49..ad207528 100644 --- a/mmif/utils/mmif_rewinder.py +++ b/mmif/utils/rewinder.py @@ -1,6 +1,5 @@ import argparse import mmif -import json import os def read_mmif(mmif_file)->mmif.Mmif: @@ -13,12 +12,7 @@ def read_mmif(mmif_file)->mmif.Mmif: """ try: with open(mmif_file, 'r') as file: - mmif_data = json.load(file) - - print(f"\nSuccessfully loaded MMIF file: {mmif_file}") - - mmif_obj = mmif.Mmif(mmif_data) - + mmif_obj = mmif.Mmif(file.read()) except FileNotFoundError: print(f"Error: MMIF file '{mmif_file}' not found.") @@ -27,6 +21,15 @@ def read_mmif(mmif_file)->mmif.Mmif: return mmif_obj +def is_valid_choice(choice): + try: + ichoice = int(choice) + if 0 <= ichoice: + return ichoice + else: + raise ValueError(f"\nInvalid argument for -n. Please enter a positive integer.") + except ValueError: + raise argparse.ArgumentTypeError(f"\nInvalid argument for -n. Please enter a positive integer.") def user_choice(mmif_obj:mmif.Mmif) -> int: """ @@ -42,33 +45,33 @@ def user_choice(mmif_obj:mmif.Mmif) -> int: # header print("\n" + "{:<4} {:<30} {:<100}".format("num", "timestamp", "app")) for view in mmif_obj.views: - option = "{:<4} {:<30} {:<100}".format(i, str(view.metadata.timestamp), str(view.metadata.app)) + option = "{:<4} {:<30} {:<100}".format(n-i, str(view.metadata.timestamp), str(view.metadata.app)) print(option) i += 1 ## User input while True: + choice = int(input("\nEnter the number to delete from that point by rewinding: ")) try: - choice = int(input("\nEnter the number to delete from that point by rewinding: ")) - if 0 <= choice <= n - 1: + if 0 <= choice <= n: return choice else: - print(f"\nInvalid choice. Please enter a number between 0 and {n - 1}") + print(f"\nInvalid choice. Please enter an integer in the range [0, {n}].") except ValueError: print("\nInvalid input. Please enter a valid number.") -def process_mmif_from_user_choice(mmif_obj, choice: int, output_fp = "rewound.mmif", p=True) -> None: +def process_mmif(mmif_obj, choice: int, output_fp = "rewound.mmif", p=True) -> None: """ Process rewinding of mmif data from user choice and save it in as a json file. :param mmif_obj: mmif object :param choice: integer to rewind from :param output_fp: path to save the rewound output file + :param p: whether using pretty printing or not :return: rewound.mmif saved """ - n = len(mmif_obj.views) - choice - mmif_obj.views.__delete_last(n) + mmif_obj.views._delete_last(choice) mmif_serialized = mmif_obj.serialize(pretty=p) # Check if the same file name exist in the path and avoid overwriting. @@ -81,15 +84,21 @@ def process_mmif_from_user_choice(mmif_obj, choice: int, output_fp = "rewound.mm with open(output_fp, 'w') as mmif_file: mmif_file.write(mmif_serialized) - + print("Successfully processed the rewind") if __name__ == "__main__": parser = argparse.ArgumentParser(description="Process MMIF file.") parser.add_argument("mmif_file", help="Path to the MMIF file") - parser.add_argument("-o", '--output', type=str, help="Path to the rewound MMIF output file (default: rewound.mmif)") - parser.add_argument("-p", '--pretty', help="Pretty print (default: pretty=True)") + parser.add_argument("-o", '--output', default = "rewound.mmif", type=str, help="Path to the rewound MMIF output file (default: rewound.mmif)") + parser.add_argument("-p", '--pretty', default = True, type = bool, help="Pretty print (default: pretty=True)") + parser.add_argument("-n", '--number', default = "0", type = is_valid_choice, help="Number of views to rewind (default: 0)") args = parser.parse_args() mmif_obj = read_mmif(args.mmif_file) - choice = user_choice(mmif_obj) - process_mmif_from_user_choice(mmif_obj, choice, args.output, args.pretty) \ No newline at end of file + + if args.number == 0: # If user doesn't know how many views to rewind, give them choices. + choice = user_choice(mmif_obj) + else: + choice = args.number + + process_mmif(mmif_obj, choice, args.output, args.pretty) \ No newline at end of file From 1dc8a1ba61daeb86887e4b735ebd07acbcd8900b Mon Sep 17 00:00:00 2001 From: dohyun Date: Thu, 25 Jan 2024 14:01:01 -0500 Subject: [PATCH 6/7] Added a unit test for mmif.views._delete_last(n). --- tests/test_serialize.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_serialize.py b/tests/test_serialize.py index 341b4e34..f31a19ff 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -111,6 +111,14 @@ def test_new_view(self): mmif_obj.new_view() # just raise exception if this fails self.assertEqual(old_view_count+1, len(mmif_obj.views)) + def test_delete_last_n_view(self): + mmif_obj = Mmif(self.mmif_examples_json['everything']) + original_view_count = len(mmif_obj.views) + original_last_view = mmif_obj.views.get_last().id + mmif_obj.views._delete_last(1) + self.assertEqual(original_view_count-1, len(mmif_obj.views)) + self.assertTrue(mmif_obj.views.get_last().id != original_last_view) + def test_document_text(self): text = "Karen flew to New York." en = 'en' From 6de4fa3f297735a62d448673ddb9991b3c55a1ee Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Sun, 28 Jan 2024 21:13:08 -0500 Subject: [PATCH 7/7] updated rewinder module ... * optimized MMIF file I/O * added rewinding by number of apps (opposed to number of views, not exposed in CLI) --- mmif/serialize/mmif.py | 5 --- mmif/utils/rewinder.py | 87 +++++++++++++++++++++-------------------- tests/test_serialize.py | 8 ---- 3 files changed, 44 insertions(+), 56 deletions(-) diff --git a/mmif/serialize/mmif.py b/mmif/serialize/mmif.py index 0d62fc03..299402c7 100644 --- a/mmif/serialize/mmif.py +++ b/mmif/serialize/mmif.py @@ -607,8 +607,3 @@ def get_last(self) -> Optional[View]: for view in reversed(self._items.values()): if 'error' not in view.metadata and 'warning' not in view.metadata: return view - - def _delete_last(self, n: int) -> None: - list_reversed = list(reversed(self._items.keys()))[:n] - for key in list_reversed: - self._items.pop(key) diff --git a/mmif/utils/rewinder.py b/mmif/utils/rewinder.py index ad207528..bf938b03 100644 --- a/mmif/utils/rewinder.py +++ b/mmif/utils/rewinder.py @@ -1,25 +1,8 @@ import argparse -import mmif -import os - -def read_mmif(mmif_file)->mmif.Mmif: - """ - Function to read mmif file and return the mmif object. - (Would it be better to be a mmif object?) - - :param mmif_file: file path to the mmif. - :return: mmif object - """ - try: - with open(mmif_file, 'r') as file: - mmif_obj = mmif.Mmif(file.read()) +from pathlib import Path as P - except FileNotFoundError: - print(f"Error: MMIF file '{mmif_file}' not found.") - except Exception as e: - print(f"Error: An unexpected error occurred - {e}") +import mmif - return mmif_obj def is_valid_choice(choice): try: @@ -61,44 +44,62 @@ def user_choice(mmif_obj:mmif.Mmif) -> int: print("\nInvalid input. Please enter a valid number.") -def process_mmif(mmif_obj, choice: int, output_fp = "rewound.mmif", p=True) -> None: +def rewind_mmif(mmif_obj: mmif.Mmif, choice: int, choice_is_viewnum: bool = True) -> mmif.Mmif: """ - Process rewinding of mmif data from user choice and save it in as a json file. + Rewind MMIF by deleting the last N views. + The number of views to rewind is given as a number of "views", or number of "producer apps". + By default, the number argument is interpreted as the number of "views". :param mmif_obj: mmif object - :param choice: integer to rewind from - :param output_fp: path to save the rewound output file - :param p: whether using pretty printing or not - :return: rewound.mmif saved + :param choice: number of views to rewind + :param choice_is_viewnum: if True, choice is the number of views to rewind. If False, choice is the number of producer apps to rewind. + :return: rewound mmif object """ - mmif_obj.views._delete_last(choice) - mmif_serialized = mmif_obj.serialize(pretty=p) - - # Check if the same file name exist in the path and avoid overwriting. - if os.path.exists(output_fp): - file_name, file_extension = os.path.splitext(output_fp) - count = 1 - while os.path.exists(f"{file_name}_{count}.mmif"): - count += 1 - output_fp = f"{file_name}_{count}.mmif" + if choice_is_viewnum: + for vid in list(v.id for v in mmif_obj.views)[-1:-choice-1:-1]: + mmif_obj.views._items.pop(vid) + else: + app_count = 0 + cur_app = "" + vid_to_pop = [] + for v in reversed(mmif_obj.views): + if app_count >= choice: + break + if v.metadata.app != cur_app: + app_count += 1 + cur_app = v.metadata.app + vid_to_pop.append(v.id) + for vid in vid_to_pop: + mmif_obj.views._items.pop(vid) + return mmif_obj - with open(output_fp, 'w') as mmif_file: - mmif_file.write(mmif_serialized) - print("Successfully processed the rewind") if __name__ == "__main__": parser = argparse.ArgumentParser(description="Process MMIF file.") parser.add_argument("mmif_file", help="Path to the MMIF file") - parser.add_argument("-o", '--output', default = "rewound.mmif", type=str, help="Path to the rewound MMIF output file (default: rewound.mmif)") - parser.add_argument("-p", '--pretty', default = True, type = bool, help="Pretty print (default: pretty=True)") - parser.add_argument("-n", '--number', default = "0", type = is_valid_choice, help="Number of views to rewind (default: 0)") + parser.add_argument("-o", '--output', default="rewound.mmif", type=str, help="Path to the rewound MMIF output file (default: rewound.mmif)") + parser.add_argument("-p", '--pretty', action='store_true', help="Pretty print (default: pretty=True)") + parser.add_argument("-n", '--number', default="0", type=is_valid_choice, help="Number of views to rewind (default: 0)") args = parser.parse_args() - mmif_obj = read_mmif(args.mmif_file) + mmif_obj = mmif.Mmif(open(args.mmif_file).read()) if args.number == 0: # If user doesn't know how many views to rewind, give them choices. choice = user_choice(mmif_obj) else: choice = args.number - process_mmif(mmif_obj, choice, args.output, args.pretty) \ No newline at end of file + + # Check if the same file name exist in the path and avoid overwriting. + output_fp = P(args.output) + if output_fp.is_file(): + parent = output_fp.parent + stem = output_fp.stem + suffix = output_fp.suffix + count = 1 + while (parent / f"{stem}_{count}{suffix}").is_file(): + count += 1 + output_fp = parent / f"{stem}_{count}{suffix}" + + with open(output_fp, 'w') as mmif_file: + mmif_file.write(rewind_mmif(mmif_obj, choice, args.output).serialize(pretty=args.pretty)) diff --git a/tests/test_serialize.py b/tests/test_serialize.py index f31a19ff..341b4e34 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -111,14 +111,6 @@ def test_new_view(self): mmif_obj.new_view() # just raise exception if this fails self.assertEqual(old_view_count+1, len(mmif_obj.views)) - def test_delete_last_n_view(self): - mmif_obj = Mmif(self.mmif_examples_json['everything']) - original_view_count = len(mmif_obj.views) - original_last_view = mmif_obj.views.get_last().id - mmif_obj.views._delete_last(1) - self.assertEqual(original_view_count-1, len(mmif_obj.views)) - self.assertTrue(mmif_obj.views.get_last().id != original_last_view) - def test_document_text(self): text = "Karen flew to New York." en = 'en'