From ac97d62f824865f76509dfe8ceaed5d340143d20 Mon Sep 17 00:00:00 2001 From: piotrj Date: Fri, 8 Dec 2023 21:10:10 +0100 Subject: [PATCH] news, refactoring --- CONTRIBUTING.md | 1 + README.md | 9 ++- requirements.txt | 1 + src/dialogs.py | 7 +- src/librer.py | 190 ++++++++++++++++++++++++++++++++++++----------- src/record.py | 7 +- 6 files changed, 165 insertions(+), 50 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6b564d8..f35ac4c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1 +1,2 @@ +If you have created a new custom data extractor for a specific data type, share it with others. Feel free to report issues, bugs, ideas any time. If you like this software, follow the author, leave a star or mention it on social media. diff --git a/README.md b/README.md index 5297342..9bc237b 100644 --- a/README.md +++ b/README.md @@ -55,8 +55,7 @@ Custom data extractor is a command that can be invoked with a single parameter - - Windows (10,11) ## Portability -**librer** writes log files, configuration and database files in runtime. Default location for these files is **logs**, **db** subfolders and folder of **librer** executable . If there are no write access rights to such folder, platform-specific folders are used for cache, settings and logs (provided by **appdirs** module). You can use --appdirs command line switch to force that behavior even when local folders are accessible. - +**librer** writes log files, configuration and record files at runtime. The default locations for these files are the **logs** and **data** subfolders of the **librer** main directory. ## Technical information Record in librer is the result of a single scan operation and is shown as one of many top nodes in the main tree window. Contains a directory tree with collected custom data and CRC data. It is stored as a single .dat file in librer database directory. 
Its internal format is optimized for security, fast initial access and maximum compression (just check :)) Every section is a python data structure serialized by [pickle](https://docs.python.org/3/library/pickle.html) and compressed separately by [Zstandard](https://pypi.org/project/zstandard/) algorithm. The record file, once saved, is never modified afterward. It can only be deleted upon request or exported. All record files are independent of each other.Fuzzy matching is implemented using the SequenceMatcher function provided by the [difflib](https://docs.python.org/3/library/difflib.html) package. @@ -84,5 +83,11 @@ pip install -r requirements.txt python3 ./src/librer.py ``` +## Ideas for future development +- gather **custom data** (generated also by user scripts) not only as text but also as binary files and store them inside record file (e.g. image thumbnails etc.) +- calculate the **CRC** of scanned files and use it to search for duplicates among different records, verify current data with the saved file system image + +- comparing two records with each other. e.g. 
two scans of the same file system performed at different times + ## Licensing - **librer** is licensed under **[MIT license](./LICENSE)** diff --git a/requirements.txt b/requirements.txt index cf9073c..a424760 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ appdirs==1.4.4 send2trash==1.8.2 zstandard==0.22.0 psutil==5.9.6 +pympler==1.0.1 diff --git a/src/dialogs.py b/src/dialogs.py index 720217b..d7f8526 100644 --- a/src/dialogs.py +++ b/src/dialogs.py @@ -87,7 +87,8 @@ def __init__(self,parent,icon,bg_color,title,pre_show=None,post_close=None,min_w self.area_main.grid_columnconfigure(0, weight=1) self.area_buttons = Frame(widget,bg=self.bg_color) - self.area_buttons.pack(side='bottom',expand=0,fill='x') + self.area_buttons.pack(side='bottom',expand=0) + #,fill='x' self.wait_var=BooleanVar() self.wait_var.set(False) @@ -257,8 +258,10 @@ def __init__(self,parent,icon,bg_color,pre_show=None,post_close=None,min_width=5 self.prev_image[i]=None self.abort_button=Button(self.area_buttons, text='Abort', width=10,command=lambda : self.hide() ) + self.abort_single_button=Button(self.area_buttons, text='Abort single file', width=10,command=lambda : self.hide() ) - self.abort_button.pack(side='bottom', anchor='n',padx=5,pady=5) + self.abort_button.pack(side='left', anchor='w',padx=5,pady=5) + #self.abort_single_button.pack(side='right', anchor='e',padx=5,pady=5) frame_0=Frame(self.area_main,bg=self.bg_color) if ShowProgress: diff --git a/src/librer.py b/src/librer.py index c8c848c..adc68a7 100644 --- a/src/librer.py +++ b/src/librer.py @@ -82,12 +82,15 @@ CFG_KEY_SINGLE_DEVICE = 'single_device' CFG_KEY_find_size_min = 'find_size_min' +CFG_KEY_find_size_max = 'find_size_max' + +CFG_KEY_find_modtime_min = 'find_modtime_min' +CFG_KEY_find_modtime_max = 'find_modtime_max' + CFG_KEY_find_range_all = 'find_range_all' CFG_KEY_find_cd_search_kind = 'find_cd_search_kind' CFG_KEY_find_filename_search_kind = 'find_filename_search_kind' -CFG_KEY_find_size_max = 
'find_size_max' - CFG_KEY_find_name_regexp = 'find_name_regexp' CFG_KEY_find_name_glob = 'find_name_glob' CFG_KEY_find_name_fuzz = 'find_name_fuzz' @@ -124,6 +127,9 @@ CFG_KEY_find_size_min:'', CFG_KEY_find_size_max:'', + + CFG_KEY_find_modtime_min:'', + CFG_KEY_find_modtime_max:'', CFG_KEY_find_name_regexp:'', CFG_KEY_find_name_glob:'', @@ -595,7 +601,7 @@ def help_cascade_post(): self.tooltip_message[str_self_progress_dialog_on_load_abort_button]='Abort loading.' self_progress_dialog_on_load.abort_button.configure(image=self.ico['cancel'],text='Abort',compound='left') - self_progress_dialog_on_load.abort_button.pack(side='bottom', anchor='n',padx=5,pady=5) + self_progress_dialog_on_load.abort_button.pack( anchor='center',padx=5,pady=5) self.action_abort=False self_progress_dialog_on_load.abort_button.configure(state='normal') @@ -1082,7 +1088,7 @@ def get_scan_dialog(self): max_tooltip = "Maximum size of file\nto aplly CD extraction\nmay be empty e.g. '100MB'" exec_tooltip = "Executable or batch script that will be run\nwith the file for extraction as last parameter.\nMay have other fixed parameters\nWill be executed with the full path of the scanned file\ne.g. '7z l', 'cat', 'my_extractor.sh', 'my_extractor.bat'" pars_tooltip = f"parameters of the CDE executable.\nUse '{PARAM_INDICATOR_SIGN}' sign to indicate 'file path'\nlocation, or leave it empty." - shell_tooltip = "Execute in system shell" + shell_tooltip = "Execute in system shell\nUse only when necessary." open_tooltip = "Set executable file as Custom Data Extractor..." timeout_tooltip = "Timeout limit in seconds for single CD extraction.\nAfter timeout executed process will be terminated\n\n'0' or no value means no timeout" test_tooltip = "Test Custom Data Extractor\non single manually selected file ..." 
@@ -1227,7 +1233,11 @@ def get_simple_progress_dialog_on_scan(self): self.simple_progress_dialog_on_scan.command_on_close = self.progress_dialog_abort - self.widget_tooltip(self.simple_progress_dialog_on_scan.abort_button,'') + self.widget_tooltip(self.simple_progress_dialog_on_scan.abort_button,'Abort test') + + str_simple_progress_dialog_scan_abort_button = str(self.simple_progress_dialog_on_scan.abort_button) + self.tooltip_message[str_simple_progress_dialog_scan_abort_button]='Abort test.' + self.simple_progress_dialog_on_scan_created = True return self.simple_progress_dialog_on_scan @@ -1414,6 +1424,9 @@ def get_find_dialog(self): self.find_size_min_var = StringVar() self.find_size_max_var = StringVar() + self.find_modtime_min_var = StringVar() + self.find_modtime_max_var = StringVar() + self.find_name_regexp_var = StringVar() self.find_name_glob_var = StringVar() self.find_name_fuzz_var = StringVar() @@ -1447,6 +1460,9 @@ def ver_number(var): self.find_size_min_var.set(ver_number(self.cfg.get(CFG_KEY_find_size_min))) self.find_size_max_var.set(ver_number(self.cfg.get(CFG_KEY_find_size_max))) + + self.find_modtime_min_var.set(ver_number(self.cfg.get(CFG_KEY_find_modtime_min))) + self.find_modtime_max_var.set(ver_number(self.cfg.get(CFG_KEY_find_modtime_max))) self.find_name_regexp_var.set(self.cfg.get(CFG_KEY_find_name_regexp)) self.find_name_glob_var.set(self.cfg.get(CFG_KEY_find_name_glob)) @@ -1465,6 +1481,9 @@ def ver_number(var): self.find_size_min_var.trace_add("write", lambda i,j,k : self.find_mod()) self.find_size_max_var.trace_add("write", lambda i,j,k : self.find_mod()) + + self.find_modtime_min_var.trace_add("write", lambda i,j,k : self.find_mod()) + self.find_modtime_max_var.trace_add("write", lambda i,j,k : self.find_mod()) self.find_name_regexp_var.trace_add("write", lambda i,j,k : self.find_mod()) self.find_name_glob_var.trace_add("write", lambda i,j,k : self.find_mod()) @@ -1547,12 +1566,12 @@ def ver_number(var): 
find_cd_frame.grid_columnconfigure(1, weight=1) - (find_size_frame := LabelFrame(sfdma,text='File size range',bd=2,bg=self.bg_color,takefocus=False)).grid(row=3,column=0,sticky='news',padx=4,pady=4) + (find_size_frame := LabelFrame(sfdma,text='File size',bd=2,bg=self.bg_color,takefocus=False)).grid(row=3,column=0,sticky='news',padx=4,pady=4) find_size_frame.grid_columnconfigure((0,1,2,3), weight=1) Label(find_size_frame,text='min: ',bg=self.bg_color,anchor='e',relief='flat',bd=2).grid(row=0, column=0, sticky='we',padx=4,pady=4) Label(find_size_frame,text='max: ',bg=self.bg_color,anchor='e',relief='flat',bd=2).grid(row=0, column=2, sticky='we',padx=4,pady=4) - + def validate_size_str(val): return bool(val == "" or val.isdigit()) @@ -1560,6 +1579,16 @@ def validate_size_str(val): #,validate="key",validatecommand=(entry_validator,"%P") Entry(find_size_frame,textvariable=self.find_size_min_var).grid(row=0, column=1, sticky='we',padx=4,pady=4) Entry(find_size_frame,textvariable=self.find_size_max_var).grid(row=0, column=3, sticky='we',padx=4,pady=4) + + (find_modtime_frame := LabelFrame(sfdma,text='File mod time',bd=2,bg=self.bg_color,takefocus=False)).grid(row=4,column=0,sticky='news',padx=4,pady=4) + find_modtime_frame.grid_columnconfigure((0,1,2,3), weight=1) + + Label(find_modtime_frame,text='min: ',bg=self.bg_color,anchor='e',relief='flat',bd=2).grid(row=0, column=0, sticky='we',padx=4,pady=4) + Label(find_modtime_frame,text='max: ',bg=self.bg_color,anchor='e',relief='flat',bd=2).grid(row=0, column=2, sticky='we',padx=4,pady=4) + + Entry(find_modtime_frame,textvariable=self.find_modtime_min_var).grid(row=0, column=1, sticky='we',padx=4,pady=4) + Entry(find_modtime_frame,textvariable=self.find_modtime_max_var).grid(row=0, column=3, sticky='we',padx=4,pady=4) + Button(self.find_dialog.area_buttons, text='Search', width=14, command=self.find_items ).pack(side='left', anchor='n',padx=5,pady=5) self.search_show_butt = Button(self.find_dialog.area_buttons, text='Show 
results', width=14, command=self.find_show_results ) @@ -1573,7 +1602,7 @@ def validate_size_str(val): #self.search_next_butt.pack(side='left', anchor='n',padx=5,pady=5) Button(self.find_dialog.area_buttons, text='Close', width=14, command=self.find_close ).pack(side='right', anchor='n',padx=5,pady=5) - sfdma.grid_rowconfigure(4, weight=1) + sfdma.grid_rowconfigure(5, weight=1) sfdma.grid_columnconfigure(0, weight=1) self.info_dialog_on_find = dialogs.LabelDialog(self.find_dialog.widget,(self.ico_librer,self.ico_record),self.bg_color,pre_show=lambda new_widget : self.pre_show(on_main_window_dialog=False,new_widget=new_widget),post_close=lambda : self.post_close(on_main_window_dialog=False)) @@ -2041,10 +2070,16 @@ def find_mod(self): self.find_params_changed=True elif self.cfg.get(CFG_KEY_find_range_all) != self.find_range_all.get(): self.find_params_changed=True + elif self.cfg.get(CFG_KEY_find_size_min) != self.find_size_min_var.get(): self.find_params_changed=True elif self.cfg.get(CFG_KEY_find_size_max) != self.find_size_max_var.get(): self.find_params_changed=True + + elif self.cfg.get(CFG_KEY_find_modtime_min) != self.find_modtime_min_var.get(): + self.find_params_changed=True + elif self.cfg.get(CFG_KEY_find_modtime_max) != self.find_modtime_max_var.get(): + self.find_params_changed=True elif self.cfg.get(CFG_KEY_find_name_regexp) != self.find_name_regexp_var.get(): self.find_params_changed=True @@ -2141,6 +2176,9 @@ def find_items(self): find_size_min = self.find_size_min_var.get() find_size_max = self.find_size_max_var.get() + + find_modtime_min = self.find_modtime_min_var.get() + find_modtime_max = self.find_modtime_max_var.get() find_name_regexp = self.find_name_regexp_var.get() find_name_glob = self.find_name_glob_var.get() @@ -2181,7 +2219,16 @@ def find_items(self): if max_num0: + simple_progress_dialog_scan_update_lab_text(0,f'timeout: {int(time_left)}') + else: + simple_progress_dialog_scan_update_lab_text(0,'Timeout') + 
self.output_list.append(f'Timeout {timeout_int}s.') + self.kill_test() + + self_main_after(25,lambda : wait_var_set(not wait_var_get())) + self_main_wait_variable(wait_var) + + test_thread.join() simple_progress_dialog_scan.hide(True) - result_tuple = io_list[0][7] + output = '\n'.join(self.output_list) - returncode,output = result_tuple - self.get_text_dialog_on_scan().show(f'CDE Test finished {"OK" if returncode==0 else "with Error"}',output) + self.get_text_dialog_on_scan().show(f'CDE Test finished {"OK" if self.returncode[0]==0 else "with Error"}',output) def cde_entry_open(self,e) : initialdir = self.last_dir if self.last_dir else self.cwd diff --git a/src/record.py b/src/record.py index c027db9..fab7317 100644 --- a/src/record.py +++ b/src/record.py @@ -168,11 +168,12 @@ def printer(): def print_info(*args): print('#',*args) -buffer_size = 1024*1024*64 -sys.stdout = io.TextIOWrapper(sys.stdout.detach(), write_through=True, line_buffering=False) -sys.stdout._CHUNK_SIZE = buffer_size if __name__ == "__main__": + buffer_size = 1024*1024*64 + sys.stdout = io.TextIOWrapper(sys.stdout.detach(), write_through=True, line_buffering=False) + sys.stdout._CHUNK_SIZE = buffer_size + VER_TIMESTAMP = get_ver_timestamp() args=parse_args(VER_TIMESTAMP)