Skip to content

Commit

Permalink
cleanup, refactoring, test CDE button
Browse files Browse the repository at this point in the history
  • Loading branch information
piotrj committed Nov 18, 2023
1 parent 3116c0c commit a910463
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 142 deletions.
81 changes: 37 additions & 44 deletions src/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,26 +44,26 @@

from collections import defaultdict

import re
from re import compile as re_compile
from re import IGNORECASE

from signal import SIGTERM

from time import time

import gzip
import lzma
import zlib
import pickle
from gzip import open as gzip_open
from gzip import decompress as gzip_decompress
from gzip import compress as gzip_compress

import difflib

from executor import Executor
from pickle import dump as pickle_dump
from pickle import load as pickle_load

from difflib import SequenceMatcher
from subprocess import STDOUT, TimeoutExpired, PIPE, check_output
#, Popen

import pathlib
from executor import Executor

from pathlib import Path as pathlib_Path

def bytes_to_str(num):
if num < 1024:
Expand Down Expand Up @@ -191,9 +191,9 @@ def file_name(self):
def abort(self):
    """Raise the abort flag on this object by setting ``abort_action`` to True."""
    self.abort_action = True

CRC_BUFFER_SIZE=4*1024*1024
def calc_crc(self,fullpath,size):
buf = bytearray(self.CRC_BUFFER_SIZE)
CRC_BUFFER_SIZE=4*1024*1024
buf = bytearray(CRC_BUFFER_SIZE)
view = memoryview(buf)

self.crc_progress_info=0
Expand Down Expand Up @@ -256,7 +256,7 @@ def scan_rec(self, path, dictionary,check_dev=True,dev_call=None) :

is_dir,is_file,is_symlink = entry.is_dir(),entry.is_file(),entry.is_symlink()

self.ext_statistics[pathlib.Path(entry).suffix]+=1
self.ext_statistics[pathlib_Path(entry).suffix]+=1

self.info_line_current = entry_name
try:
Expand Down Expand Up @@ -467,8 +467,8 @@ def prepare_custom_data_pool_rec(self,dictionary,parent_path):

def get_cd_text(self,cd_data,is_compressed):
    """Return custom data in its text form.

    cd_data       -- the stored custom data; gzip-compressed bytes when
                     is_compressed is true, otherwise returned untouched
    is_compressed -- truthy when cd_data has to be decompressed first
    """
    if not is_compressed:
        return cd_data

    # decoded as ISO-8859-1 (not utf-8), the same encoding used when
    # the output is compressed in extract_custom_data
    return gzip_decompress(cd_data).decode("ISO-8859-1")

def extract_custom_data(self):
scan_path = self.db.scan_path
Expand Down Expand Up @@ -501,7 +501,7 @@ def extract_custom_data(self):
if crc:
self.info_line_current = f'{subpath} CRC calculation ({bytes_to_str(size)})'
crc_val = self.calc_crc(full_file_path,size)
print(crc_val)
#print(crc_val)

self.info_line_current = f'{subpath} ({bytes_to_str(size)})'

Expand All @@ -514,8 +514,8 @@ def extract_custom_data(self):
result = None
is_compressed = False
elif output_len>128:
result = gzip.compress(bytes(output,"ISO-8859-1")) #"utf-8"
#result = gzip.compress(output) #"utf-8"
result = gzip_compress(bytes(output,"ISO-8859-1")) #"utf-8"
#result = gzip_compress(output) #"utf-8"
is_compressed = True
else:
result = output
Expand Down Expand Up @@ -549,15 +549,6 @@ def extract_custom_data(self):

self.save()

#file_path=sep.join([self_db_dir,self.file_name()])
#self.log.info('saving %s' % file_path)

#with gzip.open(file_path, "wb") as gzip_file:
# pickle.dump(self_db, gzip_file)

#for rule,stat in zip(self_db.cde_list,self_db.cd_stat):
# print('cd_stat',rule,stat)

search_kind_code_tab={'dont':0,'without':1,'error':2,'regexp':3,'glob':4,'fuzzy':5}

def set_data(self):
Expand Down Expand Up @@ -676,7 +667,7 @@ def find_items(self,
if not cd_func_to_call(cd_txt):
continue
except Exception as e:
self.log.error('find_items_rec:%s on:\n%s',str(e),str(cd_txt) )
self.log.error('find_items_rec:%s',str(e) )
continue

else:
Expand All @@ -701,21 +692,17 @@ def save(self) :
file_path=sep.join([self.db_dir,file_name])
self.log.info('saving %s' % file_path)

with gzip.open(file_path, "wb") as gzip_file:
pickle.dump(self.db, gzip_file)
with gzip_open(file_path, "wb") as gzip_file:
pickle_dump(self.db, gzip_file)

self.info_line = ''

def load(self,db_dir,file_name):
self.log.info('loading %s' % file_name)
try:
full_file_path = sep.join([db_dir,file_name])
if True:
with gzip.open(full_file_path, "rb") as gzip_file:
self.db = pickle.load(gzip_file)
else:
with lzma.open(full_file_path, "rb") as gzip_file:
self.db = pickle.load(gzip_file)
with gzip_open(full_file_path, "rb") as gzip_file:
self.db = pickle_load(gzip_file)

global data_format_version
if self.db.data_format_version != data_format_version:
Expand All @@ -733,6 +720,14 @@ class LibrerCore:
records = set()
db_dir=''

def test_cde(self,executable,timeout,file_to_test):
    """Run ``executable`` on a single file through a fresh Executor.

    executable   -- list of command parts; file_to_test is appended to it
                    as the last argument (the passed list is not modified)
    timeout      -- forwarded unchanged to Executor.run
    file_to_test -- path handed to the command

    Returns the (cd_ok, output) pair produced by Executor.run.
    """
    command = executable + [file_to_test]

    run_ok,run_output = Executor().run(command,timeout)
    return run_ok,run_output

def __init__(self,db_dir,log):
self.records = set()
self.db_dir = db_dir
Expand Down Expand Up @@ -869,11 +864,11 @@ def find_items_in_all_records(self,
elif find_filename_search_kind == 'glob':
if name_case_sens:
#name_func_to_call = lambda x : fnmatch(x,name_expr)
name_func_to_call = lambda x : re.compile(translate(name_expr)).match(x)
name_func_to_call = lambda x : re_compile(translate(name_expr)).match(x)
else:
name_func_to_call = lambda x : re.compile(translate(name_expr), IGNORECASE).match(x)
name_func_to_call = lambda x : re_compile(translate(name_expr), IGNORECASE).match(x)
elif find_filename_search_kind == 'fuzzy':
name_func_to_call = lambda x : True if difflib.SequenceMatcher(None, name_expr, x).ratio()>filename_fuzzy_threshold_float else False
name_func_to_call = lambda x : True if SequenceMatcher(None, name_expr, x).ratio()>filename_fuzzy_threshold_float else False
else:
name_func_to_call = None
else:
Expand All @@ -887,18 +882,16 @@ def find_items_in_all_records(self,
elif find_cd_search_kind == 'glob':
if cd_case_sens:
#cd_func_to_call = lambda x : fnmatch(x,cd_expr)
cd_func_to_call = lambda x : re.compile(translate(cd_expr)).match(x)
cd_func_to_call = lambda x : re_compile(translate(cd_expr)).match(x)
else:
cd_func_to_call = lambda x : re.compile(translate(cd_expr), IGNORECASE).match(x)
cd_func_to_call = lambda x : re_compile(translate(cd_expr), IGNORECASE).match(x)
elif find_cd_search_kind == 'fuzzy':
cd_func_to_call = lambda x : True if difflib.SequenceMatcher(None, name_expr, x).ratio()>cd_fuzzy_threshold_float else False
cd_func_to_call = lambda x : True if SequenceMatcher(None, name_expr, x).ratio()>cd_fuzzy_threshold_float else False
else:
cd_func_to_call = None
else:
cd_func_to_call = None

#print('fuzz:',difflib.SequenceMatcher(None, 'hello world', 'hello').ratio())

self.find_res_quant = 0
sel_range = [range_par] if range_par else self.records

Expand Down
34 changes: 19 additions & 15 deletions src/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@
#
####################################################################################

import psutil

from subprocess import Popen, STDOUT, TimeoutExpired, PIPE, check_output, CalledProcessError
from subprocess import Popen, STDOUT, PIPE
from threading import Thread
from time import time
from time import sleep

from psutil import Process
from signal import SIGTERM


class Executor :
def __init__(self):
self.command_list_to_execute = None
Expand All @@ -60,7 +60,7 @@ def run(self,command_list_to_execute,timeout=None):
while self.running:
if timeout:
if time()-start>timeout:
self.kill()
self.kill(self.pid)
if not self.killed:
error_message += '\nKilled after timeout.'
self.killed = True
Expand All @@ -69,9 +69,9 @@ def run(self,command_list_to_execute,timeout=None):

return self.res_ok and not self.killed,(self.output if self.output else '') + error_message

def kill(self):
pid = self.pid
proc = psutil.Process(pid)
def kill(self,pid):

proc = Process(pid)

#proc.send_signal(SIGSTOP)
#proc.send_signal(SIGINT)
Expand All @@ -81,7 +81,7 @@ def kill(self):

try:
proc.send_signal(SIGTERM)
print('SIGTERM send to',pid)
#print('SIGTERM send to',pid)

except Exception as e:
print(e)
Expand All @@ -91,20 +91,24 @@ def run_in_thread(self):
if self.command_list_to_execute:
self.output = ''
output_list = []
output_list_append = output_list.append

try:
self.process = Popen(self.command_list_to_execute, start_new_session=True, stdout=PIPE, stderr=STDOUT)
self.pid = self.process.pid
proc = Popen(self.command_list_to_execute, stdout=PIPE, stderr=STDOUT)
self.pid = proc.pid

proc_stdout_readline = proc.stdout.readline
proc_poll = proc.poll
while True:
output=self.process.stdout.readline().decode("ISO-8859-1")
output_list.append(output)
if not output and self.process.poll() is not None:
output=proc_stdout_readline().decode("ISO-8859-1")
output_list_append(output)
if not output and proc_poll() is not None:
break

except Exception as e:
self.res_ok = False
output_list.append(str(e))
output_list_append(str(e))
print(e)

self.output = ''.join(output_list)
self.command_list_to_execute=None
Expand Down
Loading

0 comments on commit a910463

Please sign in to comment.