Skip to content

Commit

Permalink
Make sure log extraction works with corrupted log lines
Browse files (browse the repository at this point in the history)
  • Loading branch information
ThomasFaria committed Apr 24, 2024
1 parent 7ed9518 commit f9ae9d4
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
3 changes: 0 additions & 3 deletions utils/etl_monitoring.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# Set environment variables


LOG_FILE_PATH_S3_IN=$NAMESPACE/log_files/raw/
LOG_FILE_PATH_S3_OUT=$NAMESPACE/log_files/preprocessed/

Expand Down Expand Up @@ -36,7 +34,6 @@ for file in *.log.*; do
# Rename the file
mv "$file" "$new_name"

echo "Renamed: $file to $new_name"
fi
done
cd ../..
Expand Down
12 changes: 10 additions & 2 deletions utils/extract_prod_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,23 @@ def extract_log_info(f):
"fasttextVersion": [],
}

for line in f:
lines = f.readlines()
for line in lines:
idx = line.find("CodificationBilan")
is_empty = bool(re.match(r"^\s*$", line))
is_valid_line = bool(re.search(PATTERN, line))

if (is_empty or not is_valid_line) and idx == -1:
continue

data = extract_data_by_line(line)
if line == lines[-1]:
# Some logs are corrupted so we have to deal with that
try:
data = extract_data_by_line(line)
except AttributeError:
print(f"ERROR for {line}")
else:
data = extract_data_by_line(line)

for key in results.keys():
results[key].append(data[key])
Expand Down

0 comments on commit f9ae9d4

Please sign in to comment.