ProcessJSON.py
import json
import csv
from collections import defaultdict
from pathlib import Path
import os

# Define the directories
pycefr_dir = '/pycefr'  # Path to the PyCEFR scripts
json_data_dir = os.path.join(pycefr_dir, 'DATA_JSON')  # Where JSON data is stored
output_dir = '/CompetencyScore'

# Ensure output directory exists
Path(output_dir).mkdir(parents=True, exist_ok=True)
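# A sketch of the input layout the functions below assume, inferred from how the
# data is read (the file stem as the top-level key, per-file entries whose names
# encode project/author/date/time/status, each holding a 'Levels' dict). The
# commit hash, name parts, level names, and scores are illustrative placeholders,
# not values from an actual PyCEFR run:
#
# <commit_hash>.json
# {
#     "<commit_hash>": {
#         "<prefix>_<project>_<author_id>_<date>_<time>_after_<rest>.py": {
#             "Levels": {"A1": 4, "B1": 2}
#         },
#         "<prefix>_<project>_<author_id>_<date>_<time>_before_<rest>.py": {
#             "Levels": {"A1": 3}
#         }
#     }
# }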
def process_json_files():
    """
    Processes JSON files generated by PyCEFR to extract competency levels and
    generate summary CSV and JSON files.
    """
    for json_file in Path(json_data_dir).glob('*.json'):
        process_json_file(json_file)
def process_json_file(json_file):
    """
    Processes a single JSON file generated by PyCEFR to extract competency levels and
    generate summary CSV and JSON files.
    """
    with open(json_file) as f:
        data = json.load(f)

    commit_hash = json_file.stem
    all_files_data = data.get(commit_hash, {})
    after_sum, before_sum = defaultdict(int), defaultdict(int)
    project_name = author_id = author_date_format = time_format = None

    for file_name, file_content in all_files_data.items():
        # Expected name: <prefix>_<project>_<author_id>_<date>_<time>_<status>_...
        parts = file_name.split('_')
        if len(parts) < 7:  # Adjusted for time format inclusion
            print(f"Unexpected filename structure: {file_name}")
            continue
        project_name, author_id, author_date_format, time_format, status = parts[1], parts[2], parts[3], parts[4], parts[5]
        for level, score in file_content['Levels'].items():
            if 'after' in status:
                after_sum[level] += score
            elif 'before' in status:
                before_sum[level] += score

    if project_name is None:  # No file in this commit matched the expected naming
        return

    diff = {level: after_sum[level] - before_sum.get(level, 0) for level in set(after_sum) | set(before_sum)}
    generate_summary_files(commit_hash, project_name, author_id, author_date_format, time_format, after_sum, before_sum, diff)
def generate_summary_files(commit_hash, project_name, author_id, author_date_format, time_format, after_sum, before_sum, diff):
    """
    Generates CSV and JSON summary files for competency levels, including time format.
    """
    output_csv_dir = os.path.join(output_dir, 'CSV', project_name, author_id)
    output_json_dir = os.path.join(output_dir, 'JSON', project_name, author_id)
    Path(output_csv_dir).mkdir(parents=True, exist_ok=True)
    Path(output_json_dir).mkdir(parents=True, exist_ok=True)

    summary_base = f"{commit_hash}_summary_{author_date_format}_{time_format}"
    csv_path = os.path.join(output_csv_dir, f"{summary_base}.csv")
    json_path = os.path.join(output_json_dir, f"{summary_base}.json")

    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['CommitHash', 'ProjectName', 'AuthorID', 'AuthorDateFormat', 'TimeFormat', 'Level', 'After', 'Before', 'Difference'])
        for level in sorted(set(after_sum.keys()).union(before_sum.keys())):
            writer.writerow([
                commit_hash, project_name, author_id, author_date_format, time_format,
                level, after_sum.get(level, 0), before_sum.get(level, 0), diff.get(level, 0)
            ])

    with open(json_path, 'w') as jsonfile:
        json.dump({
            'CommitHash': commit_hash,
            'ProjectName': project_name,
            'AuthorID': author_id,
            'AuthorDateFormat': author_date_format,
            'TimeFormat': time_format,
            'Levels': {
                'After': dict(after_sum),
                'Before': dict(before_sum),
                'Difference': dict(diff)
            }
        }, jsonfile, indent=4)
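# Expected outputs, sketched from generate_summary_files above (paths shown with
# placeholder names): one CSV and one JSON summary per processed commit, e.g.
#   /CompetencyScore/CSV/<project>/<author_id>/<commit>_summary_<date>_<time>.csv
#   /CompetencyScore/JSON/<project>/<author_id>/<commit>_summary_<date>_<time>.json
# The CSV holds one row per level; the JSON groups the same scores under
# 'Levels' -> 'After' / 'Before' / 'Difference'.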
# Run the processing when this file is executed as a script
if __name__ == '__main__':
    process_json_files()