-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassparser.py
97 lines (83 loc) · 3.62 KB
/
classparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import javalang
import re
class ClassParser:
    """Extract method and constructor text from a Java source file.

    The file is read, stripped of imports, the package line, annotations and
    comments (see ``code_cleanup``), and the result is parsed with javalang.
    All line positions used by the extraction methods refer to the cleaned
    text stored in ``self.code``, because that is the text that was parsed.

    Attributes:
        file_path: Path the source was read from.
        code: Cleaned Java source, lines separated by ``\\n``.
        tree: javalang compilation unit parsed from ``code``.
    """

    # A Java string or char literal confined to one line.
    _STRING_OR_CHAR = r'"(?:\\.|[^"\\\n])*"' r"|'(?:\\.|[^'\\\n])*'"
    _LITERAL = re.compile(_STRING_OR_CHAR)
    # Literals are matched alongside comments so that comment markers inside
    # a literal (for example "http://host") are consumed as part of the
    # literal and never mistaken for the start of a comment.
    _COMMENT_OR_LITERAL = re.compile(
        _STRING_OR_CHAR + r'|/\*.*?\*/|//[^\n]*', re.DOTALL
    )
    # First character that terminates a declaration header.
    _HEADER_END = re.compile(r'[{;]')

    def __init__(self, file_path):
        """Read, clean and parse the Java file at ``file_path``.

        Raises whatever ``open`` raises for an unreadable path and whatever
        ``javalang.parse.parse`` raises for source it cannot parse.
        """
        self.file_path = file_path
        # Context manager so the handle is closed even if reading fails.
        with open(file_path) as source_file:
            raw_code = source_file.read()
        self.code = self.code_cleanup(raw_code)
        self.tree = javalang.parse.parse(self.code)

    @staticmethod
    def _drop_blank_lines(text):
        """Return ``text`` without empty or whitespace-only lines."""
        return '\n'.join(line for line in text.splitlines() if line.strip())

    @staticmethod
    def _keep_literal(match):
        """Regex callback: delete comments, leave string/char literals as-is."""
        token = match.group(0)
        return '' if token.startswith('/') else token

    def code_cleanup(self, code):
        """Return ``code`` without imports, package line, annotations, comments.

        Steps, in order: drop blank lines; blank out ``import`` and
        ``package`` statements; remove annotations (with an optional
        parenthesised argument list); remove ``/* */`` and ``//`` comments
        while leaving string and char literals intact; remove lines that
        start with ``*``; drop blank lines again.

        Known limitations of the regex-based approach:
          * The annotation pattern stops at the first ``)``, so nested
            parentheses in annotation arguments are not fully removed, and it
            also matches ``@word`` inside string literals.
          * Any line whose first non-blank character is ``*`` is removed,
            which also hits a wrapped multiplication operand.
        """
        code = self._drop_blank_lines(code)
        code = re.sub(r'^\s*import .*;$', '', code, flags=re.MULTILINE)
        code = re.sub(r'^\s*package .*;$', '', code, flags=re.MULTILINE)
        # Annotations go first so that their string arguments (which may
        # contain "//") are gone before comment stripping runs.
        code = re.sub(r'@\w+(\.\w+)*(\s*\(.*?\))?', '', code, flags=re.DOTALL)
        code = self._COMMENT_OR_LITERAL.sub(self._keep_literal, code)
        # Leftover Javadoc continuation lines (e.g. from an unterminated
        # comment). This also covers lines starting with "*/".
        code = re.sub(r'^\s*\*.*$', '', code, flags=re.MULTILINE)
        return self._drop_blank_lines(code)

    def single_line_cleanup(self, code, proto=False):
        """Collapse ``code`` onto one line with single spaces between tokens.

        Every run of spaces, tabs and line breaks becomes one space, so
        tokens that were separated only by a tab or newline stay separated.
        With ``proto=True`` all ``{`` and ``;`` characters are removed as
        well. The result has no leading or trailing whitespace.
        """
        flattened = re.sub(r'[ \t\r\n]+', ' ', code)
        if proto:
            flattened = flattened.replace('{', '').replace(';', '')
        return flattened.strip()

    def get_methods(self):
        """Return every ``MethodDeclaration`` node in the parsed tree."""
        return [
            node
            for _, node in self.tree.filter(javalang.tree.MethodDeclaration)
        ]

    def get_constructors(self):
        """Return every ``ConstructorDeclaration`` node in the parsed tree."""
        return [
            node
            for _, node in self.tree.filter(
                javalang.tree.ConstructorDeclaration
            )
        ]

    def _get_callables(self):
        """Return all method nodes followed by all constructor nodes."""
        return self.get_methods() + self.get_constructors()

    def get_method_prototypes(self):
        """Return the one-line header of every method and constructor.

        For each node the header runs from the node's line in ``self.code``
        up to, but not including, the first ``{`` or ``;``. Multi-line
        headers are flattened by ``single_line_cleanup``. Methods come
        first, then constructors.
        """
        lines = self.code.split('\n')
        prototypes = []
        for node in self._get_callables():
            header = []
            # position.line is 1-based; list indices are 0-based.
            for index in range(node.position.line - 1, len(lines)):
                line = lines[index]
                cut = self._HEADER_END.search(line)
                if cut:
                    # Keep only the declaration part so that a one-line
                    # method does not leak its body into the prototype.
                    header.append(line[:cut.start()])
                    break
                header.append(line)
            prototypes.append(
                self.single_line_cleanup('\n'.join(header), proto=True)
            )
        return prototypes

    def get_full_methods(self):
        """Return the complete source text of every method and constructor.

        Each entry spans from the node's line in ``self.code`` to the line
        on which its braces balance. A declaration without a body (a ``;``
        is reached before any ``{``) yields just its declaration lines.
        Braces inside string or char literals are ignored. If the braces
        never balance, only the first line is returned. Methods come first,
        then constructors.
        """
        lines = self.code.split('\n')
        full_methods = []
        for node in self._get_callables():
            start = node.position.line - 1  # 1-based -> 0-based
            end = start
            depth = 0
            body_opened = False
            for index in range(start, len(lines)):
                # Blank out literals so "{" / "}" / ";" inside them do not
                # disturb the structural scan.
                bare = self._LITERAL.sub('', lines[index])
                opens = bare.count('{')
                depth += opens - bare.count('}')
                if opens:
                    body_opened = True
                elif not body_opened and ';' in bare:
                    # Abstract/interface method: no body follows, so stop
                    # before running into the next declaration.
                    end = index
                    break
                if body_opened and depth == 0:
                    end = index
                    break
            full_methods.append('\n'.join(lines[start:end + 1]))
        return full_methods