# ExtractSourceFilesInfo.py
# NOTE: this file was captured from a GitHub file view; the page chrome and the
# 1-204 line-number gutter that preceded the code were scrape residue, not part
# of the module (original size: 204 lines, 169 loc, 8.65 KB).
import pathlib
import subprocess
import json
from copy import deepcopy
from pydriller import RepositoryMining
import matplotlib.pyplot as plt
import networkx as nx
class ExtractSourceFilesInfo:
    """Extract file/developer and file/file relations for one repository.

    The commit history is read through PyDriller's ``RepositoryMining`` and the
    static dependencies between source files are computed by running the
    external ``depends/depends.jar`` tool on the local checkout.

    Files are identified by their base name only (``m.filename`` on the commit
    side, the last path component on the Depends side), so two files with the
    same name in different directories share a single entry.
    """

    def __init__(self, repository_path, path_to_file):
        """Store the repository coordinates.

        Args:
            repository_path: name of the repository; it is appended to
                ``https://www.github.com/`` to mine the history and is also
                used as the local directory of the checkout.
            path_to_file: sub-path inside the local checkout handed to
                Depends (used to leave test files out of the analysis).
        """
        self._repository = RepositoryMining("https://www.github.com/" + repository_path + ".git")
        self._repository_path = repository_path
        self._path_to_file = path_to_file
        # Filled by getFileFileMatrix(); getFileDevMatrix() builds its rows from it.
        self._classNames = []

    @staticmethod
    def _count_commits(commits):
        """Return ``{filename: {author_name: number_of_modifications}}``."""
        counts = {}
        for commit in commits:
            author = commit.author.name
            # A single commit may touch several files: count each of them.
            for modification in commit.modifications:
                per_author = counts.setdefault(modification.filename, {})
                per_author[author] = per_author.get(author, 0) + 1
        return counts

    @staticmethod
    def _dependency_matrix(data):
        """Turn the parsed Depends JSON into ``(matrix, class_names)``.

        ``matrix[i][j]`` is the sum of all dependency values going from the
        file at index ``i`` to the file at index ``j`` (0 when there is none).
        """
        # PureWindowsPath accepts both '\\' and '/' separators, so the base name
        # is extracted correctly whatever platform produced the JSON.
        names = [pathlib.PureWindowsPath(key).name for key in data["variables"]]
        size = len(names)
        matrix = [[0] * size for _ in range(size)]
        for cell in data["cells"]:
            src, dest = cell["src"], cell["dest"]
            # Cells pointing outside the variable list are ignored.
            if 0 <= src < size and 0 <= dest < size:
                matrix[src][dest] += sum(cell["values"].values())
        return matrix, names

    @staticmethod
    def _file_dev_matrix(commit_counts, class_names):
        """Return ``(matrix, developer_names)`` for the given files.

        ``matrix[i][j]`` is the number of modifications made by
        ``developer_names[j]`` to ``class_names[i]``. A file that is missing
        from ``commit_counts`` yields a row of zeros instead of a ``KeyError``.
        """
        # dict.fromkeys drops duplicates while keeping first-seen order.
        dev_names = list(dict.fromkeys(
            dev
            for file_name in class_names
            for dev in commit_counts.get(file_name, {})
        ))
        matrix = [
            [commit_counts.get(file_name, {}).get(dev, 0) for dev in dev_names]
            for file_name in class_names
        ]
        return matrix, dev_names

    def getFileDevDictionary(self):
        """Build the file -> developers dictionary and display it as a graph.

        Returns:
            dict: ``{filename: {author_name: number_of_modifications}}`` for
            every file modified in the traversed commits.
        """
        commit_counts = self._count_commits(self._repository.traverse_commits())

        file_names = list(commit_counts)
        committers = list(dict.fromkeys(
            name for authors in commit_counts.values() for name in authors
        ))

        # Graph with one node per file, one per developer and one edge per
        # (file, developer) pair.
        graph = nx.Graph()
        graph.add_nodes_from(file_names, bipartite=0)
        graph.add_nodes_from(committers, bipartite=1)
        graph.add_edges_from(
            (file_name, name)
            for file_name, authors in commit_counts.items()
            for name in authors
        )

        # The figure has to exist before drawing: creating it afterwards opens
        # a second, empty figure and leaves the graph at the default size.
        plt.figure(figsize=(10, 8), dpi=300)
        pos = nx.spring_layout(graph, k=0.4, iterations=20)
        nx.draw_networkx_nodes(graph, pos, node_size=40)
        nx.draw_networkx_edges(graph, pos, edgelist=graph.edges, edge_color="b", style="solid")
        nx.draw_networkx_labels(graph, pos, font_size=7, font_family="sans-serif")
        plt.axis("off")
        plt.show()
        return commit_counts

    def getFileFileDictionary(self):
        """Run Depends on ``<repository_path>/<path_to_file>``.

        The tool is asked to write ``outputDep`` into the ``depends``
        directory. The exit status of the process is not inspected and the
        method returns ``None``.
        """
        repo_dir = self._repository_path + "/" + self._path_to_file
        subprocess.call(
            ['java', '-jar', 'depends/depends.jar', 'java', repo_dir, 'outputDep', '--auto-include',
             '-d=depends'])

    def getFileFileMatrix(self):
        """Compute the file/file dependency matrix and display it as a graph.

        Runs Depends, loads ``depends/outputDep.json`` and remembers the class
        names in ``self._classNames`` for a later ``getFileDevMatrix()`` call.

        Returns:
            tuple: ``(dependencies, name_of_classes)`` where
            ``dependencies[i][j]`` is the summed dependency weight from
            ``name_of_classes[i]`` to ``name_of_classes[j]``.
        """
        self.getFileFileDictionary()
        with open("depends/outputDep.json", encoding="utf-8") as f:
            data = json.load(f)

        dependencies, name_of_classes = self._dependency_matrix(data)
        self._classNames = name_of_classes

        # Keep only the non-zero entries, keyed by class name.
        weights_by_file = {}
        for class_name, row in zip(name_of_classes, dependencies):
            weights_by_file[class_name] = {
                other: weight
                for other, weight in zip(name_of_classes, row)
                if weight > 0
            }

        # Undirected graph: one weighted edge per pair of dependent files.
        graph = nx.Graph()
        for class_name, targets in weights_by_file.items():
            for other, weight in targets.items():
                graph.add_edge(class_name, other, weight=weight)

        pos = nx.spring_layout(graph)
        nx.draw_networkx_nodes(graph, pos, node_size=70)
        nx.draw_networkx_edges(graph, pos, edgelist=graph.edges, edge_color="b", style="solid")
        nx.draw_networkx_labels(graph, pos, font_size=5, font_family="sans-serif")
        plt.axis("off")
        plt.show()
        return dependencies, name_of_classes

    def getFileDevMatrix(self):
        """Compute the file/developer matrix for the files found by Depends.

        The rows follow ``self._classNames``, so ``getFileFileMatrix()`` must
        have been called first; otherwise the matrix is empty.

        Returns:
            tuple: ``(fileDevMatrix, devNames)`` where ``fileDevMatrix[i][j]``
            is the number of modifications of file ``i`` by ``devNames[j]``.
        """
        data = self.getFileDevDictionary()
        return self._file_dev_matrix(data, self._classNames)