Skip to content

Commit 02f4eee

Browse files
authored Jul 30, 2020
extract commit metrics with related repo release
1 parent c333a19 commit 02f4eee

File tree

1 file changed

+223
-0
lines changed

1 file changed

+223
-0
lines changed
 

‎commitMetrics-repoVersion.py

+223
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,223 @@
1+
import csv
2+
import json
3+
import io
4+
import pycurl
5+
from datetime import datetime, timezone
6+
from pydriller import GitRepository, RepositoryMining
7+
import pytz
8+
import math
9+
10+
11+
# Number of GitHub API requests issued so far; used to rotate through accessToken.
accessTokenCounter = 0


def _loadAccessTokens():
    """Return the GitHub API tokens configured in the environment.

    Tokens are read from the ``GITHUB_ACCESS_TOKENS`` environment variable as a
    comma-separated list. Blank entries are ignored, and an unset variable
    yields an empty list.
    """
    # Imported locally so this block does not depend on the file's import list.
    import os

    rawTokens = os.environ.get("GITHUB_ACCESS_TOKENS", "")
    return [token.strip() for token in rawTokens.split(",") if token.strip()]


# SECURITY: access tokens (and an account e-mail) used to be hard-coded here.
# Credentials must never live in source control; any token that was committed
# has to be treated as leaked and revoked. Supply tokens through the
# GITHUB_ACCESS_TOKENS environment variable instead.
accessToken = _loadAccessTokens()


def _fetchReleasePage(repo, pageNumber, token=None):
    """Download and decode one page of the GitHub releases listing for *repo*.

    Args:
        repo: Repository in ``owner/name`` form.
        pageNumber: 1-based page index passed to the API.
        token: Optional access token; when falsy the request is sent
            unauthenticated.

    Returns:
        The decoded JSON array of release objects (possibly empty).

    Raises:
        RuntimeError: If the response is not a JSON array, which is how the
            API reports errors such as rate limiting or an unknown repository.
    """
    output = io.BytesIO()
    conn = pycurl.Curl()
    try:
        conn.setopt(
            pycurl.URL,
            f"https://api.github.com/repos/{repo}/releases?per_page=100&page={pageNumber}")
        if token:
            conn.setopt(pycurl.HTTPHEADER, [f"Authorization: token {token}"])
        conn.setopt(pycurl.WRITEFUNCTION, output.write)
        conn.perform()
    finally:
        # Release the libcurl handle even when the transfer fails.
        conn.close()

    releases = json.loads(output.getvalue().decode())
    if not isinstance(releases, list):
        # An error payload is a JSON object; iterating it as if it were a list
        # of releases would fail later with a misleading TypeError.
        message = releases.get("message", releases) if isinstance(releases, dict) else releases
        raise RuntimeError(f"GitHub releases request for {repo} failed: {message}")
    return releases


def getReleases(repo):
    """Build the time window covered by each release of a GitHub repository.

    Every page of the repository's releases is fetched and consecutive
    ``created_at`` timestamps are turned into windows. Window ``0`` ends at the
    moment this function is called; each following window ends where the
    previous one starts. A final window reaching back to 1990-01-01 is always
    appended, so the result has one more entry than there are releases.

    NOTE(review): the windows are only contiguous and correctly ordered if the
    API returns releases newest first - confirm against the GitHub docs.

    Args:
        repo: Repository in ``owner/name`` form, e.g. ``"apache/arrow"``.

    Returns:
        dict mapping a running release index (int) to ``[startDate, endDate]``,
        both timezone-aware UTC datetimes.

    Raises:
        RuntimeError: If GitHub answers with an error payload.
    """
    global accessTokenCounter

    repoReleases = {}
    # Upper bound of the newest window.
    startDate = datetime.now(timezone.utc)
    releaseCounter = 0
    pageCounter = 1

    while True:
        # Rotate over however many tokens are configured (possibly none).
        token = accessToken[accessTokenCounter % len(accessToken)] if accessToken else None
        releases = _fetchReleasePage(repo, pageCounter, token)
        accessTokenCounter += 1
        pageCounter += 1

        # An empty page means the listing is exhausted.
        if not releases:
            break

        for release in releases:
            endDate = startDate
            startDate = datetime.strptime(
                release['created_at'], "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
            repoReleases[releaseCounter] = [startDate, endDate]
            releaseCounter += 1

    # Catch-all window for everything older than the oldest release.
    repoReleases[releaseCounter] = [datetime(1990, 1, 1, tzinfo=timezone.utc), startDate]

    return repoReleases
63+
64+
65+
def findRelease(commitDate, repoReleases):
    """Return the key of the first release window that contains *commitDate*.

    Args:
        commitDate: Datetime to look up; it must be comparable with the
            window bounds.
        repoReleases: Mapping of release key -> ``[startDate, endDate]``.

    Returns:
        The key of the first window (in the mapping's iteration order) for
        which ``startDate < commitDate <= endDate``, or ``0`` when no window
        matches.
    """
    matchingKeys = (
        key
        for key, window in repoReleases.items()
        # Upper bound is inclusive, lower bound is exclusive.
        if commitDate <= window[1] and commitDate > window[0]
    )
    return next(matchingKeys, 0)
76+
77+
78+
def _modificationKey(modification):
    """Return the path used to track one modified file across commits."""
    # new_path is None for a deleted file; formatting it into a string would
    # lump every deleted file of the same name under "None/<name>".
    return modification.new_path or modification.old_path or modification.filename


def main(commitMetricFile):
    """Compute per-commit change metrics for apache/arrow and write them to CSV.

    Commits listed in ``results/arrow/arrowClosedCommits.csv`` are processed
    in reverse file order. For each commit one row is written to
    ``results/arrow/arrowSortedCommitMetrics.csv`` holding the release
    window, per-file averages of added/deleted/churned lines, the entropy
    measure, the age of the previous change, the number of changed files, the
    number of earlier contributors, the author's earlier commit count, and
    whether the commit is listed in
    ``results/arrow/arrowRealCJavaCommits.csv``.

    Args:
        commitMetricFile: Currently unused; all input and output paths are
            hard-coded.

    Returns:
        True once every commit has been written.
    """
    repo = "apache/arrow"

    users = {}             # author name -> number of commits processed so far
    projectFiles = {}      # file key -> author date of the last change seen
    fileContributors = {}  # file key -> set of authors that changed the file

    # Cross-language bug-inducing commits; a set gives O(1) membership tests.
    with open('results/arrow/arrowRealCJavaCommits.csv', encoding="ISO-8859-1") as crossLangCommits:
        crossLangCommitSet = {
            commit[0]
            for commit in csv.reader(crossLangCommits, delimiter=',')
            if commit  # blank lines come back as empty rows
        }

    # All commits to analyse.
    with open('results/arrow/arrowClosedCommits.csv', encoding="ISO-8859-1") as closedCommits:
        closedCommitRows = [row for row in csv.reader(closedCommits, delimiter=',') if row]

    repoReleases = getReleases(repo)
    gr = GitRepository('/home/mmm/Projects/arrow')

    # newline='' is what the csv module expects for files it writes to.
    with open('results/arrow/arrowSortedCommitMetrics.csv', 'w', newline='') as csvOut:
        writer = csv.writer(csvOut)
        writer.writerow(["commit SHA", "commit date", "release", "added LOC", "deleted LOC",
                         "churn LOC", "entropy", "change age", "changed files",
                         "contributors", "developer-exp", "cross-lang"])

        for counter, row in enumerate(reversed(closedCommitRows), start=1):
            commit = gr.get_commit(row[0])
            commitDate = commit.author_date
            author = commit.author.name

            commitfiles = commit.modifications
            changedFiles = len(commitfiles)
            fileKeys = [_modificationKey(commitFile) for commitFile in commitfiles]

            # Experience counts only the commits seen before this one.
            authorExperience = users.get(author, 0)

            addedLOC = sum(commitFile.added for commitFile in commitfiles)
            deletedLOC = sum(commitFile.removed for commitFile in commitfiles)
            churnLOC = addedLOC + deletedLOC
            developerContributor = sum(
                len(fileContributors.get(fileName, ())) for fileName in fileKeys)
            changesAge = sum(
                (commitDate - projectFiles[fileName]).days
                for fileName in fileKeys
                if fileName in projectFiles)

            # Entropy measure: sum over files of -log2(file churn / commit churn).
            # A file or commit with no line changes counts as one line so the
            # logarithm stays defined. The substitute denominator is kept
            # separate so the reported churn is not altered.
            # NOTE(review): this is not Shannon entropy (no p * log2(p)
            # weighting) - kept as the original defines it.
            entropyDenominator = churnLOC or 1
            entropy = sum(
                -math.log2(((commitFile.added + commitFile.removed) or 1) / entropyDenominator)
                for commitFile in commitfiles)

            # Report per-file averages.
            if changedFiles != 0:
                addedLOC = addedLOC / changedFiles
                deletedLOC = deletedLOC / changedFiles
                churnLOC = churnLOC / changedFiles
                changesAge = changesAge / changedFiles
                entropy = entropy / changedFiles
                developerContributor = developerContributor / changedFiles

            writer.writerow([
                row[0],
                commitDate,
                findRelease(commitDate, repoReleases),
                addedLOC,
                deletedLOC,
                churnLOC,
                entropy,
                changesAge,
                changedFiles,
                developerContributor,
                authorExperience,
                row[0] in crossLangCommitSet,
            ])

            print(f"{counter} commit is parsed")

            # Update the history only after the row is written, so each metric
            # reflects the state before this commit.
            users[author] = authorExperience + 1
            for fileName in fileKeys:
                projectFiles[fileName] = commitDate
                fileContributors.setdefault(fileName, set()).add(author)

    return True
220+
221+
222+
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main("test")

0 commit comments

Comments
 (0)
Please sign in to comment.