1
+ import csv
2
+ import json
3
+ import io
4
+ import pycurl
5
+ from datetime import datetime , timezone
6
+ from pydriller import GitRepository , RepositoryMining
7
+ import pytz
8
+ import math
9
+
10
+
11
# Running count of API requests made so far; getReleases() advances it after
# each request and uses it to rotate through the accessToken list below.
accessTokenCounter = 0
# Credentials passed as the password half of pycurl.USERPWD in getReleases().
# NOTE(review): SECURITY - these look like live GitHub access tokens committed
# in plain text. They should be revoked/rotated and loaded from the
# environment or an untracked config file instead of living in source control.
accessToken = ["7780bc2602cf4b08f2226df2a7bd10322889e794",
               "a3faa7537be29d679d1fe4fc164cea9a868871ec",
               "9cbaddca8a3a0c7cd0392015b1aaea61b980f10f",
               "3bb16fb747687219525d25d211e6f9693cafeff4",
               "035918431202a5f530cba91ef6fabfa381ebaa9c",
               "54ad10a8f93229a8281da7aa08d1b1c51c6ed94a",
               "66370591addc2cbe18730c60049918d12114c295",
               "000bfcb24d207efc28634066d954441c6c02c42a",
               "9a485fb6b838551899bde1304035a6dfd17b8fb8",
               "464b5762578f185aebb2e1bbbe033aa7abc416d8"]
22
+
23
+
24
def getReleases(repo):
    """Fetch every GitHub release of ``repo`` and return their time windows.

    Pages through ``https://api.github.com/repos/<repo>/releases`` (100
    entries per page) with HTTP basic auth, using one entry of the
    module-level ``accessToken`` list per request and advancing the global
    ``accessTokenCounter`` after each request.

    Args:
        repo: Repository in ``owner/name`` form, e.g. ``"apache/arrow"``.

    Returns:
        dict mapping a running index (0 is the first release the API
        returned) to ``[startDate, endDate]``, both timezone-aware UTC
        datetimes. ``startDate`` is the release's ``created_at``;
        ``endDate`` is the ``created_at`` of the release listed just before
        it, or the time of this call for index 0. One extra trailing entry
        spans from 1990-01-01 up to the last release seen, so commits that
        predate every release still fall into a window.
        NOTE(review): this assumes the API lists releases newest first -
        confirm against the GitHub REST documentation.

    Raises:
        RuntimeError: if a page decodes to something other than a JSON list
            (GitHub reports errors such as rate limiting as a JSON object).
    """
    global accessTokenCounter

    # Bound before the loop: the trailing sentinel entry needs it even when
    # the very first page is empty (repository without releases).
    utc = pytz.UTC
    timestampFormat = "%Y-%m-%dT%H:%M:%SZ"

    repoReleases = {}
    startDate = datetime.now(timezone.utc)
    pageCounter = 1
    releaseCounter = 0

    while True:
        output = io.BytesIO()
        conn = pycurl.Curl()
        try:
            # Rotate over however many tokens are configured instead of
            # assuming there are exactly ten.
            token = accessToken[accessTokenCounter % len(accessToken)]
            conn.setopt(pycurl.USERPWD, "e.morovati@yahoo.com:{}".format(token))
            conn.setopt(
                pycurl.URL,
                f"https://api.github.com/repos/{repo}/releases?per_page=100&page={pageCounter}",
            )
            conn.setopt(pycurl.WRITEFUNCTION, output.write)
            conn.perform()
        finally:
            # Release the curl handle even when the transfer fails.
            conn.close()

        accessTokenCounter += 1
        pageCounter += 1

        releases = json.loads(output.getvalue().decode())

        # An empty page means we have walked past the last release.
        if not releases:
            break

        # Error responses are JSON objects, not lists; fail loudly rather
        # than iterating over the object's keys.
        if not isinstance(releases, list):
            raise RuntimeError(
                f"Unexpected response while listing releases of {repo}: {releases!r}"
            )

        for release in releases:
            # Each window ends where the previously listed release starts.
            endDate = startDate
            startDate = utc.localize(
                datetime.strptime(release['created_at'], timestampFormat)
            )
            repoReleases[releaseCounter] = [startDate, endDate]
            releaseCounter += 1

    # Catch-all window for commits older than the last release seen.
    repoReleases[releaseCounter] = [
        utc.localize(datetime.strptime('1990-01-01T00:00:00Z', timestampFormat)),
        startDate,
    ]

    return repoReleases
63
+
64
+
65
def findRelease(commitDate, repoReleases):
    """Return the key of the first release window that contains ``commitDate``.

    Args:
        commitDate: Datetime to classify; it must be comparable with the
            datetimes stored in ``repoReleases``.
        repoReleases: Mapping of release key -> sequence whose element 0 is
            the window start (exclusive) and element 1 is the window end
            (inclusive).

    Returns:
        The first key, in the mapping's iteration order, whose window
        satisfies ``start < commitDate <= end``; ``0`` when no window
        matches. Note that ``0`` is therefore returned both for a match on
        key 0 and for "no match".
    """
    matchingKeys = (
        key
        for key, window in repoReleases.items()
        if commitDate <= window[1] and commitDate > window[0]
    )
    return next(matchingKeys, 0)
76
+
77
+
78
def main(commitMetricFile):
    """Compute per-commit change metrics for apache/arrow and write them to CSV.

    Reads commit SHAs (first column) from
    ``results/arrow/arrowClosedCommits.csv``, processes them in reverse file
    order, and writes one row per commit to
    ``results/arrow/arrowSortedCommitMetrics.csv``. Each row holds the SHA,
    author date, release index from ``findRelease``, per-file averages of
    added / deleted / churned lines, entropy, change age (days since each
    file was last touched by an earlier processed commit) and prior
    contributors, plus the number of changed files, the author's count of
    previously processed commits, and whether the SHA is listed in
    ``results/arrow/arrowRealCJavaCommits.csv``.

    NOTE(review): processing in reverse presumably replays history oldest
    first (input file newest first) - confirm, since change age and
    developer experience depend on that order.

    Args:
        commitMetricFile: Unused; kept so existing callers keep working.

    Returns:
        True once all rows have been written.
    """
    repo = "apache/arrow"

    # SHAs of cross-language bug-inducing commits. A set keeps the
    # per-commit membership test O(1); blank CSV rows are skipped.
    with open('results/arrow/arrowRealCJavaCommits.csv', encoding="ISO-8859-1") as crossLangCommits:
        crossLangCommitSet = {
            commit[0]
            for commit in csv.reader(crossLangCommits, delimiter=',')
            if commit
        }

    # All commits to analyse, loaded eagerly so the file can be closed.
    with open('results/arrow/arrowClosedCommits.csv', encoding="ISO-8859-1") as closedCommits:
        closedCommitRows = [
            row for row in csv.reader(closedCommits, delimiter=',') if row
        ]

    # Release windows used to label each commit.
    repoReleases = getReleases(repo)

    gr = GitRepository('/home/mmm/Projects/arrow')

    counter = 0
    users = {}             # author name -> number of commits processed so far
    projectFiles = {}      # file key -> author date of the last processed change
    fileContributors = {}  # file key -> set of authors who touched the file

    # newline='' is what the csv module expects for files it writes to.
    with open('results/arrow/arrowSortedCommitMetrics.csv', 'w', newline='') as csvOut:
        writer = csv.writer(csvOut)
        writer.writerow(["commit SHA", "commit date", "release", "added LOC", "deleted LOC", "churn LOC", "entropy", "change age", "changed files",
                         "contributors", "developer-exp", "cross-lang"])

        for row in reversed(closedCommitRows):
            sha = row[0]
            commit = gr.get_commit(sha)
            counter += 1

            commitDate = commit.author_date
            author = commit.author.name
            commitFiles = commit.modifications
            changedFiles = len(commitFiles)

            # Experience is measured BEFORE this commit is counted.
            authorExperience = users.get(author, 0)

            addedLOC = 0
            deletedLOC = 0
            changesAge = 0
            developerContributor = 0
            fileKeys = []

            for commitFile in commitFiles:
                addedLOC += commitFile.added
                deletedLOC += commitFile.removed
                # NOTE(review): new_path may be None for deleted files, which
                # would yield "None/<name>" keys - confirm against pydriller.
                fileName = f"{commitFile.new_path}/{commitFile.filename}"
                fileKeys.append(fileName)
                developerContributor += len(fileContributors.get(fileName, ()))
                if fileName in projectFiles:
                    changesAge += (commitDate - projectFiles[fileName]).days

            churnLOC = addedLOC + deletedLOC

            # Entropy: sum over files of -log2(file share of the commit churn).
            # NOTE(review): the terms are not weighted by the share itself, so
            # this is not the textbook Shannon entropy - confirm it is intended.
            entropy = 0
            for commitFile in commitFiles:
                # Files without line changes count as one changed line.
                changedLOC = (commitFile.added + commitFile.removed) or 1
                # Separate denominator so a zero-churn commit does not end
                # up reporting an artificial churn of 1.
                totalLOC = churnLOC or changedLOC
                entropy += -math.log2(changedLOC / totalLOC)

            # Report per-file averages (left untouched for empty commits).
            if changedFiles != 0:
                addedLOC = addedLOC / changedFiles
                deletedLOC = deletedLOC / changedFiles
                churnLOC = churnLOC / changedFiles
                changesAge = changesAge / changedFiles
                entropy = entropy / changedFiles
                developerContributor = developerContributor / changedFiles

            crossLangFlag = sha in crossLangCommitSet

            writer.writerow([
                sha,
                commitDate,
                findRelease(commitDate, repoReleases),
                addedLOC,
                deletedLOC,
                churnLOC,
                entropy,
                changesAge,
                changedFiles,
                developerContributor,
                authorExperience,
                crossLangFlag,
            ])

            print(f"{counter} commit is parsed")

            # Update the running history only after the row is written, so
            # the metrics above never include the current commit itself.
            users[author] = authorExperience + 1

            for fileName in fileKeys:
                # Record the change for EVERY file of the commit, including
                # files this author had already contributed to.
                projectFiles[fileName] = commitDate
                fileContributors.setdefault(fileName, set()).add(author)

    return True
220
+
221
+
222
# Script entry point: run the metric extraction when executed directly.
# The literal "test" is passed as main()'s commitMetricFile argument.
if __name__ == '__main__':
    main("test")
0 commit comments