@@ -104,25 +104,31 @@ def randomForest():
104
104
trainData = pd .read_csv ('results/TrainData.csv' , low_memory = False )
105
105
testData = pd .read_csv ('results/TestData.csv' , low_memory = False )
106
106
107
- feature_names = ["release" ,
108
- # "addedLOC", "deletedLOC",
107
+ feature_names = [#"release", "addedLOC", "deletedLOC",
109
108
"churnLOC" , "entropy" ,"changeAge" , "changedFiles" ,
110
- "contributors" , "developerExp" , "maxPreviousBug" , "maxPreviousCrossLangBug" ]
109
+ "contributors" , "developerExp" , "maxPreviousBug"
110
+ ''', "maxPreviousCrossLangBug"'''
111
+ ]
111
112
112
113
y_train = trainData ["crossLang" ]
113
- X_train = trainData .drop (["crossLang" , "addedLOC" ,"deletedLOC" ], axis = 1 )
114
+ X_train = trainData .drop (["release" , " crossLang" , "addedLOC" ,"deletedLOC" , "maxPreviousCrossLangBug " ], axis = 1 )
114
115
115
116
y_test = testData ["crossLang" ]
116
- X_test = testData .drop (["crossLang" , "addedLOC" ,"deletedLOC" ], axis = 1 )
117
+ X_test = testData .drop (["release" , " crossLang" , "addedLOC" ,"deletedLOC" , "maxPreviousCrossLangBug " ], axis = 1 )
117
118
119
+ # print(X_test.head())
120
+ # print(y_test.head())
121
+ #
122
+ # print(X_test.columns.values)
123
+ # print(y_test.columns.values)
118
124
clf = RandomForestClassifier (n_estimators = 433 , criterion = "gini" )
119
125
clf .fit (X_train , y_train )
120
126
y_pred = clf .predict (X_test )
121
127
print ("Accuracy:" , metrics .accuracy_score (y_test , y_pred ))
122
-
128
+ #
123
129
print (confusion_matrix (y_test , y_pred ))
124
130
print (classification_report (y_test , y_pred ))
125
-
131
+ #
126
132
feature_imp = pd .Series (clf .feature_importances_ , index = feature_names ).sort_values (ascending = False )
127
133
print (feature_imp )
128
134
@@ -131,10 +137,11 @@ def main():
131
137
# repositories = ["omim", "react-native", "libgdx", "openj9", "rocksdb", "realm-java", "jmonkeyengine", "arrow",
132
138
# # "conscrypt",
133
139
# "jna"]
134
- #
140
+ # #
135
141
# for repo in repositories:
136
142
# separateTestTrain(repo)
137
143
# resampling(repo)
144
+ # combineTrainsTests(repo)
138
145
#
139
146
#
140
147
# testData = open(f'results/TestData.csv', 'w')
@@ -155,6 +162,7 @@ def main():
155
162
# combineTrainsTests(repo)
156
163
157
164
# /////////////////////
165
+ # separateTestTrain("conscrypt")
158
166
# resampling("conscrypt")
159
167
# combineTrainsTests("conscrypt")
160
168
0 commit comments