Skip to content

Commit 5670378

Browse files
authored Aug 26, 2020
Random Forest
1 parent 4d2d6fe commit 5670378

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed
 

‎resamplingSmote.py

+16 -8
Original file line numberDiff line numberDiff line change
@@ -104,25 +104,31 @@ def randomForest():
104104
trainData = pd.read_csv('results/TrainData.csv', low_memory=False)
105105
testData = pd.read_csv('results/TestData.csv', low_memory=False)
106106

107-
feature_names = ["release",
108-
# "addedLOC", "deletedLOC",
107+
feature_names = [#"release", "addedLOC", "deletedLOC",
109108
"churnLOC", "entropy","changeAge", "changedFiles",
110-
"contributors", "developerExp", "maxPreviousBug", "maxPreviousCrossLangBug"]
109+
"contributors", "developerExp", "maxPreviousBug"
110+
''', "maxPreviousCrossLangBug"'''
111+
]
111112

112113
y_train = trainData["crossLang"]
113-
X_train = trainData.drop(["crossLang", "addedLOC","deletedLOC"], axis=1)
114+
X_train = trainData.drop(["release","crossLang", "addedLOC","deletedLOC","maxPreviousCrossLangBug"], axis=1)
114115

115116
y_test = testData["crossLang"]
116-
X_test = testData.drop(["crossLang", "addedLOC","deletedLOC"], axis=1)
117+
X_test = testData.drop(["release","crossLang", "addedLOC","deletedLOC","maxPreviousCrossLangBug"], axis=1)
117118

119+
# print(X_test.head())
120+
# print(y_test.head())
121+
#
122+
# print(X_test.columns.values)
123+
# print(y_test.columns.values)
118124
clf = RandomForestClassifier(n_estimators=433, criterion="gini")
119125
clf.fit(X_train, y_train)
120126
y_pred = clf.predict(X_test)
121127
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
122-
128+
#
123129
print(confusion_matrix(y_test, y_pred))
124130
print(classification_report(y_test, y_pred))
125-
131+
#
126132
feature_imp = pd.Series(clf.feature_importances_, index=feature_names).sort_values(ascending=False)
127133
print(feature_imp)
128134

@@ -131,10 +137,11 @@ def main():
131137
# repositories = ["omim", "react-native", "libgdx", "openj9", "rocksdb", "realm-java", "jmonkeyengine", "arrow",
132138
# # "conscrypt",
133139
# "jna"]
134-
#
140+
# #
135141
# for repo in repositories:
136142
# separateTestTrain(repo)
137143
# resampling(repo)
144+
# combineTrainsTests(repo)
138145
#
139146
#
140147
# testData = open(f'results/TestData.csv', 'w')
@@ -155,6 +162,7 @@ def main():
155162
# combineTrainsTests(repo)
156163

157164
# /////////////////////
165+
# separateTestTrain("conscrypt")
158166
# resampling("conscrypt")
159167
# combineTrainsTests("conscrypt")
160168

0 commit comments

Comments
 (0)
Please sign in to comment.