-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathmain.py
67 lines (57 loc) · 2.15 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import sys
import numpy as np
import pandas as pd
from classes.Signal import Signal
from classes.DataSource import DataSource
from classes.SignalClassifier import SignalClassifier
import matplotlib.pyplot as plt
# --- Data preparation -------------------------------------------------
# Build the initial labeled training set T and the unlabeled pool P,
# then make them disjoint (P = P - T) so self-training never re-labels
# the samples it was seeded with.
ds = DataSource()
labeled_ds = ds.load_or_process_labeled_dataset()   # T (labeled seed set)
ds.load_or_process_entire_dataset()                 # P (held in ds.dataset)
ds.remove_labeled_subset_from_dataset(labeled_ds)   # P = P - T

# --- Model setup -------------------------------------------------------
# The network's input width equals the length of a single feature vector.
c = SignalClassifier()
first_feature_vec = labeled_ds.feature_vec.iat[0]
c.init_nn_model(input_dim=len(first_feature_vec))
# --- Self-training loop ------------------------------------------------
# Batch 1 trains on the hand-labeled seed set T.  Each later batch
# pseudo-labels the most confidently predicted samples from the pool P,
# removes them from P, and trains on them.
num_batches = 10
batch = 1
# Samples taken from EACH end of the confidence-sorted pool per batch.
# Loop-invariant, so hoisted out of the loop.
qty = 100
# Train on T
print("Batch %d/%d" % (batch, num_batches))
c.train(labeled_ds, num_epochs=10)
while batch < num_batches:
    batch += 1
    # Sort the pool by model prediction so the most confident samples
    # sit at the two ends of the frame.
    ds.dataset = c.pred_and_sort(ds.dataset)
    if ds.dataset.shape[0] < qty * 2:
        break  # reached end of dataset (pool exhausted)
    # Extract the most confidently classified new samples T from P:
    # the qty lowest and qty highest predictions.
    most_confident_samples = pd.concat([ds.dataset.iloc[:qty],
                                        ds.dataset.iloc[-qty:]])
    # Remove them from the pool positionally (P = P - T).  Slicing by
    # position is safer than the label-based drop() it replaces: drop()
    # removes *every* row sharing a duplicated index label, which could
    # silently discard extra pool samples.
    ds.dataset = ds.dataset.iloc[qty:-qty]
    # Pseudo-label from the rounded predictions (pred -> {0.0, 1.0});
    # np.rint on a Series keeps the index aligned, no list() round-trip.
    most_confident_samples["label"] = np.rint(most_confident_samples.pred)
    print("\r\nBatch %d/%d" % (batch, num_batches))
    c.train(most_confident_samples, num_epochs=4)
# --- Evaluation on a held-out labeled test set --------------------------
test_ds = ds.load_or_process_labeled_dataset(from_file_id=20)
print("Positive test set size", test_ds[test_ds.label == 1].shape[0])
print("Negative test set size", test_ds[test_ds.label == 0].shape[0])
# Pair each metric name with its value.  dict(zip(...)) replaces the
# manual index-based dict comprehension it is equivalent to.
results = dict(zip(c.model.metrics_names, c.evaluate(test_ds)))
print(results)
# --- Display results ----------------------------------------------------
test_ds = c.pred_and_sort(test_ds)
ds.confusion(test_ds)
c.plot_losses()
# Plot a few of the most confidently predicted segments
ds.display_dataset(test_ds)