-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTrain.py
164 lines (133 loc) · 5.99 KB
/
Train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import numpy as np
import sys
sys.path.append('/home/yluan/Documents/SOWFA PostProcessing/SOWFA-Postprocess')
from PostProcess_FieldData import FieldData
sys.path.append('/home/yluan/Documents/ML/TBNN/examples/turbulence')
from turbulencekepspreprocessor import TurbulenceKEpsDataProcessor
from turbulence_example_driver import plot_results
# For Python 2.7, use cpickle
try:
import cpickle as pickle
except ModuleNotFoundError:
import pickle
from tbdt_v8 import TBDT # Tensor Basis Decision Tree algorithm
from tbrf_v4 import TBRF
"""
User Inputs
"""
caseName = 'ALM_N_H_OneTurb'
caseDir = '/media/yluan'
times = 'latest'
fields = ('kResolved', 'kSGSmean', 'epsilonSGSmean', 'nuSGSmean', 'gradUAvg', 'uuPrime2')
# Whether use existing pickle raw field data and/or ML x, y pickle data
useRawPickle, useXYpickle = True, True
read_treefile = False
# Whether confine the domain of interest, useful if mesh is too large
confineBox, plotConfinedBox = True, True
# Only when confineBox and saveFields are True
# Subscript of the confined field file name
confinedFieldNameSub = 'Confined'
# Whether auto generate confine box for each case,
# the confinement is bordered by 1st/2nd refinement zone
boxAutoDim = 'second' # 'first', 'second', None
# Only when boxAutoDim is False
# Confine box counter-clockwise rotation in x-y plane
boxRot = np.pi/6
# Confine box origin, width, length, height
boxOrig = (0, 0, 0) # (x, y, z)
boxL, boxW, boxH = 0, 0, 0 # float
# Absolute cap value for Sji and Rij and scalar basis x
capSijRij, capScalarBasis = 1e9, 1e9
"""
TBRF Hyper-parameters
"""
regularization = True
regularization_lambda = 1e-12
write_g = False #write tensor basis coefficient fields
splitting_features='all' # 'all', 'div3', 'sqrt', or an integer value
min_samples_leaf = 9
n_trees = 3
optim_split = True
sampling = True
# Only if sampling is True
# Number of samples and whether to use with replacement, i.e. same sample can be re-picked
fraction, replace = 0.003, False
seed = 12345 # use for reproducibility, set equal to None for no seeding
split_fraction = 0.8 # Fraction of data to use for training
"""
Process User Inputs
"""
# Save pickle fields automatically
saveFields = True if any((useRawPickle, useXYpickle)) else False
inputsEnsembleName = 'Inputs_' + caseName
if times == 'latest':
if caseName == 'ALM_N_H_ParTurb':
times = '22000.0918025'
elif caseName == 'ALM_N_H_OneTurb':
times = '24995.0788025'
if confineBox and boxAutoDim is not None:
if caseName == 'ALM_N_H_ParTurb':
# 1st refinement zone as confinement box
if boxAutoDim == 'first':
boxOrig = (1074.225, 599.464, 0)
boxL, boxW, boxH = 1134, 1134, 405
# 2nd refinement zone as confinement box
elif boxAutoDim == 'second':
boxOrig = (1120.344, 771.583, 0)
boxL, boxW, boxH = 882, 378, 216
elif caseName == 'ALM_N_H_OneTurb':
if boxAutoDim == 'first':
boxOrig = (948.225, 817.702, 0)
boxL, boxW, boxH = 1134, 630, 405
# 2nd refinement zone as confinement box
elif boxAutoDim == 'second':
boxOrig = (994.344, 989.583, 0)
boxL, boxW, boxH = 882, 378, 216
"""
Read Pickle Data
"""
# Set file names for pickle
fileNames = ('Sij', 'Rij', 'x', 'tb', 'y') if not confineBox \
else \
('Sij_' + confinedFieldNameSub, 'Rij_' + confinedFieldNameSub, 'x_' + confinedFieldNameSub, 'tb_' + confinedFieldNameSub, 'y_' + confinedFieldNameSub)
case = FieldData(caseName = caseName, caseDir = caseDir, times = times, fields = fields, save = saveFields)
dataDict = case.readPickleData(fileNames = fileNames, resultPath = case.resultPath[times])
Sij, Rij = dataDict[fileNames[0]], dataDict[fileNames[1]]
x, tb, y = dataDict[fileNames[2]], dataDict[fileNames[3]], dataDict[fileNames[4]]
# ----------- set up the tree filename which can be read later on if necessary
# The tree filenames contain the training data parameters, as well as the random
# forest parameters such as min. features per node etc.
tree_filename_ReCase = 'test'
tree_filename_var = 'test'
# ------------ path where the tree files need to be saved ------------------
tree_filename = './TreeFiles/TBRF_TREE%i_' + tree_filename_ReCase + tree_filename_var
tbrf = TBRF(min_samples_leaf=min_samples_leaf,tree_filename=tree_filename,n_trees=n_trees,regularization=regularization,
regularization_lambda=regularization_lambda, splitting_features=splitting_features,
optim_split=optim_split,optim_threshold=100,read_from_file=read_treefile)
# If use sampling
if sampling:
x, y, tb = tbrf.randomSampling(x.T, y.T, tb.T, fraction = fraction, replace = replace)
x_train, tb_train, y_train, x_test, tb_test, y_test = \
TurbulenceKEpsDataProcessor.train_test_split(x.T, tb.T, y.T, fraction=split_fraction, seed=seed)
print('\nTrain and test data split')
# Convert every array to nFeatures x nPoints
x_train, x_test, y_train, y_test, tb_train, tb_test = x_train.T, x_test.T, y_train.T, y_test.T, tb_train.T, tb_test.T
forest = tbrf.fit(x_train, y_train, tb_train)
bij_hat, bij_forest, g_forest = tbrf.predict(x_test, tb_test, forest)
# # filter predictions using the median filter (bij_MF), and spatial filter (bij_FMF):
# bij_MF = medianFilteredField(bij_forest, 3, bounds = False)
# bij_FMF = filterField(np.reshape(bij_MF, [3, 3, testParam['Nx'], testParam['Ny']]), [3, 3])
#
# # get predicted tensor basis coefficients for further analysis
# g_MF = medianFilteredField(g_forest, 3, bounds = False)
# g_FMF = filterField(np.reshape(g_MF, [1, 10, testParam['Nx'], testParam['Ny']]), [3, 3])
# g_FMF = np.reshape(g_FMF, [10, testParam['Nx']*testParam['Ny']])
# # No median filter for now
# bij_MF, g_MF = bij_hat, g_forest
#
# RMSE_MF = np.sqrt(np.mean(np.square(bij_MF - y_test)))
# print('RMSE b_ij, median filter: %f' % RMSE_MF)
# RMSE_FMF = np.sqrt(np.mean(np.square(bij_FMF-np.reshape(Y_test,[3,3,testParam['Nx'],testParam['Ny']]))))
# print('RMSE b_ij, median filter + gaussian filter: %f' % RMSE_FMF)
# Plot the results
plot_results(y_test.T, bij_hat.T)