'''
-*- coding: utf-8 -*-
Training script for the baseline model
@inproceedings{majumder2019emnlp,
title={Generating Personalized Recipes from Historical User Preferences},
author={Majumder, Bodhisattwa Prasad* and Li, Shuyang* and Ni, Jianmo and McAuley, Julian},
booktitle={EMNLP},
year={2019}
}
Copyright Shuyang Li & Bodhisattwa Majumder
License: GNU GPLv3
'''
import os
import torch
import numpy as np
import pickle
import torch.utils.data as data
import torch.nn as nn
from functools import partial
from tqdm import tqdm
from itertools import chain
from datetime import datetime
from recipe_gen.language import START_INDEX, PAD_INDEX
from recipe_gen.pipeline.train import train_model
from recipe_gen.pipeline.batch import get_batch_information_general
from recipe_gen.pipeline.eval import top_k_logits, sample_next_token
def run_epoch(device, model, sampler, loss_compute, print_every, max_len,
clip=None, teacher_forcing=False, max_name_len=15, **tensor_kwargs):
"""
Run a single epoch
Arguments:
device {torch.device} -- Torch device on which to store/process data
model {nn.Module} -- Model to be trained/run
sampler {BatchSampler} -- Data sampler
loss_compute {funct} -- Function to compute loss for each batch
print_every {int} -- Log loss every k iterations
max_len {int} -- Maximum length / number of steps to unroll and predict
Keyword Arguments:
clip {float} -- Clip gradients to a maximum (default: {None})
teacher_forcing {bool} -- Whether to do teacher-forcing in training (default: {False})
max_name_len {int} -- Maximum # timesteps to unroll to predict name (default: {15})
**tensor_kwargs {torch.Tensor} -- Named recipe tensors (name, ingredient, step, etc.) used to build each batch
Returns:
float -- Average loss across the epoch
"""
start = datetime.now()
total_tokens = 0
total_name_tokens = 0
total_loss = 0.0
total_name_loss = 0.0
print_tokens = 0
# Extract into tuples and list
tensor_names, base_tensors = zip(*tensor_kwargs.items())
# Iterate through batches in the epoch
for i, batch in enumerate(tqdm(sampler.epoch_batches(), total=sampler.n_batches), 1):
batch_users, items = [t.to(device) for t in batch]
# Fill out batch information
batch_map = dict(zip(
tensor_names,
get_batch_information_general(items, *base_tensors)
))
# Logistics
this_batch_size = batch_map['steps_tensor'].size(0)
this_batch_num_tokens = (batch_map['steps_tensor'] != PAD_INDEX).data.sum().item()
this_batch_num_name_tokens = (batch_map['name_tensor'] != PAD_INDEX).data.sum().item()
name_targets = batch_map['name_tensor']
# Batch first
# Comparing out(token[t-1]) to token[t]
(log_probs, _), (name_log_probs, _) = model.forward(
device=device, inputs=(
batch_map['calorie_level_tensor'],
batch_map['name_tensor'],
batch_map['ingr_tensor']
),
ingr_masks=batch_map['ingr_mask_tensor'],
targets=batch_map['steps_tensor'][:, :-1],
max_len=max_len-1,
start_token=START_INDEX,
teacher_forcing=teacher_forcing,
name_targets=name_targets[:, :-1],
max_name_len=max_name_len-1,
visualize=False
)
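# log_probs / name_log_probs are per-timestep log-probabilities over the
# vocabulary for the recipe steps and (optionally) the recipe name; their
# shapes are assumed to be (batch, seq_len, vocab) based on how loss_compute
# and the PAD-token counts above consume them.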
loss, name_loss = loss_compute(
log_probs, batch_map['steps_tensor'][:, 1:],
name_outputs=name_log_probs,
name_targets=name_targets[:, 1:],
norm=this_batch_size,
model=model,
clip=clip
)
total_loss += loss
total_name_loss += name_loss
# Logging
total_tokens += this_batch_num_tokens
total_name_tokens += this_batch_num_name_tokens
print_tokens += this_batch_num_tokens
if model.training and i % print_every == 0:
elapsed = datetime.now() - start
print("Epoch Step: {} LM Loss: {:.5f}; {}; Tokens/s: {:.3f}".format(
i,
loss / this_batch_size,
'Name Loss: {:.5f}'.format(name_loss / this_batch_size) if name_loss else '',
print_tokens / max(elapsed.total_seconds(), 1e-6)
))
start = datetime.now()
print_tokens = 0
del log_probs, name_log_probs
# Reshuffle the sampler
sampler.renew_indices()
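# Perplexity = exp(mean negative log-likelihood per non-pad token), computed
# separately for the recipe steps and the recipe name.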
if total_name_tokens > 0:
print('\nName Perplexity: {}'.format(np.exp(total_name_loss / float(total_name_tokens))))
return np.exp(total_loss / float(total_tokens))
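# The `loss_compute` callable passed to run_epoch is defined elsewhere in the
# pipeline. As a rough illustration only (an assumption, not the project's
# actual implementation), it is expected to behave like the sketch below:
# sum token-level NLL over non-pad positions, optionally add a name loss,
# backpropagate normalized by batch size, clip gradients, and return scalar
# loss totals compatible with the perplexity bookkeeping above.
class _ExampleLossCompute:
    def __init__(self, pad_index=PAD_INDEX, optimizer=None):
        self.criterion = nn.NLLLoss(ignore_index=pad_index, reduction='sum')
        self.optimizer = optimizer

    def __call__(self, outputs, targets, name_outputs=None, name_targets=None,
                 norm=1, model=None, clip=None):
        # Flatten (batch, seq, vocab) -> (batch * seq, vocab) for NLLLoss
        loss = self.criterion(
            outputs.contiguous().view(-1, outputs.size(-1)),
            targets.contiguous().view(-1)
        )
        name_loss = torch.zeros((), device=outputs.device)
        if name_outputs is not None:
            name_loss = self.criterion(
                name_outputs.contiguous().view(-1, name_outputs.size(-1)),
                name_targets.contiguous().view(-1)
            )
        if model is not None and model.training and self.optimizer is not None:
            ((loss + name_loss) / norm).backward()
            if clip is not None:
                nn.utils.clip_grad_norm_(model.parameters(), clip)
            self.optimizer.step()
            self.optimizer.zero_grad()
        return loss.item(), name_loss.item()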
'''
==== RUN
nohup python3 -u -m recipe_gen.models.baseline.train --data-dir <DATA FOLDER> --batch-size 25 --vocab-emb-size 300 --calorie-emb-size 5 --nhid 256 --nlayers 2 --lr 1e-3 --epochs 50 --annealing-rate 0.9 --save <MODEL FOLDER> --ingr-emb --ingr-gru --exp-name baseline > baseline.out &
tail -f baseline.out
nohup python3 -u -m recipe_gen.models.baseline.train --data-dir <DATA FOLDER> --batch-size 25 --vocab-emb-size 300 --calorie-emb-size 5 --nhid 256 --nlayers 2 --lr 1e-3 --epochs 50 --annealing-rate 0.9 --save <MODEL FOLDER> --ingr-emb --ingr-gru --decode-name --shared-proj --exp-name baseline_name > baseline_name.out &
tail -f baseline_name.out
'''
if __name__ == "__main__":
import torch
import argparse
import torch.nn.init as init
from recipe_gen.utils import get_device, count_parameters
from recipe_gen.pipeline import DataFrameDataset, BatchSampler
from recipe_gen.pipeline.batch import load_full_data, pad_recipe_info, load_recipe_tensors
# Module imports
from . import create_model
from .generate import decode_single
parser = argparse.ArgumentParser(description='Baseline for recipe generation (dynamic attn)')
parser.add_argument('--data-dir', type=str, required=True, help='location of the data corpus')
parser.add_argument('--batch-size', type=int, default=48, metavar='N', help='batch size')
parser.add_argument('--vocab-emb-size', type=int, default=50, help='size of word embeddings')
parser.add_argument('--calorie-emb-size', type=int, default=50, help='size of calorie embeddings')
parser.add_argument('--ingr-emb-size', type=int, default=10, help='size of ingr embeddings')
parser.add_argument('--nhid', type=int, default=256, help='number of hidden units per layer')
parser.add_argument('--nlayers', type=int, default=1, help='number of layers')
parser.add_argument('--lr', type=float, default=1e-4, help='initial learning rate')
parser.add_argument('--clip', type=float, default=None, help='gradient clipping')
parser.add_argument('--epochs', type=int, default=5, help='upper epoch limit')
parser.add_argument('--dropout', type=float, default=0.2,
help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--log-interval', type=int, default=500, metavar='N', help='report interval')
parser.add_argument('--annealing-rate', type=float, default=1.0, metavar='N',
help='learning rate annealing (default 1.0 - no annealing, 0.0 - early stoppage)')
parser.add_argument('--teacher-forcing', default=None, type=int,
help='number of epochs to teacher-force when training (default ALL epochs)')
parser.add_argument('--save', type=str, default='<MODEL FOLDER>',
help='path to save the final model')
parser.add_argument('--exp-name', type=str, required=True, help='experiment name')
parser.add_argument('--ingr-gru', action='store_true', default=False,
help='Use BiGRU for ingredient encoding')
parser.add_argument('--decode-name', action='store_true', default=False,
help='Multi-task learn to decode name along with recipe')
parser.add_argument('--ingr-emb', action='store_true', default=False,
help='Use Ingr embedding in encoder')
parser.add_argument('--shared-proj', action='store_true', default=False,
help='Share projection layers for name and steps')
parser.add_argument('--load-checkpoint', type=str, default=None,
help='Load from state dict checkpoint')
args = parser.parse_args()
start = datetime.now()
USE_CUDA, DEVICE = get_device()
# Filters
MAX_NAME = 15
MAX_INGR = 5
MAX_INGR_TOK = 20
MAX_STEP_TOK = 256
# Reproducibility
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# Args
data_dir = args.data_dir
batch_size = args.batch_size
vocab_emb_dim = args.vocab_emb_size
calorie_emb_dim = args.calorie_emb_size
ingr_emb_dim = args.ingr_emb_size
hidden_size = args.nhid
n_layers = args.nlayers
dropout = args.dropout
num_epochs = args.epochs
lr = args.lr
print_every = args.log_interval
exp_name = args.exp_name
save_folder = args.save
lr_annealing_rate = args.annealing_rate
clip = args.clip
ingr_gru = args.ingr_gru
ingr_emb = args.ingr_emb
decode_name = args.decode_name
shared_proj = args.shared_proj
n_teacher_forcing = args.teacher_forcing
checkpoint_loc = args.load_checkpoint
if checkpoint_loc is not None:
print('Loading state dict from {}'.format(checkpoint_loc))
if n_teacher_forcing is None:
n_teacher_forcing = num_epochs
if not os.path.exists(save_folder):
os.makedirs(save_folder)
# Get the DFs
train_df, valid_df, test_df, user_items_df, df_r, ingr_map = load_full_data(data_dir)
n_items = len(df_r)
print('{} - Data loaded.'.format(datetime.now() - start))
# Pad recipe information
N_INGREDIENTS = 0
if ingr_emb:
print('INGR EMBEDDING')
n_ingredients_og = max(chain.from_iterable(df_r['ingredient_ids'].values)) + 1
PAD_INGR = n_ingredients_og
N_INGREDIENTS = n_ingredients_og + 1
df_r = pad_recipe_info(
df_r, max_name_tokens=MAX_NAME, min_ingredients=3, max_ingredients=MAX_INGR,
max_ingr_tokens=MAX_INGR_TOK, max_step_tokens=MAX_STEP_TOK
)
tensors_to_load = [
('name_tensor', 'name_tokens'),
('calorie_level_tensor', 'calorie_level'),
('technique_tensor', 'techniques'),
('ingr_tensor', 'ingredient_ids' if ingr_emb else 'ingredient_tokens'),
('steps_tensor', 'steps_tokens'),
('ingr_mask_tensor', 'ingredient_id_mask' if ingr_emb else 'ingredient_mask'),
('tech_mask_tensor', 'techniques_mask'),
]
tensor_names, tensor_cols = zip(*tensors_to_load)
# Load tensors into memory
memory_tensors = load_recipe_tensors(
df_r, DEVICE, cols=tensor_cols, types=[torch.LongTensor] * len(tensors_to_load)
)
memory_tensor_map = dict(zip(tensor_names, memory_tensors))
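# NOTE: the keys of memory_tensor_map must match the batch_map keys consumed
# in run_epoch (e.g. 'steps_tensor', 'name_tensor', 'ingr_tensor',
# 'ingr_mask_tensor', 'calorie_level_tensor'), since the map is forwarded to
# run_epoch as **tensor_kwargs below.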
print('{} - Tensors loaded in memory.'.format(datetime.now() - start))
# Samplers
train_data = DataFrameDataset(train_df, ['u', 'i'])
train_sampler = BatchSampler(train_data, batch_size, random=True)
valid_data = DataFrameDataset(valid_df, ['u', 'i'])
valid_sampler = BatchSampler(valid_data, batch_size)
test_data = DataFrameDataset(test_df, ['u', 'i'])
test_sampler = BatchSampler(test_data, batch_size)
'''
Create model
'''
model = create_model(
vocab_emb_dim=vocab_emb_dim, calorie_emb_dim=calorie_emb_dim,
hidden_size=hidden_size, n_layers=n_layers, dropout=dropout,
max_ingr=MAX_INGR, max_ingr_tok=MAX_INGR_TOK, use_cuda=USE_CUDA,
state_dict_path=checkpoint_loc, decode_name=decode_name,
ingr_gru=ingr_gru, ingr_emb=ingr_emb, num_ingr=N_INGREDIENTS,
ingr_emb_dim=ingr_emb_dim, shared_projection=shared_proj,
)
print('{} - {} Model defined with {:,} parameters'.format(
datetime.now() - start, exp_name, count_parameters(model)
))
'''
TRAIN MODEL
'''
partial_run_epoch = partial(
run_epoch,
print_every=print_every,
max_len=MAX_STEP_TOK,
max_name_len=MAX_NAME,
clip=clip,
**memory_tensor_map
)
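# train_model is expected to supply the remaining run_epoch arguments
# (device, model, sampler, loss_compute, teacher_forcing) once per epoch;
# this is an assumption based on run_epoch's signature rather than a
# documented contract of recipe_gen.pipeline.train.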
partial_decode_single = partial(
decode_single,
max_len=MAX_STEP_TOK,
max_name_len=MAX_NAME,
ingr_map=ingr_map,
max_ingr=MAX_INGR,
max_ingr_tok=MAX_INGR_TOK,
**memory_tensor_map
)
dev_perplexities, test_perplexity = train_model(
DEVICE, model, train_sampler, valid_sampler, test_sampler,
num_epochs=num_epochs, lr=lr, exp_name=exp_name,
partial_run_epoch=partial_run_epoch, partial_decode_single=partial_decode_single,
lr_annealing_rate=lr_annealing_rate, n_teacher_forcing=n_teacher_forcing,
save_folder=save_folder)
# Save perplexities
stats_loc = os.path.join(save_folder, 'model_stats_{}.pkl'.format(exp_name))
with open(stats_loc, 'wb') as stats_file:
pickle.dump([dev_perplexities, test_perplexity], stats_file, protocol=pickle.HIGHEST_PROTOCOL)
print('{} - Saved stats to {}'.format(
datetime.now() - start, stats_loc
))
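'''
==== INSPECT RESULTS (illustrative only; the folder and experiment name are placeholders)
python3 -c "
import pickle
with open('<MODEL FOLDER>/model_stats_baseline.pkl', 'rb') as f:
    dev_perplexities, test_perplexity = pickle.load(f)
print(dev_perplexities, test_perplexity)
"
'''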