Commit 452baee (1 parent: 2c8cec2)
4 files changed: 9 additions, 2 deletions.
@@ -74,7 +74,12 @@ def init_args_from_command_line(
74
74
default = "./torchtrain/outputs" ,
75
75
help = "folder to dump job outputs" ,
76
76
)
77
-
77
+ parser .add_argument (
78
+ "--job.description" ,
79
+ type = str ,
80
+ default = "default job" ,
81
+ help = "description of the job" ,
82
+ )
78
83
# profiling configs
79
84
parser .add_argument (
80
85
"--profiling.run_profiler" ,
@@ -93,7 +93,7 @@ def main(job_config: JobConfig):
93
93
world_size = world_size ,
94
94
)
95
95
world_mesh = parallel_dims .build_mesh (device_type = "cuda" )
96
-
96
+ rank0_log(f"Starting job: {job_config.job.description}")
97
97
model_name = job_config .model .name
98
98
rank0_log (f"Building { model_name } " )
99
99
# build tokenizer
Original file line number Diff line number Diff line change 1
1
# TorchTrain Config.toml
2
2
[job ]
3
3
dump_folder = " ./outputs"
4
+ description = " debug training"
4
5
5
6
[profiling ]
6
7
run_profiler = true
Original file line number Diff line number Diff line change 1
1
# TorchTrain Config.toml
2
2
[job ]
3
3
dump_folder = " ./outputs"
4
+ description = " llama 7b training"
4
5
5
6
[profiling ]
6
7
run_profiler = true
You can’t perform that action at this time.
0 commit comments