1
+ from functools import partial
1
2
import itertools
2
3
import statistics
3
4
import timeit
4
5
import torch
5
6
6
- profiling_enabled = None
7
- profiling_tensor_size = None
8
7
TENSOR_SIZES = [1 , 32 , 128 , 256 , 512 ]
9
8
INTERNAL_ITER = 256
10
9
PARALLEL_TASKS_NUM = 4
@@ -16,13 +15,12 @@ def loop_workload(x):
16
15
return x
17
16
18
17
# Re-bound by the __main__ driver to a torch.jit.trace of loop_workload
# matching the current input size/device.
traced_loop_workload = None

def run_profiler_benchmark_loop(input_x, use_cuda, profiling_enabled):
    """Invoke the traced loop workload once, optionally under the profiler.

    input_x: tensor handed to the traced workload.
    use_cuda: forwarded to the autograd profiler's use_cuda flag.
    profiling_enabled: when False, run the workload with no profiler at all.
    """
    if not profiling_enabled:
        traced_loop_workload(input_x)
        return
    # `prof` is never inspected; the benchmark only times the call, so the
    # active profiler context contributes exactly its instrumentation cost.
    with torch.autograd.profiler.profile(use_cuda=use_cuda) as prof:
        traced_loop_workload(input_x)
26
24
27
25
def parallel_task (x ):
28
26
for i in range (int (INTERNAL_ITER / PARALLEL_TASKS_NUM )):
@@ -38,40 +36,49 @@ def parallel_workload(x):
38
36
return x
39
37
40
38
# Re-bound by the __main__ driver to a torch.jit.trace of parallel_workload
# matching the current input size/device.
traced_parallel_workload = None

def run_profiler_benchmark_parallel(input_x, use_cuda, profiling_enabled):
    """Invoke the traced parallel workload once, optionally under the profiler.

    input_x: tensor handed to the traced workload.
    use_cuda: forwarded to the autograd profiler's use_cuda flag.
    profiling_enabled: when False, run the workload with no profiler at all.
    """
    if not profiling_enabled:
        traced_parallel_workload(input_x)
        return
    # `prof` is never inspected; only the timing impact of the active
    # profiler context matters to this benchmark.
    with torch.autograd.profiler.profile(use_cuda=use_cuda) as prof:
        traced_parallel_workload(input_x)
48
45
49
46
if __name__ == '__main__':
    # Benchmark matrix: every workload is timed for each tensor size, with
    # the autograd profiler both disabled and enabled, on CPU and — when a
    # GPU is present — on CUDA.
    for workload_name in ["loop", "parallel"]:
        # NOTE(review): N (the timeit repeat count) and the loop_workload /
        # parallel_workload functions are defined earlier in this file,
        # outside the visible region.
        print("Payload: {}; {} iterations, N = {}\n".format(
            workload_name, INTERNAL_ITER, N))
        for params in itertools.product([False, True], TENSOR_SIZES, [False, True]):
            use_cuda = params[0]
            profiling_tensor_size = params[1]
            profiling_enabled = params[2]

            # Skip CUDA configurations on machines without a usable GPU.
            if (use_cuda and not torch.cuda.is_available()):
                continue

            print("Profiling {}, tensor size {}x{}, use cuda: {}".format(
                "enabled" if profiling_enabled else "disabled",
                profiling_tensor_size, profiling_tensor_size, use_cuda))

            input_x = torch.rand(profiling_tensor_size, profiling_tensor_size)
            if use_cuda:
                input_x = input_x.cuda()

            # Re-trace the workload for the current input so the traced graph
            # matches this tensor's size/device; the run_profiler_benchmark_*
            # helpers read these module-level globals at call time.
            workload = None
            if workload_name == "loop":
                workload = partial(
                    run_profiler_benchmark_loop, input_x, use_cuda, profiling_enabled)
                traced_loop_workload = torch.jit.trace(loop_workload, input_x)
            elif workload_name == "parallel":
                workload = partial(
                    run_profiler_benchmark_parallel, input_x, use_cuda, profiling_enabled)
                traced_parallel_workload = torch.jit.trace(
                    parallel_workload, input_x)

            # N timed runs of one call each; convert seconds to milliseconds.
            runtimes = timeit.repeat(workload, repeat=N, number=1)
            avg_time = statistics.mean(runtimes) * 1000.0
            stddev_time = statistics.stdev(runtimes) * 1000.0
            print("\tavg. time: {:.3f} ms, stddev: {:.3f} ms".format(
                avg_time, stddev_time))
            if workload_name == "loop":
                # The loop workload performs INTERNAL_ITER iterations per call.
                print("\ttime per iteration: {:.3f} ms".format(
                    avg_time / INTERNAL_ITER))
            print()
0 commit comments