Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 56d9908

Browse files
committed Mar 15, 2021
potrf benchmark update
1 parent 670e2f2 commit 56d9908

File tree

4 files changed

+199
-109
lines changed

4 files changed

+199
-109
lines changed
 

‎linalg/cholesky/parse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
SORT_KEY = {
1111
"cpu": -1,
1212
"before_magma": 0,
13-
"after_potrf_and_batched": 1,
13+
# "after_potrf_and_batched": 1,
14+
"after_potrf64bit_and_magma": 2,
1415
}
1516

1617
class Markdown:
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
2+
1.9.0a0+gitccb6d48
3+
4+
batch_size, matrix_size, dtype, cpu_time(us), gpu_time(us)
5+
[] 2 torch.float32 114.999 58.959
6+
[] 4 torch.float32 14.173 59.223
7+
[] 8 torch.float32 17.363 59.109
8+
[] 16 torch.float32 17.514 59.451
9+
[] 32 torch.float32 21.385 59.565
10+
[] 64 torch.float32 47.581 83.184
11+
[] 128 torch.float32 175.685 120.336
12+
[] 256 torch.float32 437.752 208.042
13+
[] 512 torch.float32 1745.911 400.172
14+
[] 1024 torch.float32 6181.202 972.376
15+
[] 2048 torch.float32 41858.311 2771.738
16+
[1] 2 torch.float32 16.335 58.924
17+
[1] 4 torch.float32 16.078 64.280
18+
[1] 8 torch.float32 16.779 59.009
19+
[1] 16 torch.float32 18.578 59.372
20+
[1] 32 torch.float32 22.422 59.963
21+
[1] 64 torch.float32 48.282 75.136
22+
[1] 128 torch.float32 175.487 112.467
23+
[1] 256 torch.float32 405.833 186.747
24+
[1] 512 torch.float32 1141.065 359.730
25+
[1] 1024 torch.float32 4506.147 972.515
26+
[1] 2048 torch.float32 28497.187 2773.273
27+
[2] 2 torch.float32 17.416 39.668
28+
[2] 4 torch.float32 17.908 40.283
29+
[2] 8 torch.float32 19.011 41.505
30+
[2] 16 torch.float32 22.287 47.370
31+
[2] 32 torch.float32 29.969 59.570
32+
[2] 64 torch.float32 80.104 86.426
33+
[2] 128 torch.float32 337.950 154.935
34+
[2] 256 torch.float32 526.128 354.285
35+
[2] 512 torch.float32 1621.910 1201.538
36+
[2] 1024 torch.float32 6042.175 4521.013
37+
[2] 2048 torch.float32 39463.474 7465.609
38+
[4] 2 torch.float32 18.921 40.741
39+
[4] 4 torch.float32 18.504 40.869
40+
[4] 8 torch.float32 21.586 43.534
41+
[4] 16 torch.float32 28.045 48.096
42+
[4] 32 torch.float32 42.119 60.854
43+
[4] 64 torch.float32 142.013 91.095
44+
[4] 128 torch.float32 580.392 165.793
45+
[4] 256 torch.float32 759.007 384.977
46+
[4] 512 torch.float32 2350.644 1304.067
47+
[4] 1024 torch.float32 9158.168 4929.194
48+
[4] 2048 torch.float32 57909.557 9631.141
49+
[8] 2 torch.float32 17.518 42.027
50+
[8] 4 torch.float32 17.462 41.145
51+
[8] 8 torch.float32 19.693 42.852
52+
[8] 16 torch.float32 27.283 48.796
53+
[8] 32 torch.float32 48.532 62.668
54+
[8] 64 torch.float32 173.550 95.206
55+
[8] 128 torch.float32 796.840 175.077
56+
[8] 256 torch.float32 1353.606 429.544
57+
[8] 512 torch.float32 4010.749 1457.487
58+
[8] 1024 torch.float32 18085.587 5868.911
59+
[16] 2 torch.float32 18.837 45.466
60+
[16] 4 torch.float32 20.189 43.327
61+
[16] 8 torch.float32 24.433 46.226
62+
[16] 16 torch.float32 43.162 50.815
63+
[16] 32 torch.float32 81.100 63.677
64+
[16] 64 torch.float32 322.427 96.308
65+
[16] 128 torch.float32 1581.786 184.481
66+
[16] 256 torch.float32 2411.335 490.029
67+
[16] 512 torch.float32 7786.231 1753.410
68+
[32] 2 torch.float32 21.183 40.783
69+
[32] 4 torch.float32 22.316 42.487
70+
[32] 8 torch.float32 32.291 43.080
71+
[32] 16 torch.float32 110.248 49.999
72+
[32] 32 torch.float32 134.325 64.570
73+
[32] 64 torch.float32 559.294 97.589
74+
[32] 128 torch.float32 2694.669 215.273
75+
[32] 256 torch.float32 4834.244 610.006
76+
[64] 2 torch.float32 27.419 40.654
77+
[64] 4 torch.float32 28.757 41.364
78+
[64] 8 torch.float32 46.418 43.315
79+
[64] 16 torch.float32 149.626 49.638
80+
[64] 32 torch.float32 218.847 63.921
81+
[64] 64 torch.float32 997.914 104.182
82+
[64] 128 torch.float32 4984.405 271.572
83+
[128] 2 torch.float32 47.061 42.474
84+
[128] 4 torch.float32 58.075 41.955
85+
[128] 8 torch.float32 94.416 42.927
86+
[128] 16 torch.float32 218.431 50.108
87+
[128] 32 torch.float32 361.651 64.927
88+
[128] 64 torch.float32 1863.643 133.470
89+
[256] 2 torch.float32 66.360 42.825
90+
[256] 4 torch.float32 79.120 43.048
91+
[256] 8 torch.float32 145.336 46.427
92+
[256] 16 torch.float32 330.696 51.697
93+
[256] 32 torch.float32 653.331 73.969
94+
[512] 2 torch.float32 119.886 43.343
95+
[512] 4 torch.float32 144.395 45.033
96+
[512] 8 torch.float32 262.083 47.363
97+
[512] 16 torch.float32 580.736 56.217
98+
[1024] 2 torch.float32 212.592 45.818
99+
[1024] 4 torch.float32 273.709 49.839
100+
[1024] 8 torch.float32 441.060 50.737

‎linalg/cholesky/readme.md

Lines changed: 97 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -1,111 +1,100 @@
1-
See pytorch PR https://github.com/pytorch/pytorch/pull/53104
2-
3-
before master commit 1.9.0a0+git68b6249
4-
5-
after commit 1.9.0a0+gitdce610d
6-
7-
This page was tested on E5-2680 v3 and RTX 2070 Super. Libraries: intel-mkl 2020.2.254-1 and cuda 11.2
8-
9-
- before: magma
10-
- after: potrf + potrfBatched (when batch_size >= 2) **[this PR]**
11-
121
time is in **us** (10^-6 s)
132

14-
|shape|cpu|before_magma|after_potrf_and_batched|
3+
|shape|cpu|before_magma|after_potrf64bit_and_magma|
154
|---:|---:|---:|---:|
16-
| [] 2 torch.float32 | 19.055 | 2280.221 | 35.030 |
17-
| [] 4 torch.float32 | 14.152 | 2303.458 | 35.046 |
18-
| [] 8 torch.float32 | 17.151 | 2326.320 | 35.175 |
19-
| [] 16 torch.float32 | 19.888 | 2358.930 | 35.133 |
20-
| [] 32 torch.float32 | 22.837 | 2341.613 | 35.418 |
21-
| [] 64 torch.float32 | 49.388 | 2383.232 | 42.917 |
22-
| [] 128 torch.float32 | 168.919 | 2431.619 | 82.173 |
23-
| [] 256 torch.float32 | 415.945 | 2884.719 | 171.727 |
24-
| [] 512 torch.float32 | 1567.114 | 3219.271 | 397.378 |
25-
| [] 1024 torch.float32 | 6100.324 | 3948.443 | 953.525 |
26-
| [] 2048 torch.float32 | 43855.028 | 6239.588 | 2755.491 |
27-
| [1] 2 torch.float32 | 17.567 | 55.177 | 35.341 |
28-
| [1] 4 torch.float32 | 17.678 | 55.194 | 35.539 |
29-
| [1] 8 torch.float32 | 18.053 | 56.533 | 35.596 |
30-
| [1] 16 torch.float32 | 19.512 | 62.417 | 35.927 |
31-
| [1] 32 torch.float32 | 22.422 | 74.961 | 35.769 |
32-
| [1] 64 torch.float32 | 54.413 | 102.557 | 42.919 |
33-
| [1] 128 torch.float32 | 169.238 | 170.707 | 70.264 |
34-
| [1] 256 torch.float32 | 432.289 | 374.914 | 146.682 |
35-
| [1] 512 torch.float32 | 1205.813 | 1147.444 | 337.876 |
36-
| [1] 1024 torch.float32 | 4601.706 | 4157.900 | 954.473 |
37-
| [1] 2048 torch.float32 | 28580.566 | 6390.159 | 2754.500 |
38-
| [2] 2 torch.float32 | 20.761 | 54.883 | 59.240 |
39-
| [2] 4 torch.float32 | 20.363 | 55.135 | 59.264 |
40-
| [2] 8 torch.float32 | 21.775 | 56.541 | 59.102 |
41-
| [2] 16 torch.float32 | 24.421 | 61.926 | 59.221 |
42-
| [2] 32 torch.float32 | 29.667 | 74.519 | 67.042 |
43-
| [2] 64 torch.float32 | 76.576 | 101.504 | 82.712 |
44-
| [2] 128 torch.float32 | 254.089 | 168.731 | 111.827 |
45-
| [2] 256 torch.float32 | 599.307 | 369.279 | 205.888 |
46-
| [2] 512 torch.float32 | 1637.425 | 1204.858 | 522.455 |
47-
| [2] 1024 torch.float32 | 5790.532 | 4448.121 | 1589.960 |
48-
| [2] 2048 torch.float32 | 40138.750 | 7395.678 | 5928.248 |
49-
| [4] 2 torch.float32 | 19.804 | 41.088 | 42.454 |
50-
| [4] 4 torch.float32 | 19.767 | 41.878 | 42.386 |
51-
| [4] 8 torch.float32 | 21.751 | 42.951 | 42.989 |
52-
| [4] 16 torch.float32 | 27.984 | 48.655 | 42.655 |
53-
| [4] 32 torch.float32 | 42.049 | 61.765 | 50.501 |
54-
| [4] 64 torch.float32 | 136.775 | 91.196 | 63.519 |
55-
| [4] 128 torch.float32 | 576.380 | 166.014 | 95.640 |
56-
| [4] 256 torch.float32 | 848.511 | 385.509 | 221.888 |
57-
| [4] 512 torch.float32 | 2480.692 | 1304.622 | 613.503 |
58-
| [4] 1024 torch.float32 | 7707.767 | 4886.211 | 2107.344 |
59-
| [4] 2048 torch.float32 | 55563.672 | 9442.459 | 8907.055 |
60-
| [8] 2 torch.float32 | 18.787 | 41.769 | 42.939 |
61-
| [8] 4 torch.float32 | 18.123 | 43.375 | 42.443 |
62-
| [8] 8 torch.float32 | 21.067 | 44.121 | 42.948 |
63-
| [8] 16 torch.float32 | 29.974 | 50.869 | 42.498 |
64-
| [8] 32 torch.float32 | 61.718 | 64.079 | 51.111 |
65-
| [8] 64 torch.float32 | 174.167 | 96.937 | 63.341 |
66-
| [8] 128 torch.float32 | 745.488 | 176.674 | 103.029 |
67-
| [8] 256 torch.float32 | 1331.958 | 432.471 | 258.526 |
68-
| [8] 512 torch.float32 | 4076.162 | 1462.709 | 790.236 |
69-
| [8] 1024 torch.float32 | 17392.765 | 5786.699 | 3151.025 |
70-
| [16] 2 torch.float32 | 20.866 | 42.434 | 43.010 |
71-
| [16] 4 torch.float32 | 21.590 | 43.252 | 42.318 |
72-
| [16] 8 torch.float32 | 27.167 | 44.373 | 42.324 |
73-
| [16] 16 torch.float32 | 49.028 | 51.100 | 42.846 |
74-
| [16] 32 torch.float32 | 116.404 | 64.965 | 49.931 |
75-
| [16] 64 torch.float32 | 388.556 | 98.290 | 63.181 |
76-
| [16] 128 torch.float32 | 1330.891 | 185.933 | 117.122 |
77-
| [16] 256 torch.float32 | 2376.293 | 492.626 | 335.315 |
78-
| [16] 512 torch.float32 | 7307.411 | 1754.295 | 1171.981 |
79-
| [32] 2 torch.float32 | 22.934 | 42.380 | 42.502 |
80-
| [32] 4 torch.float32 | 23.678 | 43.250 | 42.658 |
81-
| [32] 8 torch.float32 | 31.927 | 44.699 | 42.715 |
82-
| [32] 16 torch.float32 | 94.794 | 50.902 | 43.057 |
83-
| [32] 32 torch.float32 | 135.281 | 65.271 | 50.708 |
84-
| [32] 64 torch.float32 | 529.833 | 99.046 | 62.631 |
85-
| [32] 128 torch.float32 | 2911.740 | 214.874 | 153.772 |
86-
| [32] 256 torch.float32 | 4753.231 | 610.045 | 518.030 |
87-
| [64] 2 torch.float32 | 29.902 | 42.636 | 43.020 |
88-
| [64] 4 torch.float32 | 35.429 | 43.522 | 42.179 |
89-
| [64] 8 torch.float32 | 49.895 | 44.746 | 43.572 |
90-
| [64] 16 torch.float32 | 135.721 | 50.989 | 42.832 |
91-
| [64] 32 torch.float32 | 224.029 | 65.572 | 49.696 |
92-
| [64] 64 torch.float32 | 961.395 | 104.305 | 76.027 |
93-
| [64] 128 torch.float32 | 5609.256 | 271.441 | 225.720 |
94-
| [128] 2 torch.float32 | 52.542 | 42.892 | 42.747 |
95-
| [128] 4 torch.float32 | 62.965 | 43.840 | 43.114 |
96-
| [128] 8 torch.float32 | 96.781 | 44.832 | 42.930 |
97-
| [128] 16 torch.float32 | 222.146 | 51.180 | 42.766 |
98-
| [128] 32 torch.float32 | 376.536 | 66.161 | 49.907 |
99-
| [128] 64 torch.float32 | 1880.561 | 133.194 | 114.067 |
100-
| [256] 2 torch.float32 | 66.206 | 44.754 | 42.242 |
101-
| [256] 4 torch.float32 | 81.555 | 45.447 | 42.391 |
102-
| [256] 8 torch.float32 | 147.877 | 47.612 | 42.688 |
103-
| [256] 16 torch.float32 | 336.137 | 54.235 | 42.174 |
104-
| [256] 32 torch.float32 | 634.189 | 74.383 | 64.845 |
105-
| [512] 2 torch.float32 | 112.168 | 46.233 | 42.259 |
106-
| [512] 4 torch.float32 | 140.019 | 47.107 | 42.090 |
107-
| [512] 8 torch.float32 | 268.559 | 49.280 | 42.298 |
108-
| [512] 16 torch.float32 | 573.994 | 57.851 | 42.053 |
109-
| [1024] 2 torch.float32 | 203.898 | 48.189 | 41.716 |
110-
| [1024] 4 torch.float32 | 258.988 | 48.978 | 41.807 |
111-
| [1024] 8 torch.float32 | 438.113 | 53.205 | 41.791 |
5+
| [] 2 torch.float32 | 66.774 | 2280.221 | 58.959 |
6+
| [] 4 torch.float32 | 14.175 | 2303.458 | 59.223 |
7+
| [] 8 torch.float32 | 18.633 | 2326.320 | 59.109 |
8+
| [] 16 torch.float32 | 20.404 | 2358.930 | 59.451 |
9+
| [] 32 torch.float32 | 23.551 | 2341.613 | 59.565 |
10+
| [] 64 torch.float32 | 50.053 | 2383.232 | 83.184 |
11+
| [] 128 torch.float32 | 173.814 | 2431.619 | 120.336 |
12+
| [] 256 torch.float32 | 437.671 | 2884.719 | 208.042 |
13+
| [] 512 torch.float32 | 1672.962 | 3219.271 | 400.172 |
14+
| [] 1024 torch.float32 | 6052.655 | 3948.443 | 972.376 |
15+
| [] 2048 torch.float32 | 41786.490 | 6239.588 | 2771.738 |
16+
| [1] 2 torch.float32 | 16.018 | 55.177 | 58.924 |
17+
| [1] 4 torch.float32 | 15.890 | 55.194 | 64.280 |
18+
| [1] 8 torch.float32 | 16.391 | 56.533 | 59.009 |
19+
| [1] 16 torch.float32 | 17.996 | 62.417 | 59.372 |
20+
| [1] 32 torch.float32 | 21.377 | 74.961 | 59.963 |
21+
| [1] 64 torch.float32 | 54.598 | 102.557 | 75.136 |
22+
| [1] 128 torch.float32 | 153.839 | 170.707 | 112.467 |
23+
| [1] 256 torch.float32 | 411.058 | 374.914 | 186.747 |
24+
| [1] 512 torch.float32 | 1176.441 | 1147.444 | 359.730 |
25+
| [1] 1024 torch.float32 | 4504.277 | 4157.900 | 972.515 |
26+
| [1] 2048 torch.float32 | 28452.310 | 6390.159 | 2773.273 |
27+
| [2] 2 torch.float32 | 17.597 | 54.883 | 39.668 |
28+
| [2] 4 torch.float32 | 18.118 | 55.135 | 40.283 |
29+
| [2] 8 torch.float32 | 19.303 | 56.541 | 41.505 |
30+
| [2] 16 torch.float32 | 22.752 | 61.926 | 47.370 |
31+
| [2] 32 torch.float32 | 28.764 | 74.519 | 59.570 |
32+
| [2] 64 torch.float32 | 71.314 | 101.504 | 86.426 |
33+
| [2] 128 torch.float32 | 292.647 | 168.731 | 154.935 |
34+
| [2] 256 torch.float32 | 552.208 | 369.279 | 354.285 |
35+
| [2] 512 torch.float32 | 1600.543 | 1204.858 | 1201.538 |
36+
| [2] 1024 torch.float32 | 5972.632 | 4448.121 | 4521.013 |
37+
| [2] 2048 torch.float32 | 39196.011 | 7395.678 | 7465.609 |
38+
| [4] 2 torch.float32 | 18.258 | 41.088 | 40.741 |
39+
| [4] 4 torch.float32 | 18.169 | 41.878 | 40.869 |
40+
| [4] 8 torch.float32 | 20.840 | 42.951 | 43.534 |
41+
| [4] 16 torch.float32 | 27.483 | 48.655 | 48.096 |
42+
| [4] 32 torch.float32 | 41.822 | 61.765 | 60.854 |
43+
| [4] 64 torch.float32 | 138.791 | 91.196 | 91.095 |
44+
| [4] 128 torch.float32 | 574.721 | 166.014 | 165.793 |
45+
| [4] 256 torch.float32 | 757.351 | 385.509 | 384.977 |
46+
| [4] 512 torch.float32 | 2342.804 | 1304.622 | 1304.067 |
47+
| [4] 1024 torch.float32 | 9071.045 | 4886.211 | 4929.194 |
48+
| [4] 2048 torch.float32 | 57826.840 | 9442.459 | 9631.141 |
49+
| [8] 2 torch.float32 | 17.141 | 41.769 | 42.027 |
50+
| [8] 4 torch.float32 | 16.745 | 43.375 | 41.145 |
51+
| [8] 8 torch.float32 | 19.215 | 44.121 | 42.852 |
52+
| [8] 16 torch.float32 | 27.425 | 50.869 | 48.796 |
53+
| [8] 32 torch.float32 | 46.709 | 64.079 | 62.668 |
54+
| [8] 64 torch.float32 | 174.225 | 96.937 | 95.206 |
55+
| [8] 128 torch.float32 | 762.877 | 176.674 | 175.077 |
56+
| [8] 256 torch.float32 | 1386.097 | 432.471 | 429.544 |
57+
| [8] 512 torch.float32 | 4195.243 | 1462.709 | 1457.487 |
58+
| [8] 1024 torch.float32 | 19305.512 | 5786.699 | 5868.911 |
59+
| [16] 2 torch.float32 | 19.890 | 42.434 | 45.466 |
60+
| [16] 4 torch.float32 | 20.660 | 43.252 | 43.327 |
61+
| [16] 8 torch.float32 | 26.371 | 44.373 | 46.226 |
62+
| [16] 16 torch.float32 | 48.751 | 51.100 | 50.815 |
63+
| [16] 32 torch.float32 | 94.477 | 64.965 | 63.677 |
64+
| [16] 64 torch.float32 | 377.310 | 98.290 | 96.308 |
65+
| [16] 128 torch.float32 | 1474.216 | 185.933 | 184.481 |
66+
| [16] 256 torch.float32 | 2405.352 | 492.626 | 490.029 |
67+
| [16] 512 torch.float32 | 7828.434 | 1754.295 | 1753.410 |
68+
| [32] 2 torch.float32 | 21.078 | 42.380 | 40.783 |
69+
| [32] 4 torch.float32 | 22.185 | 43.250 | 42.487 |
70+
| [32] 8 torch.float32 | 31.124 | 44.699 | 43.080 |
71+
| [32] 16 torch.float32 | 87.727 | 50.902 | 49.999 |
72+
| [32] 32 torch.float32 | 131.084 | 65.271 | 64.570 |
73+
| [32] 64 torch.float32 | 539.154 | 99.046 | 97.589 |
74+
| [32] 128 torch.float32 | 2753.116 | 214.874 | 215.273 |
75+
| [32] 256 torch.float32 | 4813.017 | 610.045 | 610.006 |
76+
| [64] 2 torch.float32 | 27.769 | 42.636 | 40.654 |
77+
| [64] 4 torch.float32 | 29.520 | 43.522 | 41.364 |
78+
| [64] 8 torch.float32 | 47.020 | 44.746 | 43.315 |
79+
| [64] 16 torch.float32 | 132.615 | 50.989 | 49.638 |
80+
| [64] 32 torch.float32 | 217.310 | 65.572 | 63.921 |
81+
| [64] 64 torch.float32 | 986.923 | 104.305 | 104.182 |
82+
| [64] 128 torch.float32 | 5390.051 | 271.441 | 271.572 |
83+
| [128] 2 torch.float32 | 50.669 | 42.892 | 42.474 |
84+
| [128] 4 torch.float32 | 60.421 | 43.840 | 41.955 |
85+
| [128] 8 torch.float32 | 96.448 | 44.832 | 42.927 |
86+
| [128] 16 torch.float32 | 219.459 | 51.180 | 50.108 |
87+
| [128] 32 torch.float32 | 364.438 | 66.161 | 64.927 |
88+
| [128] 64 torch.float32 | 1903.152 | 133.194 | 133.470 |
89+
| [256] 2 torch.float32 | 67.254 | 44.754 | 42.825 |
90+
| [256] 4 torch.float32 | 80.547 | 45.447 | 43.048 |
91+
| [256] 8 torch.float32 | 146.505 | 47.612 | 46.427 |
92+
| [256] 16 torch.float32 | 332.615 | 54.235 | 51.697 |
93+
| [256] 32 torch.float32 | 660.817 | 74.383 | 73.969 |
94+
| [512] 2 torch.float32 | 118.439 | 46.233 | 43.343 |
95+
| [512] 4 torch.float32 | 144.591 | 47.107 | 45.033 |
96+
| [512] 8 torch.float32 | 267.354 | 49.280 | 47.363 |
97+
| [512] 16 torch.float32 | 581.125 | 57.851 | 56.217 |
98+
| [1024] 2 torch.float32 | 218.353 | 48.189 | 45.818 |
99+
| [1024] 4 torch.float32 | 276.394 | 48.978 | 49.839 |
100+
| [1024] 8 torch.float32 | 451.058 | 53.205 | 50.737 |

0 commit comments

Comments
 (0)
Please sign in to comment.