
Commit 8a8d394: update script
Committed: Mar 24, 2021
Parent: 4ea09eb

File tree: 7 files changed, +583 additions, -585 deletions
linalg/cholesky-solve/linalg-prof.py (7 additions, 3 deletions)
@@ -3,6 +3,8 @@
 import itertools
 import gc
 import json
+import multiprocessing
+import threading
 
 from torch.testing._internal.common_utils import random_hermitian_pd_matrix
 
@@ -45,8 +47,8 @@ def prof(b_, n_, dtype=torch.float, p=None, flag=None):
 
     # print(b_, n_)
    # x = torch.randn(*b_, n_, n_, device='cuda', dtype=dtype)
-    zo = random_hermitian_pd_matrix(n_, *b_, device='cuda', dtype=torch.float64)
-    z = torch.cholesky(zo).to(dtype=dtype)
+    zo = random_hermitian_pd_matrix(n_, *b_, device='cpu', dtype=torch.float64).cuda()
+    z = torch.cholesky(zo.cpu()).to(dtype=dtype, device='cuda')
     x = torch.randn(*b_, n_, n_, device='cuda').to(dtype=dtype)
     # x = torch.randn(*b_, n_, 1, device='cuda').to(dtype=dtype)
 
@@ -125,7 +127,9 @@ def prof(b_, n_, dtype=torch.float, p=None, flag=None):
       f'cpu_time({TIME_UNIT}), gpu_time({TIME_UNIT})')
 
 for b, n in itertools.product(
-        [[]] + [[2**i] for i in range(11)],
+        # [[]] + [[2**i] for i in range(11)],
+        [[], [1]],
+        # [[2**i] for i in range(1, 11)],
         [2**j for j in range(1, 12, 1)]
 ):
     if b and b[0] * n >= 2**14:
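Note on the change: the substantive edit moves test-input construction off the GPU. The Hermitian positive-definite matrix is now generated on the CPU in float64 and only then moved to CUDA, and its Cholesky factor is likewise computed on the CPU before being cast and transferred; the batch sweep is also narrowed to [[], [1]]. The following is a minimal standalone sketch of the new setup path, read off the hunks above rather than copied from the full script; the concrete n_ and b_ values are arbitrary examples.

    import torch
    from torch.testing._internal.common_utils import random_hermitian_pd_matrix

    n_, b_ = 64, [2]        # matrix size and batch shape, as iterated by the driver loop
    dtype = torch.float32

    # Build the Hermitian positive-definite input on CPU in float64, then move
    # it to the GPU (matches the "+" lines of the diff above).
    zo = random_hermitian_pd_matrix(n_, *b_, device='cpu', dtype=torch.float64).cuda()

    # Factor on CPU as well, so the GPU routine under test does not produce its
    # own reference input; cast and transfer the factor afterwards.
    z = torch.cholesky(zo.cpu()).to(dtype=dtype, device='cuda')

    # The right-hand side is generated directly on the GPU, then cast.
    x = torch.randn(*b_, n_, n_, device='cuda').to(dtype=dtype)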

linalg/cholesky-solve/prof_after_heuristics.txt (96 additions, 98 deletions)
@@ -2,101 +2,99 @@
 1.9.0a0+git2b5c5c4
 
 batch_size, matrix_size, dtype cpu_time(us), gpu_time(us)
-[] 2 torch.float32 105.547 75.507
-[] 4 torch.float32 9.592 75.125
-[] 8 torch.float32 10.310 75.818
-[] 16 torch.float32 10.427 68.911
-[] 32 torch.float32 13.537 77.344
-[] 64 torch.float32 60.569 86.546
-[] 128 torch.float32 99.032 119.070
-[] 256 torch.float32 280.218 201.018
-[] 512 torch.float32 1089.866 490.519
-[] 1024 torch.float32 6125.575 1335.486
-[] 2048 torch.float32 42986.248 5497.439
-[1] 2 torch.float32 9.669 73.801
-[1] 4 torch.float32 9.311 73.138
-[1] 8 torch.float32 10.223 73.413
-[1] 16 torch.float32 10.821 67.235
-[1] 32 torch.float32 13.647 69.747
-[1] 64 torch.float32 56.102 83.778
-[1] 128 torch.float32 164.089 109.557
-[1] 256 torch.float32 300.865 185.843
-[1] 512 torch.float32 835.133 427.641
-[1] 1024 torch.float32 4356.145 1345.123
-[1] 2048 torch.float32 26658.406 5495.042
-[2] 2 torch.float32 10.254 48.923
-[2] 4 torch.float32 10.238 48.424
-[2] 8 torch.float32 10.865 49.670
-[2] 16 torch.float32 12.029 49.565
-[2] 32 torch.float32 18.553 335.974
-[2] 64 torch.float32 83.658 405.704
-[2] 128 torch.float32 170.118 529.372
-[2] 256 torch.float32 365.396 830.517
-[2] 512 torch.float32 1402.911 1562.380
-[2] 1024 torch.float32 8500.582 3699.644
-numerical mismatch: reconstruct value compare
-With rtol=0.001 and atol=0.001, found 1 element(s) (out of 8388608) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 0.0010547935962677002 (0.04455813765525818 vs. 0.04350334405899048), which occurred at index (1, 452, 1011).
-[2] 2048 torch.float32 60374.918 13091.600
-[4] 2 torch.float32 11.771 51.131
-[4] 4 torch.float32 12.223 49.632
-[4] 8 torch.float32 12.293 51.562
-[4] 16 torch.float32 15.020 50.697
-[4] 32 torch.float32 26.133 335.603
-[4] 64 torch.float32 154.521 459.424
-[4] 128 torch.float32 269.843 556.146
-[4] 256 torch.float32 571.958 888.574
-[4] 512 torch.float32 2527.016 1773.859
-[4] 1024 torch.float32 17031.137 4997.247
-[4] 2048 torch.float32 119452.786 21604.799
-[8] 2 torch.float32 17.611 66.310
-[8] 4 torch.float32 19.614 65.430
-[8] 8 torch.float32 18.976 66.751
-[8] 16 torch.float32 24.600 66.377
-[8] 32 torch.float32 49.813 368.210
-[8] 64 torch.float32 296.102 518.253
-[8] 128 torch.float32 415.326 607.669
-[8] 256 torch.float32 1095.607 1049.521
-[8] 512 torch.float32 5024.378 2348.893
-[8] 1024 torch.float32 42197.851 7945.452
-[16] 2 torch.float32 23.073 66.698
-[16] 4 torch.float32 24.247 66.334
-[16] 8 torch.float32 25.295 66.991
-[16] 16 torch.float32 36.662 66.900
-[16] 32 torch.float32 86.474 375.259
-[16] 64 torch.float32 520.860 456.016
-[16] 128 torch.float32 715.033 654.156
-[16] 256 torch.float32 2046.187 1219.178
-[16] 512 torch.float32 10900.669 3345.146
-[32] 2 torch.float32 31.379 66.758
-[32] 4 torch.float32 37.876 66.538
-[32] 8 torch.float32 39.243 67.152
-[32] 16 torch.float32 59.557 67.266
-[32] 32 torch.float32 157.140 383.520
-[32] 64 torch.float32 955.098 512.199
-[32] 128 torch.float32 1370.115 723.370
-[32] 256 torch.float32 4047.383 1559.268
-[64] 2 torch.float32 49.703 67.573
-[64] 4 torch.float32 59.655 67.368
-[64] 8 torch.float32 63.415 67.888
-[64] 16 torch.float32 104.959 68.390
-[64] 32 torch.float32 294.157 381.888
-[64] 64 torch.float32 1776.475 486.399
-[64] 128 torch.float32 2635.866 829.155
-[128] 2 torch.float32 85.740 68.507
-[128] 4 torch.float32 105.935 67.955
-[128] 8 torch.float32 132.358 69.039
-[128] 16 torch.float32 194.751 69.127
-[128] 32 torch.float32 530.604 386.889
-[128] 64 torch.float32 3484.117 522.555
-[256] 2 torch.float32 159.428 68.678
-[256] 4 torch.float32 199.956 68.533
-[256] 8 torch.float32 207.843 69.817
-[256] 16 torch.float32 370.517 73.783
-[256] 32 torch.float32 998.839 415.101
-[512] 2 torch.float32 312.570 72.967
-[512] 4 torch.float32 386.612 73.049
-[512] 8 torch.float32 401.845 75.147
-[512] 16 torch.float32 663.637 79.657
-[1024] 2 torch.float32 599.290 85.372
-[1024] 4 torch.float32 766.145 84.642
-[1024] 8 torch.float32 797.913 88.762
+[] 2 torch.float32 16.669 55.707
+[] 4 torch.float32 8.988 56.279
+[] 8 torch.float32 9.606 56.050
+[] 16 torch.float32 10.211 48.753
+[] 32 torch.float32 13.781 48.180
+[] 64 torch.float32 74.065 48.604
+[] 128 torch.float32 136.915 65.295
+[] 256 torch.float32 374.116 152.605
+[] 512 torch.float32 1325.188 445.672
+[] 1024 torch.float32 6287.731 1293.905
+[] 2048 torch.float32 44692.626 5430.511
+[1] 2 torch.float32 9.704 55.768
+[1] 4 torch.float32 9.832 55.861
+[1] 8 torch.float32 10.263 55.057
+[1] 16 torch.float32 10.672 48.378
+[1] 32 torch.float32 13.843 47.741
+[1] 64 torch.float32 68.520 47.746
+[1] 128 torch.float32 117.593 55.726
+[1] 256 torch.float32 336.442 130.063
+[1] 512 torch.float32 839.713 381.789
+[1] 1024 torch.float32 4412.975 1292.598
+[1] 2048 torch.float32 26825.097 5430.759
+[2] 2 torch.float32 13.492 46.058
+[2] 4 torch.float32 14.539 46.946
+[2] 8 torch.float32 14.362 47.836
+[2] 16 torch.float32 15.944 48.006
+[2] 32 torch.float32 21.756 106.976
+[2] 64 torch.float32 100.293 172.113
+[2] 128 torch.float32 214.446 300.800
+[2] 256 torch.float32 449.424 545.602
+[2] 512 torch.float32 1418.239 1192.834
+[2] 1024 torch.float32 9019.808 3235.049
+[2] 2048 torch.float32 61188.488 12367.597
+[4] 2 torch.float32 15.150 46.592
+[4] 4 torch.float32 15.451 46.855
+[4] 8 torch.float32 16.447 47.244
+[4] 16 torch.float32 18.390 47.177
+[4] 32 torch.float32 30.252 104.076
+[4] 64 torch.float32 178.246 160.445
+[4] 128 torch.float32 343.771 289.514
+[4] 256 torch.float32 614.417 579.383
+[4] 512 torch.float32 2551.181 1408.043
+[4] 1024 torch.float32 17698.177 4557.618
+[4] 2048 torch.float32 120526.804 20665.992
+[8] 2 torch.float32 17.738 46.515
+[8] 4 torch.float32 19.377 47.021
+[8] 8 torch.float32 19.701 48.137
+[8] 16 torch.float32 24.233 47.501
+[8] 32 torch.float32 46.948 105.450
+[8] 64 torch.float32 326.774 165.417
+[8] 128 torch.float32 408.528 307.311
+[8] 256 torch.float32 1171.613 671.861
+[8] 512 torch.float32 4582.418 1905.267
+[8] 1024 torch.float32 42341.189 7214.016
+[16] 2 torch.float32 23.400 46.384
+[16] 4 torch.float32 25.423 46.729
+[16] 8 torch.float32 26.428 47.852
+[16] 16 torch.float32 36.230 47.441
+[16] 32 torch.float32 80.811 111.544
+[16] 64 torch.float32 540.700 178.012
+[16] 128 torch.float32 705.543 337.825
+[16] 256 torch.float32 2039.275 866.897
+[16] 512 torch.float32 11060.784 2865.669
+[32] 2 torch.float32 31.195 48.693
+[32] 4 torch.float32 34.950 48.283
+[32] 8 torch.float32 37.085 49.325
+[32] 16 torch.float32 56.288 49.118
+[32] 32 torch.float32 149.209 114.408
+[32] 64 torch.float32 991.201 187.465
+[32] 128 torch.float32 1440.597 418.658
+[32] 256 torch.float32 3899.989 1261.094
+[64] 2 torch.float32 50.695 48.414
+[64] 4 torch.float32 60.258 48.846
+[64] 8 torch.float32 62.575 49.696
+[64] 16 torch.float32 102.202 49.616
+[64] 32 torch.float32 345.608 118.163
+[64] 64 torch.float32 1797.026 209.671
+[64] 128 torch.float32 2687.032 556.512
+[128] 2 torch.float32 91.550 49.189
+[128] 4 torch.float32 110.788 49.498
+[128] 8 torch.float32 114.526 50.550
+[128] 16 torch.float32 195.040 50.784
+[128] 32 torch.float32 533.968 125.271
+[128] 64 torch.float32 3449.807 257.297
+[256] 2 torch.float32 173.467 50.836
+[256] 4 torch.float32 210.514 50.964
+[256] 8 torch.float32 219.431 52.439
+[256] 16 torch.float32 411.190 53.635
+[256] 32 torch.float32 994.360 154.498
+[512] 2 torch.float32 335.943 54.528
+[512] 4 torch.float32 423.930 55.541
+[512] 8 torch.float32 470.815 57.449
+[512] 16 torch.float32 694.957 63.316
+[1024] 2 torch.float32 662.553 65.860
+[1024] 4 torch.float32 822.936 66.063
+[1024] 8 torch.float32 892.930 70.187
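How the cpu_time/gpu_time columns are measured is not visible in this commit, only the resulting table. The sketch below shows one common way to collect per-call GPU timings in microseconds with torch.cuda.Event; it is an assumption-laden illustration (the helper name, warmup/iteration counts, and the choice of torch.cholesky_solve as the routine under test are all guesses, not the script's actual measurement loop).

    import torch

    def time_cholesky_solve(z, x, warmup=3, iters=10):
        # Hypothetical timing helper (not from the commit). z is a Cholesky
        # factor, x a right-hand side, both already on the GPU.
        for _ in range(warmup):
            torch.cholesky_solve(x, z)          # warm up kernels and allocator
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        torch.cuda.synchronize()
        start.record()
        for _ in range(iters):
            torch.cholesky_solve(x, z)
        end.record()
        torch.cuda.synchronize()
        # elapsed_time() returns milliseconds; convert to microseconds per call.
        return start.elapsed_time(end) * 1e3 / iters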
