|
2 | 2 | 1.9.0a0+git2b5c5c4
|
3 | 3 |
|
4 | 4 | batch_size, matrix_size, dtype cpu_time(us), gpu_time(us)
|
5 |
| -[] 2 torch.float32 105.547 75.507 |
6 |
| -[] 4 torch.float32 9.592 75.125 |
7 |
| -[] 8 torch.float32 10.310 75.818 |
8 |
| -[] 16 torch.float32 10.427 68.911 |
9 |
| -[] 32 torch.float32 13.537 77.344 |
10 |
| -[] 64 torch.float32 60.569 86.546 |
11 |
| -[] 128 torch.float32 99.032 119.070 |
12 |
| -[] 256 torch.float32 280.218 201.018 |
13 |
| -[] 512 torch.float32 1089.866 490.519 |
14 |
| -[] 1024 torch.float32 6125.575 1335.486 |
15 |
| -[] 2048 torch.float32 42986.248 5497.439 |
16 |
| -[1] 2 torch.float32 9.669 73.801 |
17 |
| -[1] 4 torch.float32 9.311 73.138 |
18 |
| -[1] 8 torch.float32 10.223 73.413 |
19 |
| -[1] 16 torch.float32 10.821 67.235 |
20 |
| -[1] 32 torch.float32 13.647 69.747 |
21 |
| -[1] 64 torch.float32 56.102 83.778 |
22 |
| -[1] 128 torch.float32 164.089 109.557 |
23 |
| -[1] 256 torch.float32 300.865 185.843 |
24 |
| -[1] 512 torch.float32 835.133 427.641 |
25 |
| -[1] 1024 torch.float32 4356.145 1345.123 |
26 |
| -[1] 2048 torch.float32 26658.406 5495.042 |
27 |
| -[2] 2 torch.float32 10.254 48.923 |
28 |
| -[2] 4 torch.float32 10.238 48.424 |
29 |
| -[2] 8 torch.float32 10.865 49.670 |
30 |
| -[2] 16 torch.float32 12.029 49.565 |
31 |
| -[2] 32 torch.float32 18.553 335.974 |
32 |
| -[2] 64 torch.float32 83.658 405.704 |
33 |
| -[2] 128 torch.float32 170.118 529.372 |
34 |
| -[2] 256 torch.float32 365.396 830.517 |
35 |
| -[2] 512 torch.float32 1402.911 1562.380 |
36 |
| -[2] 1024 torch.float32 8500.582 3699.644 |
37 |
| -numerical mismatch: reconstruct value compare |
38 |
| -With rtol=0.001 and atol=0.001, found 1 element(s) (out of 8388608) whose difference(s) exceeded the margin of error (including 0 nan comparisons). The greatest difference was 0.0010547935962677002 (0.04455813765525818 vs. 0.04350334405899048), which occurred at index (1, 452, 1011). |
39 |
| -[2] 2048 torch.float32 60374.918 13091.600 |
40 |
| -[4] 2 torch.float32 11.771 51.131 |
41 |
| -[4] 4 torch.float32 12.223 49.632 |
42 |
| -[4] 8 torch.float32 12.293 51.562 |
43 |
| -[4] 16 torch.float32 15.020 50.697 |
44 |
| -[4] 32 torch.float32 26.133 335.603 |
45 |
| -[4] 64 torch.float32 154.521 459.424 |
46 |
| -[4] 128 torch.float32 269.843 556.146 |
47 |
| -[4] 256 torch.float32 571.958 888.574 |
48 |
| -[4] 512 torch.float32 2527.016 1773.859 |
49 |
| -[4] 1024 torch.float32 17031.137 4997.247 |
50 |
| -[4] 2048 torch.float32 119452.786 21604.799 |
51 |
| -[8] 2 torch.float32 17.611 66.310 |
52 |
| -[8] 4 torch.float32 19.614 65.430 |
53 |
| -[8] 8 torch.float32 18.976 66.751 |
54 |
| -[8] 16 torch.float32 24.600 66.377 |
55 |
| -[8] 32 torch.float32 49.813 368.210 |
56 |
| -[8] 64 torch.float32 296.102 518.253 |
57 |
| -[8] 128 torch.float32 415.326 607.669 |
58 |
| -[8] 256 torch.float32 1095.607 1049.521 |
59 |
| -[8] 512 torch.float32 5024.378 2348.893 |
60 |
| -[8] 1024 torch.float32 42197.851 7945.452 |
61 |
| -[16] 2 torch.float32 23.073 66.698 |
62 |
| -[16] 4 torch.float32 24.247 66.334 |
63 |
| -[16] 8 torch.float32 25.295 66.991 |
64 |
| -[16] 16 torch.float32 36.662 66.900 |
65 |
| -[16] 32 torch.float32 86.474 375.259 |
66 |
| -[16] 64 torch.float32 520.860 456.016 |
67 |
| -[16] 128 torch.float32 715.033 654.156 |
68 |
| -[16] 256 torch.float32 2046.187 1219.178 |
69 |
| -[16] 512 torch.float32 10900.669 3345.146 |
70 |
| -[32] 2 torch.float32 31.379 66.758 |
71 |
| -[32] 4 torch.float32 37.876 66.538 |
72 |
| -[32] 8 torch.float32 39.243 67.152 |
73 |
| -[32] 16 torch.float32 59.557 67.266 |
74 |
| -[32] 32 torch.float32 157.140 383.520 |
75 |
| -[32] 64 torch.float32 955.098 512.199 |
76 |
| -[32] 128 torch.float32 1370.115 723.370 |
77 |
| -[32] 256 torch.float32 4047.383 1559.268 |
78 |
| -[64] 2 torch.float32 49.703 67.573 |
79 |
| -[64] 4 torch.float32 59.655 67.368 |
80 |
| -[64] 8 torch.float32 63.415 67.888 |
81 |
| -[64] 16 torch.float32 104.959 68.390 |
82 |
| -[64] 32 torch.float32 294.157 381.888 |
83 |
| -[64] 64 torch.float32 1776.475 486.399 |
84 |
| -[64] 128 torch.float32 2635.866 829.155 |
85 |
| -[128] 2 torch.float32 85.740 68.507 |
86 |
| -[128] 4 torch.float32 105.935 67.955 |
87 |
| -[128] 8 torch.float32 132.358 69.039 |
88 |
| -[128] 16 torch.float32 194.751 69.127 |
89 |
| -[128] 32 torch.float32 530.604 386.889 |
90 |
| -[128] 64 torch.float32 3484.117 522.555 |
91 |
| -[256] 2 torch.float32 159.428 68.678 |
92 |
| -[256] 4 torch.float32 199.956 68.533 |
93 |
| -[256] 8 torch.float32 207.843 69.817 |
94 |
| -[256] 16 torch.float32 370.517 73.783 |
95 |
| -[256] 32 torch.float32 998.839 415.101 |
96 |
| -[512] 2 torch.float32 312.570 72.967 |
97 |
| -[512] 4 torch.float32 386.612 73.049 |
98 |
| -[512] 8 torch.float32 401.845 75.147 |
99 |
| -[512] 16 torch.float32 663.637 79.657 |
100 |
| -[1024] 2 torch.float32 599.290 85.372 |
101 |
| -[1024] 4 torch.float32 766.145 84.642 |
102 |
| -[1024] 8 torch.float32 797.913 88.762 |
| 5 | +[] 2 torch.float32 16.669 55.707 |
| 6 | +[] 4 torch.float32 8.988 56.279 |
| 7 | +[] 8 torch.float32 9.606 56.050 |
| 8 | +[] 16 torch.float32 10.211 48.753 |
| 9 | +[] 32 torch.float32 13.781 48.180 |
| 10 | +[] 64 torch.float32 74.065 48.604 |
| 11 | +[] 128 torch.float32 136.915 65.295 |
| 12 | +[] 256 torch.float32 374.116 152.605 |
| 13 | +[] 512 torch.float32 1325.188 445.672 |
| 14 | +[] 1024 torch.float32 6287.731 1293.905 |
| 15 | +[] 2048 torch.float32 44692.626 5430.511 |
| 16 | +[1] 2 torch.float32 9.704 55.768 |
| 17 | +[1] 4 torch.float32 9.832 55.861 |
| 18 | +[1] 8 torch.float32 10.263 55.057 |
| 19 | +[1] 16 torch.float32 10.672 48.378 |
| 20 | +[1] 32 torch.float32 13.843 47.741 |
| 21 | +[1] 64 torch.float32 68.520 47.746 |
| 22 | +[1] 128 torch.float32 117.593 55.726 |
| 23 | +[1] 256 torch.float32 336.442 130.063 |
| 24 | +[1] 512 torch.float32 839.713 381.789 |
| 25 | +[1] 1024 torch.float32 4412.975 1292.598 |
| 26 | +[1] 2048 torch.float32 26825.097 5430.759 |
| 27 | +[2] 2 torch.float32 13.492 46.058 |
| 28 | +[2] 4 torch.float32 14.539 46.946 |
| 29 | +[2] 8 torch.float32 14.362 47.836 |
| 30 | +[2] 16 torch.float32 15.944 48.006 |
| 31 | +[2] 32 torch.float32 21.756 106.976 |
| 32 | +[2] 64 torch.float32 100.293 172.113 |
| 33 | +[2] 128 torch.float32 214.446 300.800 |
| 34 | +[2] 256 torch.float32 449.424 545.602 |
| 35 | +[2] 512 torch.float32 1418.239 1192.834 |
| 36 | +[2] 1024 torch.float32 9019.808 3235.049 |
| 37 | +[2] 2048 torch.float32 61188.488 12367.597 |
| 38 | +[4] 2 torch.float32 15.150 46.592 |
| 39 | +[4] 4 torch.float32 15.451 46.855 |
| 40 | +[4] 8 torch.float32 16.447 47.244 |
| 41 | +[4] 16 torch.float32 18.390 47.177 |
| 42 | +[4] 32 torch.float32 30.252 104.076 |
| 43 | +[4] 64 torch.float32 178.246 160.445 |
| 44 | +[4] 128 torch.float32 343.771 289.514 |
| 45 | +[4] 256 torch.float32 614.417 579.383 |
| 46 | +[4] 512 torch.float32 2551.181 1408.043 |
| 47 | +[4] 1024 torch.float32 17698.177 4557.618 |
| 48 | +[4] 2048 torch.float32 120526.804 20665.992 |
| 49 | +[8] 2 torch.float32 17.738 46.515 |
| 50 | +[8] 4 torch.float32 19.377 47.021 |
| 51 | +[8] 8 torch.float32 19.701 48.137 |
| 52 | +[8] 16 torch.float32 24.233 47.501 |
| 53 | +[8] 32 torch.float32 46.948 105.450 |
| 54 | +[8] 64 torch.float32 326.774 165.417 |
| 55 | +[8] 128 torch.float32 408.528 307.311 |
| 56 | +[8] 256 torch.float32 1171.613 671.861 |
| 57 | +[8] 512 torch.float32 4582.418 1905.267 |
| 58 | +[8] 1024 torch.float32 42341.189 7214.016 |
| 59 | +[16] 2 torch.float32 23.400 46.384 |
| 60 | +[16] 4 torch.float32 25.423 46.729 |
| 61 | +[16] 8 torch.float32 26.428 47.852 |
| 62 | +[16] 16 torch.float32 36.230 47.441 |
| 63 | +[16] 32 torch.float32 80.811 111.544 |
| 64 | +[16] 64 torch.float32 540.700 178.012 |
| 65 | +[16] 128 torch.float32 705.543 337.825 |
| 66 | +[16] 256 torch.float32 2039.275 866.897 |
| 67 | +[16] 512 torch.float32 11060.784 2865.669 |
| 68 | +[32] 2 torch.float32 31.195 48.693 |
| 69 | +[32] 4 torch.float32 34.950 48.283 |
| 70 | +[32] 8 torch.float32 37.085 49.325 |
| 71 | +[32] 16 torch.float32 56.288 49.118 |
| 72 | +[32] 32 torch.float32 149.209 114.408 |
| 73 | +[32] 64 torch.float32 991.201 187.465 |
| 74 | +[32] 128 torch.float32 1440.597 418.658 |
| 75 | +[32] 256 torch.float32 3899.989 1261.094 |
| 76 | +[64] 2 torch.float32 50.695 48.414 |
| 77 | +[64] 4 torch.float32 60.258 48.846 |
| 78 | +[64] 8 torch.float32 62.575 49.696 |
| 79 | +[64] 16 torch.float32 102.202 49.616 |
| 80 | +[64] 32 torch.float32 345.608 118.163 |
| 81 | +[64] 64 torch.float32 1797.026 209.671 |
| 82 | +[64] 128 torch.float32 2687.032 556.512 |
| 83 | +[128] 2 torch.float32 91.550 49.189 |
| 84 | +[128] 4 torch.float32 110.788 49.498 |
| 85 | +[128] 8 torch.float32 114.526 50.550 |
| 86 | +[128] 16 torch.float32 195.040 50.784 |
| 87 | +[128] 32 torch.float32 533.968 125.271 |
| 88 | +[128] 64 torch.float32 3449.807 257.297 |
| 89 | +[256] 2 torch.float32 173.467 50.836 |
| 90 | +[256] 4 torch.float32 210.514 50.964 |
| 91 | +[256] 8 torch.float32 219.431 52.439 |
| 92 | +[256] 16 torch.float32 411.190 53.635 |
| 93 | +[256] 32 torch.float32 994.360 154.498 |
| 94 | +[512] 2 torch.float32 335.943 54.528 |
| 95 | +[512] 4 torch.float32 423.930 55.541 |
| 96 | +[512] 8 torch.float32 470.815 57.449 |
| 97 | +[512] 16 torch.float32 694.957 63.316 |
| 98 | +[1024] 2 torch.float32 662.553 65.860 |
| 99 | +[1024] 4 torch.float32 822.936 66.063 |
| 100 | +[1024] 8 torch.float32 892.930 70.187 |
0 commit comments