12
12
13
13
// Element-wise array copy: (dst)[0..count) = (src)[0..count).
// Fix: the macro name must be followed IMMEDIATELY by '(' — the space that
// was here made this an object-like macro, so `copy(a, b, n)` expanded to
// `(dst, src, count) for (...) {...}(a, b, n)`, which does not compile.
// `i` is scoped to the for-loop, so the macro may be used repeatedly in
// one block; as with any statement-like macro, avoid unbraced if/else use.
#define copy(dst, src, count) for (int i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
14
14
15
-
16
- #if __CUDA_ARCH__ < SHUFFLE_MIN_VER
17
- #include " keccak_u64.cuh"
18
- #include " dagger_shared.cuh"
19
- #else
20
15
#include " keccak.cuh"
21
16
#include " dagger_shuffled.cuh"
22
- #endif
23
17
24
18
// Ethash search kernel: each thread evaluates one nonce (start_nonce + gid)
// and, on a hit, appends its gid and mix-hash into the shared result buffer.
//
// Launch layout: 1-D grid of 1-D blocks, no dynamic shared memory.
// g_output->count is claimed via atomicInc, so concurrent winners receive
// distinct slots. Slots at or past SEARCH_RESULTS are dropped, while count
// still records the attempt — presumably the host clamps count when reading
// results back (TODO confirm against the host-side consumer).
// NOTE(review): compute_hash() appears to return true when the nonce does
// NOT beat d_target (the thread exits without recording) — verify against
// its definition in dagger_shuffled.cuh.
template <uint32_t _PARALLEL_HASH>
__global__ void
ethash_search(
    volatile search_results* g_output,
    uint64_t start_nonce
)
{
    uint32_t const gid = blockIdx.x * blockDim.x + threadIdx.x;

    uint2 mix[4];
    if (compute_hash<_PARALLEL_HASH>(start_nonce + gid, d_target, mix))
        return;

    // Claim a result slot; wrap bound 0xffffffff is effectively "never wrap".
    uint32_t slot = atomicInc((uint32_t*)&g_output->count, 0xffffffff);
    if (slot >= SEARCH_RESULTS)
        return;  // buffer full — this result is discarded

    g_output->result[slot].gid = gid;
    // Flatten the four uint2 mix words into the 8-entry mix array:
    // mix[i].x -> [2i], mix[i].y -> [2i+1].
    for (int i = 0; i < 4; i++)
    {
        g_output->result[slot].mix[2 * i] = mix[i].x;
        g_output->result[slot].mix[2 * i + 1] = mix[i].y;
    }
}
48
42
49
43
// Host-side launcher for ethash_search.
//
// Dispatches to the kernel instantiation matching parallelHash (1, 2, 4, or
// 8); any unrecognized value falls back to the 4-way variant, exactly like
// an explicit 4. The kernel is launched on `stream` with zero dynamic shared
// memory, and launch-configuration errors are surfaced right away through
// cudaGetLastError() (execution errors appear at the next synchronizing call).
void run_ethash_search(
    uint32_t blocks,
    uint32_t threads,
    cudaStream_t stream,
    volatile search_results* g_output,
    uint64_t start_nonce,
    uint32_t parallelHash
)
{
    if (parallelHash == 1)
        ethash_search<1><<<blocks, threads, 0, stream>>>(g_output, start_nonce);
    else if (parallelHash == 2)
        ethash_search<2><<<blocks, threads, 0, stream>>>(g_output, start_nonce);
    else if (parallelHash == 8)
        ethash_search<8><<<blocks, threads, 0, stream>>>(g_output, start_nonce);
    else  // 4, and every other value, uses the 4-way kernel
        ethash_search<4><<<blocks, threads, 0, stream>>>(g_output, start_nonce);

    CUDA_SAFE_CALL(cudaGetLastError());
}
@@ -86,61 +79,31 @@ ethash_calculate_dag_item(uint32_t start)
86
79
87
80
for (uint32_t i = 0 ; i != ETHASH_DATASET_PARENTS; ++i) {
88
81
uint32_t parent_index = fnv (node_index ^ i, dag_node.words [i % NODE_WORDS]) % d_light_size;
89
- #if __CUDA_ARCH__ < SHUFFLE_MIN_VER
90
- for (unsigned w = 0 ; w != 4 ; ++w) {
91
- dag_node.uint4s [w] = fnv4 (dag_node.uint4s [w], d_light[parent_index].uint4s [w]);
92
- }
93
- #else
94
82
for (uint32_t t = 0 ; t < 4 ; t++) {
95
83
96
- #if CUDA_VERSION < SHUFFLE_DEPRECATED
97
- uint32_t shuffle_index = __shfl (parent_index, t, 4 );
98
- #else
99
84
uint32_t shuffle_index = __shfl_sync (0xFFFFFFFF ,parent_index, t, 4 );
100
- #endif
101
85
102
86
uint4 p4 = d_light[shuffle_index].uint4s [thread_id];
103
87
for (int w = 0 ; w < 4 ; w++) {
104
88
105
- #if CUDA_VERSION < SHUFFLE_DEPRECATED
106
- uint4 s4 = make_uint4 (__shfl (p4.x , w, 4 ), __shfl (p4.y , w, 4 ), __shfl (p4.z , w, 4 ), __shfl (p4.w , w, 4 ));
107
- #else
108
89
uint4 s4 = make_uint4 (__shfl_sync (0xFFFFFFFF ,p4.x , w, 4 ), __shfl_sync (0xFFFFFFFF ,p4.y , w, 4 ), __shfl_sync (0xFFFFFFFF ,p4.z , w, 4 ), __shfl_sync (0xFFFFFFFF ,p4.w , w, 4 ));
109
- #endif
110
90
if (t == thread_id) {
111
91
dag_node.uint4s [w] = fnv4 (dag_node.uint4s [w], s4);
112
92
}
113
93
}
114
94
}
115
-
116
-
117
- #endif
118
95
}
119
96
SHA3_512 (dag_node.uint2s );
120
97
hash64_t * dag_nodes = (hash64_t *)d_dag;
121
98
122
- #if __CUDA_ARCH__ < SHUFFLE_MIN_VER
123
- for (uint32_t i = 0 ; i < 4 ; i++) {
124
- dag_nodes[node_index].uint4s [i] = dag_node.uint4s [i];
125
- }
126
- #else
127
99
for (uint32_t t = 0 ; t < 4 ; t++) {
128
- #if CUDA_VERSION < SHUFFLE_DEPRECATED
129
- uint32_t shuffle_index = __shfl (node_index, t, 4 );
130
- #else
131
100
uint32_t shuffle_index = __shfl_sync (0xFFFFFFFF ,node_index, t, 4 );
132
- #endif
133
101
uint4 s[4 ];
134
102
for (uint32_t w = 0 ; w < 4 ; w++) {
135
- #if CUDA_VERSION < SHUFFLE_DEPRECATED
136
- s[w] = make_uint4 (__shfl (dag_node.uint4s [w].x , t, 4 ), __shfl (dag_node.uint4s [w].y , t, 4 ), __shfl (dag_node.uint4s [w].z , t, 4 ), __shfl (dag_node.uint4s [w].w , t, 4 ));
137
- #else
138
103
s[w] = make_uint4 (__shfl_sync (0xFFFFFFFF ,dag_node.uint4s [w].x , t, 4 ), __shfl_sync (0xFFFFFFFF ,dag_node.uint4s [w].y , t, 4 ), __shfl_sync (0xFFFFFFFF ,dag_node.uint4s [w].z , t, 4 ), __shfl_sync (0xFFFFFFFF ,dag_node.uint4s [w].w , t, 4 ));
139
- #endif
140
104
}
141
105
dag_nodes[shuffle_index].uint4s [thread_id] = s[thread_id];
142
106
}
143
- #endif
144
107
}
145
108
146
109
void ethash_generate_dag (
0 commit comments