@@ -108,8 +108,6 @@ void CUDAMiner::workLoop()
108
108
std::this_thread::sleep_for (std::chrono::seconds (3 ));
109
109
continue ;
110
110
}
111
-
112
- // cnote << "set work; seed: " << "#" + w.seed.hex().substr(0, 8) + ", target: " << "#" + w.boundary.hex().substr(0, 12);
113
111
if (current.seed != w.seed )
114
112
{
115
113
if (!init (w.seed ))
@@ -324,7 +322,7 @@ bool CUDAMiner::cuda_init(
324
322
325
323
cudalog << " Using device: " << device_props.name << " (Compute " + to_string (device_props.major ) + " ." + to_string (device_props.minor ) + " )" ;
326
324
327
- m_search_buf = new volatile uint32_t *[s_numStreams];
325
+ m_search_buf = new volatile search_results *[s_numStreams];
328
326
m_streams = new cudaStream_t[s_numStreams];
329
327
330
328
uint64_t dagSize = ethash_get_datasize (_light->block_number );
@@ -376,7 +374,7 @@ bool CUDAMiner::cuda_init(
376
374
cudalog << " Generating mining buffers" ; // TODO whats up with this?
377
375
for (unsigned i = 0 ; i != s_numStreams; ++i)
378
376
{
379
- CUDA_SAFE_CALL (cudaMallocHost (&m_search_buf[i], SEARCH_RESULT_BUFFER_SIZE * sizeof (uint32_t )));
377
+ CUDA_SAFE_CALL (cudaMallocHost (&m_search_buf[i], sizeof (search_results )));
380
378
CUDA_SAFE_CALL (cudaStreamCreate (&m_streams[i]));
381
379
}
382
380
@@ -385,8 +383,6 @@ bool CUDAMiner::cuda_init(
385
383
m_current_nonce = 0 ;
386
384
m_current_index = 0 ;
387
385
388
- m_sharedBytes = device_props.major * 100 < SHUFFLE_MIN_VER ? (64 * s_blockSize) / 8 : 0 ;
389
-
390
386
if (!hostDAG)
391
387
{
392
388
if ((m_device_num == dagCreateDevice) || !_cpyToHost){ // if !cpyToHost -> All devices shall generate their DAG
@@ -455,7 +451,7 @@ void CUDAMiner::search(
455
451
m_current_index = 0 ;
456
452
CUDA_SAFE_CALL (cudaDeviceSynchronize ());
457
453
for (unsigned int i = 0 ; i < s_numStreams; i++)
458
- m_search_buf[i][ 0 ] = 0 ;
454
+ m_search_buf[i]-> count = 0 ;
459
455
}
460
456
if (m_starting_nonce != _startN)
461
457
{
@@ -472,7 +468,7 @@ void CUDAMiner::search(
472
468
m_current_index = 0 ;
473
469
CUDA_SAFE_CALL (cudaDeviceSynchronize ());
474
470
for (unsigned int i = 0 ; i < s_numStreams; i++)
475
- m_search_buf[i][ 0 ] = 0 ;
471
+ m_search_buf[i]-> count = 0 ;
476
472
}
477
473
}
478
474
uint64_t batch_size = s_gridSize * s_blockSize;
@@ -482,38 +478,42 @@ void CUDAMiner::search(
482
478
m_current_nonce += batch_size;
483
479
auto stream_index = m_current_index % s_numStreams;
484
480
cudaStream_t stream = m_streams[stream_index];
485
- volatile uint32_t * buffer = m_search_buf[stream_index];
481
+ volatile search_results * buffer = m_search_buf[stream_index];
486
482
uint32_t found_count = 0 ;
487
- uint64_t nonces[SEARCH_RESULT_ENTRIES ];
488
- uint32_t mixes[SEARCH_RESULT_ENTRIES ][8 ];
483
+ uint64_t nonces[SEARCH_RESULTS ];
484
+ uint32_t mixes[SEARCH_RESULTS ][8 ];
489
485
uint64_t nonce_base = m_current_nonce - s_numStreams * batch_size;
490
486
if (m_current_index >= s_numStreams)
491
487
{
492
488
CUDA_SAFE_CALL (cudaStreamSynchronize (stream));
493
- found_count = buffer[ 0 ] ;
489
+ found_count = buffer-> count ;
494
490
if (found_count) {
495
- buffer[ 0 ] = 0 ;
496
- if (found_count >= SEARCH_RESULT_ENTRIES )
497
- found_count = SEARCH_RESULT_ENTRIES - 1 ;
498
- for (unsigned int j = 1 ; j <= found_count; j++) {
499
- nonces[j] = nonce_base + buffer[j];
500
- mixes[j][0 ] = buffer[j + (SEARCH_RESULT_ENTRIES * 1 ) ];
501
- mixes[j][1 ] = buffer[j + (SEARCH_RESULT_ENTRIES * 2 ) ];
502
- mixes[j][2 ] = buffer[j + (SEARCH_RESULT_ENTRIES * 3 ) ];
503
- mixes[j][3 ] = buffer[j + (SEARCH_RESULT_ENTRIES * 4 ) ];
504
- mixes[j][4 ] = buffer[j + (SEARCH_RESULT_ENTRIES * 5 ) ];
505
- mixes[j][5 ] = buffer[j + (SEARCH_RESULT_ENTRIES * 6 ) ];
506
- mixes[j][6 ] = buffer[j + (SEARCH_RESULT_ENTRIES * 7 ) ];
507
- mixes[j][7 ] = buffer[j + (SEARCH_RESULT_ENTRIES * 8 ) ];
491
+ buffer-> count = 0 ;
492
+ if (found_count > SEARCH_RESULTS )
493
+ found_count = SEARCH_RESULTS ;
494
+ for (unsigned int j = 0 ; j < found_count; j++) {
495
+ nonces[j] = nonce_base + buffer-> result [j]. gid ;
496
+ mixes[j][0 ] = buffer-> result [j]. mix [ 0 ];
497
+ mixes[j][1 ] = buffer-> result [j]. mix [ 1 ];
498
+ mixes[j][2 ] = buffer-> result [j]. mix [ 2 ];
499
+ mixes[j][3 ] = buffer-> result [j]. mix [ 3 ];
500
+ mixes[j][4 ] = buffer-> result [j]. mix [ 4 ];
501
+ mixes[j][5 ] = buffer-> result [j]. mix [ 5 ];
502
+ mixes[j][6 ] = buffer-> result [j]. mix [ 6 ];
503
+ mixes[j][7 ] = buffer-> result [j]. mix [ 7 ];
508
504
}
509
505
}
510
506
}
511
- run_ethash_search (s_gridSize, s_blockSize, m_sharedBytes, stream, buffer, m_current_nonce, m_parallelHash);
507
+ run_ethash_search (s_gridSize, s_blockSize, stream, buffer, m_current_nonce, m_parallelHash);
512
508
if (m_current_index >= s_numStreams)
513
509
{
514
510
if (found_count)
515
- for (uint32_t i = 1 ; i <= found_count; i++)
516
- farm.submitProof (Solution{nonces[i], *((h256 *)mixes[i]), w.header , w.seed , w.boundary , w.job , w.job_len , m_abort});
511
+ for (uint32_t i = 0 ; i < found_count; i++)
512
+ farm.submitProof (
513
+ Solution{nonces[i],
514
+ *((const h256 *)mixes[i]),
515
+ w,
516
+ m_abort});
517
517
addHashCount (batch_size);
518
518
if (m_abort || shouldStop ())
519
519
{
0 commit comments