No eval for OpenCL - One last thing on my ethminer todo list.

jean-m-cyr · jean-m-cyr · commit 485b5c0bcdef · 2018-05-01T09:41:31.000-04:00
- Rename --cuda-noeval option to --noeval since it can now apply to
  both CUDA and OpenCL.
- Add support for using GPU mix hash result when noeval is set in
  CLMiner.
- Modify experimental opencl kernel to return mix hash on solution
  found.
- Modify stable opencl kernel to return mix hash on solution
  found.
- Move global s_noeval bool to common Miner.cpp
- Fix eval check to include equality.
diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h
@@ -536,9 +536,9 @@ class MinerCLI
 		}
 		else if (arg == "--cuda-streams" && i + 1 < argc)
 			m_numStreams = stol(argv[++i]);
-		else if (arg == "--cuda-noeval")
-			m_cudaNoEval = true;
 #endif
+		else if (arg == "--noeval")
+			m_noEval = true;
 		else if ((arg == "-L" || arg == "--dag-load-mode") && i + 1 < argc)
 		{
 			string mode = argv[++i];
@@ -696,6 +696,7 @@ class MinerCLI
 					0,
 					m_dagLoadMode,
 					m_dagCreateDevice,
+					m_noEval,
 					m_exit
 				))
 				exit(1);
@@ -722,7 +723,7 @@ class MinerCLI
 				m_cudaSchedule,
 				m_dagLoadMode,
 				m_dagCreateDevice,
-				m_cudaNoEval,
+				m_noEval,
 				m_exit
 				))
 				exit(1);
@@ -821,7 +822,7 @@ class MinerCLI
 			<< "        sync  - Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the results from the device." << endl
 			<< "    --cuda-devices <0 1 ..n> Select which CUDA GPUs to mine on. Default is to use all" << endl
 			<< "    --cuda-parallel-hash <1 2 ..8> Define how many hashes to calculate in a kernel, can be scaled to achieve better performance. Default=4" << endl
-			<< "    --cuda-noeval  bypass host software re-evaluation of GPU solutions." << endl
+			<< "    --noeval  bypass host software re-evaluation of GPU solutions." << endl
 			<< "        This will trim some milliseconds off the time it takes to send a result to the pool." << endl
 			<< "        Use at your own risk! If GPU generates errored results they WILL be forwarded to the pool" << endl
 			<< "        Not recommended at high overclock." << endl
@@ -1009,9 +1010,9 @@ class MinerCLI
 	unsigned m_cudaSchedule = 4; // sync
 	unsigned m_cudaGridSize = CUDAMiner::c_defaultGridSize;
 	unsigned m_cudaBlockSize = CUDAMiner::c_defaultBlockSize;
-	bool m_cudaNoEval = false;
 	unsigned m_parallelHash    = 4;
 #endif
+	bool m_noEval = false;
 	unsigned m_dagLoadMode = 0; // parallel
 	unsigned m_dagCreateDevice = 0;
 	bool m_exit = false;
diff --git a/libethash-cl/CLMiner.cpp b/libethash-cl/CLMiner.cpp
@@ -274,6 +274,13 @@ CLMiner::~CLMiner()
     kick_miner();
 }
 
+
+typedef struct {  // Host-side view of m_searchBuffer as read back in CLMiner::workLoop().
+    unsigned count;  // Non-zero when the search kernel reported a solution.
+    unsigned gid;  // Added to current.startNonce to form the solution nonce.
+    unsigned mix[8];  // GPU mix hash; copied into an h256 when s_noeval is set. NOTE(review): kernels declare ulong mix[4] - assumes 32-bit unsigned so both are 32 bytes; confirm.
+} search_results;
+
 void CLMiner::workLoop()
 {
     // Memory for zero-ing buffers. Cannot be static because crashes on macOS.
@@ -341,15 +348,12 @@ void CLMiner::workLoop()
             }
 
             // Read results.
-            // TODO: could use pinned host pointer instead.
-            uint32_t results[c_maxSearchResults + 1];
+			search_results results;
+
             m_queue.enqueueReadBuffer(m_searchBuffer, CL_TRUE, 0, sizeof(results), &results);
 
-            uint64_t nonce = 0;
-            if (results[0] > 0)
+            if (results.count)
             {
-                // Ignore results except the first one.
-                nonce = current.startNonce + results[1];
                 // Reset search buffer if any solution found.
                 m_queue.enqueueWriteBuffer(m_searchBuffer, CL_FALSE, 0, sizeof(c_zero), &c_zero);
             }
@@ -359,14 +363,21 @@ void CLMiner::workLoop()
             m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, m_workgroupSize);
 
             // Report results while the kernel is running.
-            // It takes some time because ethash must be re-evaluated on CPU.
-            if (nonce != 0) {
-                Result r = EthashAux::eval(current.epoch, current.header, nonce);
-                if (r.value < current.boundary)
-                    farm.submitProof(Solution{nonce, r.mixHash, current, current.header != w.header});
+            if (results.count) {
+                uint64_t nonce = current.startNonce + results.gid;
+                if (!s_noeval) {
+                    Result r = EthashAux::eval(current.epoch, current.header, nonce);
+                    if (r.value <= current.boundary)
+                        farm.submitProof(Solution{nonce, r.mixHash, current, current.header != w.header});
+                    else {
+                        farm.failedSolution();
+                        cwarn << "GPU gave incorrect result!";
+                    }
+                }
                 else {
-                    farm.failedSolution();
-                    cwarn << "FAILURE: GPU gave incorrect result!";
+                    h256 mix;
+                    memcpy(mix.data(), results.mix, sizeof(results.mix));
+                    farm.submitProof(Solution{nonce, mix, current, current.header != w.header});
                 }
             }
 
@@ -450,8 +461,10 @@ void CLMiner::listDevices()
 }
 
 bool CLMiner::configureGPU(unsigned _localWorkSize, unsigned _globalWorkSizeMultiplier,
-    unsigned _platformId, int epoch, unsigned _dagLoadMode, unsigned _dagCreateDevice, bool _exit)
+    unsigned _platformId, int epoch, unsigned _dagLoadMode, unsigned _dagCreateDevice,
+	bool _noeval, bool _exit)
 {
+	s_noeval = _noeval;
     s_dagLoadMode = _dagLoadMode;
     s_dagCreateDevice = _dagCreateDevice;
     s_exit = _exit;
@@ -677,7 +690,7 @@ bool CLMiner::init(int epoch)
 
         // create mining buffers
         ETHCL_LOG("Creating mining buffer");
-        m_searchBuffer = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_maxSearchResults + 1) * sizeof(uint32_t));
+        m_searchBuffer = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, sizeof(search_results));
 
         const auto workItems = dagNumItems * 2;  // GPU computes partial 512-bit DAG items.
         uint32_t fullRuns = workItems / m_globalWorkSize;
diff --git a/libethash-cl/CLMiner.h b/libethash-cl/CLMiner.h
@@ -61,7 +61,7 @@ class CLMiner: public Miner
 	static void listDevices();
     static bool configureGPU(unsigned _localWorkSize, unsigned _globalWorkSizeMultiplier,
         unsigned _platformId, int epoch, unsigned _dagLoadMode, unsigned _dagCreateDevice,
-        bool _exit);
+        bool _noeval, bool _exit);
     static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, getNumDevices()); }
 	static void setThreadsPerHash(unsigned _threadsPerHash){s_threadsPerHash = _threadsPerHash; }
 	static void setDevices(const vector<unsigned>& _devices, unsigned _selectedDeviceCount)
diff --git a/libethash-cl/CLMiner_kernel_experimental.cl b/libethash-cl/CLMiner_kernel_experimental.cl
@@ -266,12 +266,17 @@ typedef union {
     uint16  uint16s[200 / sizeof(uint16)];
 } hash200_t;
 
+typedef struct {  // Output record written by ethash_search through g_output.
+    unsigned count;  // Bumped with atomic_inc by every work item that passes the target check.
+    unsigned gid;  // The kernel's gid value; stored only when atomic_inc returned 0.
+    ulong mix[4];  // Copy of state[8..11] saved in mixhash before the target check; stored only when atomic_inc returned 0.
+} search_results;
 
 #if PLATFORM != OPENCL_PLATFORM_NVIDIA // use maxrregs on nv
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 #endif
 __kernel void ethash_search(
-    __global volatile uint* restrict g_output,
+    __global volatile search_results* restrict g_output,
     __constant hash32_t const* g_header,
     __global hash128_t const* g_dag,
     ulong start_nonce,
@@ -400,6 +405,12 @@ __kernel void ethash_search(
     }
  #endif
 
+    ulong mixhash[4];
+    mixhash[0] = state[8];
+    mixhash[1] = state[9];
+    mixhash[2] = state[10];
+    mixhash[3] = state[11];
+
     for (uint i = 13; i != 25; ++i) {
         state[i] = 0;
     }
@@ -411,8 +422,13 @@ __kernel void ethash_search(
 
     if (as_ulong(as_uchar8(state[0]).s76543210) > target)
 		return;
-    uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
-    g_output[slot] = gid;
+    if (atomic_inc(&g_output->count))
+        return;
+    g_output->gid = gid;
+    g_output->mix[0] = mixhash[0];
+    g_output->mix[1] = mixhash[1];
+    g_output->mix[2] = mixhash[2];
+    g_output->mix[3] = mixhash[3];
 }
 
 __kernel void ethash_calculate_dag_item(uint start, __global hash64_t const* g_light, __global hash64_t * g_dag, uint isolate)
diff --git a/libethash-cl/CLMiner_kernel_stable.cl b/libethash-cl/CLMiner_kernel_stable.cl
@@ -284,11 +284,17 @@ typedef union {
 	uint  uints[16];
 } compute_hash_share;
 
+typedef struct {  // Output record written by ethash_search through g_output.
+    unsigned count;  // Bumped with atomic_inc by every work item that passes the target check.
+    unsigned gid;  // The kernel's gid value; stored only when atomic_inc returned 0.
+    ulong mix[4];  // Copy of state[8..11] saved in mixhash before the final keccak call; stored only when atomic_inc returned 0.
+} search_results;
+
 #if PLATFORM != OPENCL_PLATFORM_NVIDIA // use maxrregs on nv
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 #endif
 __kernel void ethash_search(
-	__global volatile uint* restrict g_output,
+	__global volatile search_results* restrict g_output,
 	__constant hash32_t const* g_header,
 	__global hash128_t const* g_dag,
 	ulong start_nonce,
@@ -364,6 +370,12 @@ __kernel void ethash_search(
 		barrier(CLK_LOCAL_MEM_FENCE);
 	}
 
+    ulong mixhash[4];
+    mixhash[0] = state[8];
+    mixhash[1] = state[9];
+    mixhash[2] = state[10];
+    mixhash[3] = state[11];
+
 	for (uint i = 13; i != 25; ++i)
 	{
 		state[i] = 0;
@@ -374,10 +386,15 @@ __kernel void ethash_search(
 	// keccak_256(keccak_512(header..nonce) .. mix);
 	keccak_f1600_no_absorb((uint2*)state, 1, isolate);
 
-	if (as_ulong(as_uchar8(state[0]).s76543210) > target)
-		return;
-	uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
-	g_output[slot] = gid;
+    if (as_ulong(as_uchar8(state[0]).s76543210) > target)
+        return;
+    if (atomic_inc(&g_output->count))
+        return;
+    g_output->gid = gid;
+    g_output->mix[0] = mixhash[0];
+    g_output->mix[1] = mixhash[1];
+    g_output->mix[2] = mixhash[2];
+    g_output->mix[3] = mixhash[3];
 }
 
 static void SHA3_512(uint2* s, uint isolate)
diff --git a/libethash-cuda/CUDAMiner.cpp b/libethash-cuda/CUDAMiner.cpp
@@ -299,7 +299,6 @@ unsigned CUDAMiner::s_blockSize = CUDAMiner::c_defaultBlockSize;
 unsigned CUDAMiner::s_gridSize = CUDAMiner::c_defaultGridSize;
 unsigned CUDAMiner::s_numStreams = CUDAMiner::c_defaultNumStreams;
 unsigned CUDAMiner::s_scheduleFlag = 0;
-bool CUDAMiner::s_noeval = false;
 
 bool CUDAMiner::cuda_init(
     size_t numDevices,
@@ -514,7 +513,7 @@ void CUDAMiner::search(
                     else
                     {
                         Result r = EthashAux::eval(w.epoch, w.header, nonces[i]);
-                        if (r.value < w.boundary)
+                        if (r.value <= w.boundary)
                             farm.submitProof(Solution{nonces[i], r.mixHash, w, m_new_work});
                         else
                         {
diff --git a/libethash-cuda/CUDAMiner.h b/libethash-cuda/CUDAMiner.h
@@ -127,9 +127,6 @@ class CUDAMiner: public Miner
 
 	static unsigned s_numInstances;
 	static vector<int> s_devices;
-
-	static bool s_noeval;
-
 };
 
 
diff --git a/libethcore/Miner.cpp b/libethcore/Miner.cpp
@@ -14,3 +14,5 @@ uint8_t* dev::eth::Miner::s_dagInHostMemory = NULL;
 
 bool dev::eth::Miner::s_exit = false;
 
+bool dev::eth::Miner::s_noeval = false;
+
diff --git a/libethcore/Miner.h b/libethcore/Miner.h
@@ -234,6 +234,7 @@ class Miner: public Worker
 	static unsigned s_dagCreateDevice;
 	static uint8_t* s_dagInHostMemory;
 	static bool s_exit;
+	static bool s_noeval;
 
 	const size_t index = 0;
 	FarmFace& farm;

Original file line number	Diff line number	Diff line change
`@@ -14,3 +14,5 @@ uint8_t* dev::eth::Miner::s_dagInHostMemory = NULL;`
`14`	`14`
`15`	`15`	`bool dev::eth::Miner::s_exit = false;`
`16`	`16`
	`17`	`+bool dev::eth::Miner::s_noeval = false;`
	`18`	`+`