Skip to content
This repository was archived by the owner on Apr 24, 2022. It is now read-only.

Commit d788e9e

Browse files
committedNov 25, 2018
Hardware monitors mapped by pci id
Instead of using indexes, make certain every GPU is matched to its hardware monitor by PCI ID
1 parent 5ad2a43 commit d788e9e

File tree

12 files changed

+280
-517
lines changed

12 files changed

+280
-517
lines changed
 

‎libethash-cl/CLMiner.cpp

+7-2
Original file line numberDiff line numberDiff line change
@@ -610,16 +610,21 @@ bool CLMiner::initDevice()
610610
if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Nvidia)
611611
{
612612
m_hwmoninfo.deviceType = HwMonitorInfoType::NVIDIA;
613-
m_hwmoninfo.indexSource = HwMonitorIndexSource::OPENCL;
613+
m_hwmoninfo.devicePciId = m_deviceDescriptor.UniqueId;
614+
m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
614615
m_noBinary = true;
615616
}
616617
else if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Amd)
617618
{
618619
m_hwmoninfo.deviceType = HwMonitorInfoType::AMD;
619-
m_hwmoninfo.indexSource = HwMonitorIndexSource::OPENCL;
620+
m_hwmoninfo.devicePciId = m_deviceDescriptor.UniqueId;
621+
m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
620622
}
621623
else if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Clover)
622624
{
625+
m_hwmoninfo.deviceType = HwMonitorInfoType::UNKNOWN;
626+
m_hwmoninfo.devicePciId = m_deviceDescriptor.UniqueId;
627+
m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
623628
m_noBinary = true;
624629
}
625630
else

‎libethash-cuda/CUDAMiner.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,14 @@ unsigned CUDAMiner::s_scheduleFlag = 0;
5555
bool CUDAMiner::initDevice()
5656
{
5757

58-
cudalog << "Using PciId : " << m_deviceDescriptor.UniqueId << " " << m_deviceDescriptor.cuName
58+
cudalog << "Using Pci Id : " << m_deviceDescriptor.UniqueId << " " << m_deviceDescriptor.cuName
5959
<< " (Compute " + m_deviceDescriptor.cuCompute + ") Memory : "
6060
<< FormattedMemSize(m_deviceDescriptor.TotalMemory);
6161

6262
// Set Hardware Monitor Info
6363
m_hwmoninfo.deviceType = HwMonitorInfoType::NVIDIA;
64-
m_hwmoninfo.indexSource = HwMonitorIndexSource::CUDA;
65-
m_hwmoninfo.deviceIndex = m_deviceDescriptor.cuDeviceIndex;
64+
m_hwmoninfo.devicePciId = m_deviceDescriptor.UniqueId;
65+
m_hwmoninfo.deviceIndex = -1; // Will be later on mapped by nvml (see Farm() constructor)
6666

6767
try
6868
{

‎libethcore/Farm.cpp

+117-33
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,56 @@ Farm::Farm(
3939
{
4040
#if defined(__linux)
4141
sysfsh = wrap_amdsysfs_create();
42+
if (sysfsh)
43+
{
44+
// Build Pci identification mapping as done in miners.
45+
for (int i = 0; i < sysfsh->sysfs_gpucount; i++)
46+
{
47+
std::ostringstream oss;
48+
std::string uniqueId;
49+
oss << std::setfill('0') << std::setw(2) << std::hex
50+
<< (unsigned int)sysfsh->sysfs_pci_bus_id[i] << ":" << std::setw(2)
51+
<< (unsigned int)(sysfsh->sysfs_pci_device_id[i]) << ".0";
52+
uniqueId = oss.str();
53+
map_amdsysfs_handle[uniqueId] = i;
54+
}
55+
}
56+
4257
#else
4358
adlh = wrap_adl_create();
59+
if (adlh)
60+
{
61+
// Build Pci identification as done in miners.
62+
for (int i = 0; i < adlh->adl_gpucount; i++)
63+
{
64+
std::ostringstream oss;
65+
std::string uniqueId;
66+
oss << std::setfill('0') << std::setw(2) << std::hex
67+
<< (unsigned int)adlh->devs[adlh->phys_logi_device_id[i]].iBusNumber << ":"
68+
<< std::setw(2)
69+
<< (unsigned int)(adlh->devs[adlh->phys_logi_device_id[i]].iDeviceNumber)
70+
<< ".0";
71+
uniqueId = oss.str();
72+
map_adl_handle[uniqueId] = i;
73+
}
74+
}
75+
4476
#endif
4577
nvmlh = wrap_nvml_create();
78+
if (nvmlh)
79+
{
80+
// Build Pci identification as done in miners.
81+
for (int i = 0; i < nvmlh->nvml_gpucount; i++)
82+
{
83+
std::ostringstream oss;
84+
std::string uniqueId;
85+
oss << std::setfill('0') << std::setw(2) << std::hex
86+
<< (unsigned int)nvmlh->nvml_pci_bus_id[i] << ":" << std::setw(2)
87+
<< (unsigned int)(nvmlh->nvml_pci_device_id[i] >> 3) << ".0";
88+
uniqueId = oss.str();
89+
map_nvml_handle[uniqueId] = i;
90+
}
91+
}
4692
}
4793

4894
// Initialize nonce_scrambler
@@ -66,11 +112,12 @@ Farm::~Farm()
66112
m_collectTimer.cancel();
67113

68114
// Deinit HWMON
69-
if (adlh)
70-
wrap_adl_destroy(adlh);
71115
#if defined(__linux)
72116
if (sysfsh)
73117
wrap_amdsysfs_destroy(sysfsh);
118+
#else
119+
if (adlh)
120+
wrap_adl_destroy(adlh);
74121
#endif
75122
if (nvmlh)
76123
wrap_nvml_destroy(nvmlh);
@@ -122,7 +169,8 @@ void Farm::setWork(WorkPackage const& _newWp)
122169
{
123170
// Equally divide the residual segment among miners
124171
_startNonce = m_currentWp.startNonce;
125-
m_nonce_segment_with = (unsigned int)log2(pow(2, 64 - (m_currentWp.exSizeBytes * 4)) / m_miners.size());
172+
m_nonce_segment_with =
173+
(unsigned int)log2(pow(2, 64 - (m_currentWp.exSizeBytes * 4)) / m_miners.size());
126174
}
127175
else
128176
{
@@ -373,59 +421,95 @@ void Farm::collectData(const boost::system::error_code& ec)
373421
HwMonitorInfo hwInfo = miner->hwmonInfo();
374422
HwMonitor hw;
375423
unsigned int tempC = 0, fanpcnt = 0, powerW = 0;
376-
if (hwInfo.deviceIndex >= 0)
424+
425+
if (hwInfo.deviceType == HwMonitorInfoType::NVIDIA && nvmlh)
377426
{
378-
if (hwInfo.deviceType == HwMonitorInfoType::NVIDIA && nvmlh)
427+
int devIdx = hwInfo.deviceIndex;
428+
if (devIdx == -1 && !hwInfo.devicePciId.empty())
379429
{
380-
int typeidx = 0;
381-
if (hwInfo.indexSource == HwMonitorIndexSource::CUDA)
382-
typeidx = nvmlh->cuda_nvml_device_id[hwInfo.deviceIndex];
383-
else if (hwInfo.indexSource == HwMonitorIndexSource::OPENCL)
384-
typeidx = nvmlh->opencl_nvml_device_id[hwInfo.deviceIndex];
430+
if (map_nvml_handle.find(hwInfo.devicePciId) != map_nvml_handle.end())
431+
{
432+
devIdx = map_nvml_handle[hwInfo.devicePciId];
433+
miner->setHwmonDeviceIndex(devIdx);
434+
}
385435
else
386-
typeidx = hwInfo.deviceIndex; // Unknown, don't map
436+
{
437+
// This will prevent further tries to map
438+
miner->setHwmonDeviceIndex(-2);
439+
}
440+
}
387441

388-
wrap_nvml_get_tempC(nvmlh, typeidx, &tempC);
389-
wrap_nvml_get_fanpcnt(nvmlh, typeidx, &fanpcnt);
442+
if (devIdx >= 0)
443+
{
444+
wrap_nvml_get_tempC(nvmlh, devIdx, &tempC);
445+
wrap_nvml_get_fanpcnt(nvmlh, devIdx, &fanpcnt);
390446

391447
if (m_hwmonlvl == 2)
392-
wrap_nvml_get_power_usage(nvmlh, typeidx, &powerW);
448+
wrap_nvml_get_power_usage(nvmlh, devIdx, &powerW);
393449
}
394-
else if (hwInfo.deviceType == HwMonitorInfoType::AMD)
395-
{
396-
int typeidx = 0;
450+
}
451+
else if (hwInfo.deviceType == HwMonitorInfoType::AMD)
452+
{
453+
int devIdx = 0;
397454
#if defined(__linux)
398-
if (sysfsh)
455+
if (sysfsh)
456+
{
457+
devIdx = hwInfo.deviceIndex;
458+
if (devIdx == -1 && !hwInfo.devicePciId.empty())
399459
{
400-
if (hwInfo.indexSource == HwMonitorIndexSource::OPENCL)
401-
typeidx = sysfsh->opencl_sysfs_device_id[hwInfo.deviceIndex];
460+
if (map_amdsysfs_handle.find(hwInfo.devicePciId) !=
461+
map_amdsysfs_handle.end())
462+
{
463+
devIdx = map_amdsysfs_handle[hwInfo.devicePciId];
464+
miner->setHwmonDeviceIndex(devIdx);
465+
}
402466
else
403-
typeidx = hwInfo.deviceIndex; // Unknown don't map
404-
405-
wrap_amdsysfs_get_tempC(sysfsh, typeidx, &tempC);
406-
wrap_amdsysfs_get_fanpcnt(sysfsh, typeidx, &fanpcnt);
467+
{
468+
// This will prevent further tries to map
469+
miner->setHwmonDeviceIndex(-2);
470+
}
471+
}
472+
473+
if (devIdx >= 0)
474+
{
475+
wrap_amdsysfs_get_tempC(sysfsh, devIdx, &tempC);
476+
wrap_amdsysfs_get_fanpcnt(sysfsh, devIdx, &fanpcnt);
407477

408478
if (m_hwmonlvl == 2)
409-
wrap_amdsysfs_get_power_usage(sysfsh, typeidx, &powerW);
479+
wrap_amdsysfs_get_power_usage(sysfsh, devIdx, &powerW);
410480
}
481+
}
411482
#else
412-
if (adlh) // Windows only for AMD
483+
if (adlh) // Windows only for AMD
484+
{
485+
int devIdx = hwInfo.deviceIndex;
486+
if (devIdx == -1 && !hwInfo.devicePciId.empty())
413487
{
414-
if (hwInfo.indexSource == HwMonitorIndexSource::OPENCL)
415-
typeidx = adlh->opencl_adl_device_id[hwInfo.deviceIndex];
488+
if (map_adl_handle.find(hwInfo.devicePciId) != map_adl_handle.end())
489+
{
490+
devIdx = map_adl_handle[hwInfo.devicePciId];
491+
miner->setHwmonDeviceIndex(devIdx);
492+
}
416493
else
417-
typeidx = hwInfo.deviceIndex; // Unknown don't map
494+
{
495+
// This will prevent further tries to map
496+
miner->setHwmonDeviceIndex(-2);
497+
}
498+
}
418499

419-
wrap_adl_get_tempC(adlh, typeidx, &tempC);
420-
wrap_adl_get_fanpcnt(adlh, typeidx, &fanpcnt);
500+
if (devIdx >= 0)
501+
{
502+
wrap_adl_get_tempC(adlh, devIdx, &tempC);
503+
wrap_adl_get_fanpcnt(adlh, devIdx, &fanpcnt);
421504

422505
if (m_hwmonlvl == 2)
423-
wrap_adl_get_power_usage(adlh, typeidx, &powerW);
506+
wrap_adl_get_power_usage(adlh, devIdx, &powerW);
424507
}
425-
#endif
426508
}
509+
#endif
427510
}
428511

512+
429513
// If temperature control has been enabled call
430514
// check threshold
431515
if (m_tstop)

‎libethcore/Farm.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,16 @@ class Farm : public FarmFace
253253
// Whether or not GPU solutions should be CPU re-evaluated
254254
bool m_noeval = false;
255255

256-
// Wrappers for hardware monitoring libraries
256+
// Wrappers for hardware monitoring libraries and their mappers
257257
wrap_nvml_handle* nvmlh = nullptr;
258+
std::map<string, int> map_nvml_handle = {};
258259

259260
#if defined(__linux)
260261
wrap_amdsysfs_handle* sysfsh = nullptr;
262+
std::map<string, int> map_amdsysfs_handle = {};
261263
#else
262264
wrap_adl_handle* adlh = nullptr;
265+
std::map<string, int> map_adl_handle = {};
263266
#endif
264267

265268
static Farm* m_this;

‎libethcore/Miner.h

+3-8
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,6 @@ enum class HwMonitorInfoType
6767
AMD
6868
};
6969

70-
enum class HwMonitorIndexSource
71-
{
72-
UNKNOWN,
73-
OPENCL,
74-
CUDA
75-
};
76-
7770
enum class ClPlatformTypeEnum
7871
{
7972
Unknown,
@@ -124,7 +117,7 @@ struct DeviceDescriptorType
124117
struct HwMonitorInfo
125118
{
126119
HwMonitorInfoType deviceType = HwMonitorInfoType::UNKNOWN;
127-
HwMonitorIndexSource indexSource = HwMonitorIndexSource::UNKNOWN;
120+
string devicePciId;
128121
int deviceIndex = -1;
129122
};
130123

@@ -333,6 +326,8 @@ class Miner : public Worker
333326

334327
HwMonitorInfo hwmonInfo() { return m_hwmoninfo; }
335328

329+
void setHwmonDeviceIndex(int i) { m_hwmoninfo.deviceIndex = i; }
330+
336331
/**
337332
* @brief Pauses mining setting a reason flag
338333
*/

0 commit comments

Comments
 (0)
This repository has been archived.