Skip to content
This repository was archived by the owner on Apr 24, 2022. It is now read-only.

Hardware Monitoring for NVML, ADL and AMDGPU SysFS #319

Merged
8 commits merged on Nov 27, 2017 (the source and target branch names are missing from this capture)
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -145,6 +145,7 @@ createBuildInfo()

add_subdirectory(libdevcore)
add_subdirectory(libethash)
add_subdirectory(libhwmon)
if (ETHASHCL)
add_subdirectory(libethash-cl)
endif ()
2 changes: 1 addition & 1 deletion libethash-cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -25,7 +25,7 @@ file(GLOB sources "*.cpp" "*.cu")
file(GLOB headers "*.h" "*.cuh")

cuda_add_library(ethash-cuda STATIC ${sources} ${headers})
target_link_libraries(ethash-cuda ethcore ethash)
target_link_libraries(ethash-cuda ethcore ethash hwmon)
target_include_directories(ethash-cuda PUBLIC ${CUDA_INCLUDE_DIRS})
target_include_directories(ethash-cuda PRIVATE .. ${CMAKE_CURRENT_BINARY_DIR})

4 changes: 2 additions & 2 deletions libethash-cuda/ethash_cuda_miner.cpp
Original file line number Diff line number Diff line change
@@ -366,8 +366,8 @@ dev::eth::HwMonitor ethash_cuda_miner::hwmon()
dev::eth::HwMonitor hw;
if (nvmlh) {
unsigned int tempC = 0, fanpcnt = 0;
wrap_nvml_get_tempC(nvmlh, m_device_num, &tempC);
wrap_nvml_get_fanpcnt(nvmlh, m_device_num, &fanpcnt);
wrap_nvml_get_tempC(nvmlh, nvmlh->cuda_nvml_device_id[m_device_num], &tempC);
wrap_nvml_get_fanpcnt(nvmlh, nvmlh->cuda_nvml_device_id[m_device_num], &fanpcnt);
hw.tempC = tempC;
hw.fanP = fanpcnt;
}
2 changes: 1 addition & 1 deletion libethash-cuda/ethash_cuda_miner.h
Original file line number Diff line number Diff line change
@@ -6,8 +6,8 @@
#include <functional>
#include <libethash/ethash.h>
#include <libethcore/Miner.h>
#include <libhwmon/wrapnvml.h>
#include "ethash_cuda_miner_kernel.h"
#include "wrapnvml.h"

class ethash_cuda_miner
{
12 changes: 12 additions & 0 deletions libhwmon/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Build script for the hwmon library, which is built from the NVML wrapper
# sources (wrapnvml.h / wrapnvml.cpp).
set(SOURCES
wrapnvml.h wrapnvml.cpp
)

# No STATIC/SHARED keyword is given, so the library type follows the
# project-wide BUILD_SHARED_LIBS setting.
add_library(hwmon ${SOURCES})
target_link_libraries(hwmon devcore)
# Add the parent directory to this target's own include path only (PRIVATE);
# it is not propagated to targets that link hwmon.
target_include_directories(hwmon PRIVATE ..)

# When ETHASHCUDA is enabled, CUDA becomes a hard requirement and its include
# directories are exported (PUBLIC) to every target that links hwmon.
# NOTE(review): wrapnvml.cpp includes cuda_runtime.h under `#if ETH_ETHASHCUDA`;
# presumably that macro is defined wherever ETHASHCUDA is turned on — confirm
# in the top-level CMakeLists.txt.
if (ETHASHCUDA)
find_package(CUDA REQUIRED)
target_include_directories(hwmon PUBLIC ${CUDA_INCLUDE_DIRS})
endif()
21 changes: 12 additions & 9 deletions libethash-cuda/wrapnvml.cu → libhwmon/wrapnvml.cpp
Original file line number Diff line number Diff line change
@@ -17,7 +17,9 @@
#include <stdio.h>
#include <stdlib.h>
#include "wrapnvml.h"
#if ETH_ETHASHCUDA
#include "cuda_runtime.h"
#endif

/*
* Wrappers to emulate dlopen() on other systems like Windows
@@ -133,6 +135,7 @@ wrap_nvml_handle * wrap_nvml_create() {
nvmlh->nvmlInit();
nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);

#if ETH_ETHASHCUDA
/* Query CUDA device count, in case it doesn't agree with NVML, since */
/* CUDA will only report GPUs with compute capability greater than 1.0 */
if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
@@ -143,8 +146,10 @@ wrap_nvml_handle * wrap_nvml_create() {
free(nvmlh);
return NULL;
}

#endif
nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));

#if ETH_ETHASHCUDA
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
@@ -190,6 +195,8 @@ wrap_nvml_handle * wrap_nvml_create() {
}
}

#endif

return nvmlh;
}

@@ -214,10 +221,9 @@ int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
}

int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
int cudaindex,
int gpuindex,
char *namebuf,
int bufsize) {
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;

@@ -229,9 +235,8 @@ int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,


int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
int cudaindex, unsigned int *tempC) {
int gpuindex, unsigned int *tempC) {
wrap_nvmlReturn_t rc;
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;

@@ -245,9 +250,8 @@ int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,


int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
int cudaindex, unsigned int *fanpcnt) {
int gpuindex, unsigned int *fanpcnt) {
wrap_nvmlReturn_t rc;
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;

@@ -261,9 +265,8 @@ int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,


int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
int cudaindex,
int gpuindex,
unsigned int *milliwatts) {
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;

File renamed without changes.