mirror of
https://github.com/alicevision/Meshroom.git
synced 2025-05-23 22:16:30 +02:00
[core] stats: minor simplification
add a timeout on nvidia-smi; add try/except; remove duplicated code; no error if nvidia-smi is not available
This commit is contained in:
parent
88099dd28c
commit
cdee25e26e
1 changed file with 26 additions and 36 deletions
|
@ -44,7 +44,7 @@ class ComputerStatistics:
|
||||||
self.gpuMemoryTotal = 0
|
self.gpuMemoryTotal = 0
|
||||||
self.gpuName = ''
|
self.gpuName = ''
|
||||||
self.curves = defaultdict(list)
|
self.curves = defaultdict(list)
|
||||||
|
self.nvidia_smi = None
|
||||||
self._isInit = False
|
self._isInit = False
|
||||||
|
|
||||||
def initOnFirstTime(self):
|
def initOnFirstTime(self):
|
||||||
|
@ -53,40 +53,21 @@ class ComputerStatistics:
|
||||||
self._isInit = True
|
self._isInit = True
|
||||||
|
|
||||||
self.cpuFreq = psutil.cpu_freq().max
|
self.cpuFreq = psutil.cpu_freq().max
|
||||||
self.ramTotal = psutil.virtual_memory().total / 1024/1024/1024
|
self.ramTotal = psutil.virtual_memory().total / (1024*1024*1024)
|
||||||
|
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
from distutils import spawn
|
from distutils import spawn
|
||||||
# If the platform is Windows and nvidia-smi
|
# If the platform is Windows and nvidia-smi
|
||||||
# could not be found from the environment path,
|
|
||||||
# try to find it from system drive with default installation path
|
|
||||||
self.nvidia_smi = spawn.find_executable('nvidia-smi')
|
self.nvidia_smi = spawn.find_executable('nvidia-smi')
|
||||||
if self.nvidia_smi is None:
|
if self.nvidia_smi is None:
|
||||||
self.nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
|
# could not be found from the environment path,
|
||||||
|
# try to find it from system drive with default installation path
|
||||||
|
default_nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
|
||||||
|
if os.path.isfile(default_nvidia_smi):
|
||||||
|
self.nvidia_smi = default_nvidia_smi
|
||||||
else:
|
else:
|
||||||
self.nvidia_smi = "nvidia-smi"
|
self.nvidia_smi = "nvidia-smi"
|
||||||
|
|
||||||
try:
|
|
||||||
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE)
|
|
||||||
xmlGpu, stdError = p.communicate()
|
|
||||||
|
|
||||||
smiTree = ET.fromstring(xmlGpu)
|
|
||||||
gpuTree = smiTree.find('gpu')
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.gpuMemoryTotal = gpuTree.find('fb_memory_usage').find('total').text.split(" ")[0]
|
|
||||||
except Exception as e:
|
|
||||||
logging.debug('Failed to get gpuMemoryTotal: "{}".'.format(str(e)))
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
self.gpuName = gpuTree.find('product_name').text
|
|
||||||
except Exception as e:
|
|
||||||
logging.debug('Failed to get gpuName: "{}".'.format(str(e)))
|
|
||||||
pass
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logging.debug('Failed to get information from nvidia_smi at init: "{}".'.format(str(e)))
|
|
||||||
|
|
||||||
def _addKV(self, k, v):
|
def _addKV(self, k, v):
|
||||||
if isinstance(v, tuple):
|
if isinstance(v, tuple):
|
||||||
for ki, vi in v._asdict().items():
|
for ki, vi in v._asdict().items():
|
||||||
|
@ -98,6 +79,7 @@ class ComputerStatistics:
|
||||||
self.curves[k].append(v)
|
self.curves[k].append(v)
|
||||||
|
|
||||||
def update(self):
|
def update(self):
|
||||||
|
try:
|
||||||
self.initOnFirstTime()
|
self.initOnFirstTime()
|
||||||
self._addKV('cpuUsage', psutil.cpu_percent(percpu=True)) # interval=None => non-blocking (percentage since last call)
|
self._addKV('cpuUsage', psutil.cpu_percent(percpu=True)) # interval=None => non-blocking (percentage since last call)
|
||||||
self._addKV('ramUsage', psutil.virtual_memory().percent)
|
self._addKV('ramUsage', psutil.virtual_memory().percent)
|
||||||
|
@ -105,11 +87,15 @@ class ComputerStatistics:
|
||||||
self._addKV('vramUsage', 0)
|
self._addKV('vramUsage', 0)
|
||||||
self._addKV('ioCounters', psutil.disk_io_counters())
|
self._addKV('ioCounters', psutil.disk_io_counters())
|
||||||
self.updateGpu()
|
self.updateGpu()
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug('Failed to get statistics: "{}".'.format(str(e)))
|
||||||
|
|
||||||
def updateGpu(self):
|
def updateGpu(self):
|
||||||
|
if not self.nvidia_smi:
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE)
|
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
xmlGpu, stdError = p.communicate()
|
xmlGpu, stdError = p.communicate(timeout=10) # 10 seconds
|
||||||
|
|
||||||
smiTree = ET.fromstring(xmlGpu)
|
smiTree = ET.fromstring(xmlGpu)
|
||||||
gpuTree = smiTree.find('gpu')
|
gpuTree = smiTree.find('gpu')
|
||||||
|
@ -129,7 +115,11 @@ class ComputerStatistics:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.debug('Failed to get gpuTemperature: "{}".'.format(str(e)))
|
logging.debug('Failed to get gpuTemperature: "{}".'.format(str(e)))
|
||||||
pass
|
pass
|
||||||
|
except subprocess.TimeoutExpired as e:
|
||||||
|
logging.debug('Timeout when retrieving information from nvidia_smi: "{}".'.format(str(e)))
|
||||||
|
p.kill()
|
||||||
|
outs, errs = p.communicate()
|
||||||
|
return
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.debug('Failed to get information from nvidia_smi: "{}".'.format(str(e)))
|
logging.debug('Failed to get information from nvidia_smi: "{}".'.format(str(e)))
|
||||||
return
|
return
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue