mirror of
https://github.com/alicevision/Meshroom.git
synced 2025-05-23 05:56:36 +02:00
[core] stats: minor simplification
add timeout on nvidia-smi; add try/except; remove duplicated code; no error if nvidia-smi is not available
This commit is contained in:
parent
88099dd28c
commit
cdee25e26e
1 changed file with 26 additions and 36 deletions
|
@ -44,7 +44,7 @@ class ComputerStatistics:
|
|||
self.gpuMemoryTotal = 0
|
||||
self.gpuName = ''
|
||||
self.curves = defaultdict(list)
|
||||
|
||||
self.nvidia_smi = None
|
||||
self._isInit = False
|
||||
|
||||
def initOnFirstTime(self):
|
||||
|
@ -53,40 +53,21 @@ class ComputerStatistics:
|
|||
self._isInit = True
|
||||
|
||||
self.cpuFreq = psutil.cpu_freq().max
|
||||
self.ramTotal = psutil.virtual_memory().total / 1024/1024/1024
|
||||
self.ramTotal = psutil.virtual_memory().total / (1024*1024*1024)
|
||||
|
||||
if platform.system() == "Windows":
|
||||
from distutils import spawn
|
||||
# If the platform is Windows and nvidia-smi
|
||||
# could not be found from the environment path,
|
||||
# try to find it from system drive with default installation path
|
||||
self.nvidia_smi = spawn.find_executable('nvidia-smi')
|
||||
if self.nvidia_smi is None:
|
||||
self.nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
|
||||
# could not be found from the environment path,
|
||||
# try to find it from system drive with default installation path
|
||||
default_nvidia_smi = "%s\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe" % os.environ['systemdrive']
|
||||
if os.path.isfile(default_nvidia_smi):
|
||||
self.nvidia_smi = default_nvidia_smi
|
||||
else:
|
||||
self.nvidia_smi = "nvidia-smi"
|
||||
|
||||
try:
|
||||
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE)
|
||||
xmlGpu, stdError = p.communicate()
|
||||
|
||||
smiTree = ET.fromstring(xmlGpu)
|
||||
gpuTree = smiTree.find('gpu')
|
||||
|
||||
try:
|
||||
self.gpuMemoryTotal = gpuTree.find('fb_memory_usage').find('total').text.split(" ")[0]
|
||||
except Exception as e:
|
||||
logging.debug('Failed to get gpuMemoryTotal: "{}".'.format(str(e)))
|
||||
pass
|
||||
try:
|
||||
self.gpuName = gpuTree.find('product_name').text
|
||||
except Exception as e:
|
||||
logging.debug('Failed to get gpuName: "{}".'.format(str(e)))
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logging.debug('Failed to get information from nvidia_smi at init: "{}".'.format(str(e)))
|
||||
|
||||
def _addKV(self, k, v):
|
||||
if isinstance(v, tuple):
|
||||
for ki, vi in v._asdict().items():
|
||||
|
@ -98,6 +79,7 @@ class ComputerStatistics:
|
|||
self.curves[k].append(v)
|
||||
|
||||
def update(self):
|
||||
try:
|
||||
self.initOnFirstTime()
|
||||
self._addKV('cpuUsage', psutil.cpu_percent(percpu=True)) # interval=None => non-blocking (percentage since last call)
|
||||
self._addKV('ramUsage', psutil.virtual_memory().percent)
|
||||
|
@ -105,11 +87,15 @@ class ComputerStatistics:
|
|||
self._addKV('vramUsage', 0)
|
||||
self._addKV('ioCounters', psutil.disk_io_counters())
|
||||
self.updateGpu()
|
||||
except Exception as e:
|
||||
logging.debug('Failed to get statistics: "{}".'.format(str(e)))
|
||||
|
||||
def updateGpu(self):
|
||||
if not self.nvidia_smi:
|
||||
return
|
||||
try:
|
||||
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE)
|
||||
xmlGpu, stdError = p.communicate()
|
||||
p = subprocess.Popen([self.nvidia_smi, "-q", "-x"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
xmlGpu, stdError = p.communicate(timeout=10) # 10 seconds
|
||||
|
||||
smiTree = ET.fromstring(xmlGpu)
|
||||
gpuTree = smiTree.find('gpu')
|
||||
|
@ -129,7 +115,11 @@ class ComputerStatistics:
|
|||
except Exception as e:
|
||||
logging.debug('Failed to get gpuTemperature: "{}".'.format(str(e)))
|
||||
pass
|
||||
|
||||
except subprocess.TimeoutExpired as e:
|
||||
logging.debug('Timeout when retrieving information from nvidia_smi: "{}".'.format(str(e)))
|
||||
p.kill()
|
||||
outs, errs = p.communicate()
|
||||
return
|
||||
except Exception as e:
|
||||
logging.debug('Failed to get information from nvidia_smi: "{}".'.format(str(e)))
|
||||
return
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue