[core] add cgroups checks

This commit is contained in:
Fabien Servant 2022-12-02 05:47:19 +00:00 committed by fabien servant
parent a2f559f48a
commit 788bd7738b
2 changed files with 116 additions and 1 deletions

101
meshroom/core/cgroup.py Executable file
View file

@ -0,0 +1,101 @@
#!/usr/bin/env python
# coding:utf-8
import os
#Try to retrieve limits of memory for the current process' cgroup
def getCgroupMemorySize():
#first of all, get pid of process
pid = os.getpid()
#Get cgroup associated with pid
filename = f"/proc/{pid}/cgroup"
cgroup = None
try:
with open(filename, "r") as f :
#cgroup file is a ':' separated table
#lookup a line where the second field is "memory"
lines = f.readlines()
for line in lines:
tokens = line.rstrip("\r\n").split(":")
if len(tokens) < 3:
continue
if tokens[1] == "memory":
cgroup = tokens[2]
except IOError:
pass
if cgroup is None:
return -1
size = -1
filename = f"/sys/fs/cgroup/memory/{cgroup}"
try:
with open(filename, "r") as f :
value = f.read().rstrip("\r\n")
if value.isnumeric():
size = int(value)
except IOError:
pass
return size
def parseNumericList(str):
nList = []
for item in str.split(','):
if '-' in item:
start, end = item.split('-')
start = int(start)
end = int(end)
nList.extend(range(start, end + 1))
else:
value = int(item)
nList.append(value)
return nList
#Try to retrieve limits of cores for the current process' cgroup
def getCgroupCpuCount():
#first of all, get pid of process
pid = os.getpid()
#Get cgroup associated with pid
filename = f"/proc/{pid}/cgroup"
cgroup = None
try:
with open(filename, "r") as f :
#cgroup file is a ':' separated table
#lookup a line where the second field is "memory"
lines = f.readlines()
for line in lines:
tokens = line.rstrip("\r\n").split(":")
if len(tokens) < 3:
continue
if tokens[1] == "cpuset":
cgroup = tokens[2]
except IOError:
pass
if cgroup is None:
return -1
size = -1
filename = f"/sys/fs/cgroup/cpuset/{cgroup}/cpuset.cpus"
try:
with open(filename, "r") as f :
value = f.read().rstrip("\r\n")
nlist = parseNumericList(value)
size = len(nlist)
except IOError:
pass
return size

View file

@ -1,4 +1,5 @@
from meshroom.common import BaseObject, Property, Variant, VariantList, JSValue from meshroom.common import BaseObject, Property, Variant, VariantList, JSValue
from meshroom.core import pyCompatibility, cgroup
from collections.abc import Iterable from collections.abc import Iterable
from enum import Enum from enum import Enum
@ -534,6 +535,7 @@ class CommandLineNode(Node):
commandLineRange = '' commandLineRange = ''
def buildCommandLine(self, chunk): def buildCommandLine(self, chunk):
cmdPrefix = '' cmdPrefix = ''
# if rez available in env, we use it # if rez available in env, we use it
if 'REZ_ENV' in os.environ and chunk.node.packageVersion: if 'REZ_ENV' in os.environ and chunk.node.packageVersion:
@ -541,10 +543,22 @@ class CommandLineNode(Node):
alreadyInEnv = os.environ.get('REZ_{}_VERSION'.format(chunk.node.packageName.upper()), "").startswith(chunk.node.packageVersion) alreadyInEnv = os.environ.get('REZ_{}_VERSION'.format(chunk.node.packageName.upper()), "").startswith(chunk.node.packageVersion)
if not alreadyInEnv: if not alreadyInEnv:
cmdPrefix = '{rez} {packageFullName} -- '.format(rez=os.environ.get('REZ_ENV'), packageFullName=chunk.node.packageFullName) cmdPrefix = '{rez} {packageFullName} -- '.format(rez=os.environ.get('REZ_ENV'), packageFullName=chunk.node.packageFullName)
cmdSuffix = '' cmdSuffix = ''
if chunk.node.isParallelized and chunk.node.size > 1: if chunk.node.isParallelized and chunk.node.size > 1:
cmdSuffix = ' ' + self.commandLineRange.format(**chunk.range.toDict()) cmdSuffix = ' ' + self.commandLineRange.format(**chunk.range.toDict())
return cmdPrefix + chunk.node.nodeDesc.commandLine.format(**chunk.node._cmdVars) + cmdSuffix
cmdMem = ''
memSize = cgroup.getCgroupMemorySize()
if memSize > 0:
cmdMem = ' --maxMemory={memSize}'.format(memSize=memSize)
cmdCore = ''
coresCount = cgroup.getCgroupCpuCount()
if coresCount > 0:
cmdCore = ' --maxCores={coresCount}'.format(coresCount=coresCount)
return cmdPrefix + chunk.node.nodeDesc.commandLine.format(**chunk.node._cmdVars) + cmdMem + cmdCore + cmdSuffix
def stopProcess(self, chunk): def stopProcess(self, chunk):
# the same node could exists several times in the graph and # the same node could exists several times in the graph and