Mirror of https://github.com/alicevision/Meshroom.git (synced 2025-08-06 10:18:42 +02:00)
New notion of local isolated computation for Python nodes using meshroom_compute

Reorganization:
- BaseNode: the base class for all nodes
- Node: now dedicated to Python nodes, with the implementation directly in the process function
- CommandLineNode: dedicated to generating and running external command lines
This commit is contained in:
parent faece7efca
commit 727a4d129b

6 changed files with 288 additions and 152 deletions
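As a companion to the diff below, here is a minimal sketch (assumptions noted, not code from this commit) of how the reorganized node-description classes are intended to be used. BaseNode, Node and CommandLineNode are the classes introduced below; MyPythonNode, MyTool and their attributes are hypothetical examples.

    from meshroom.core import desc

    class MyPythonNode(desc.Node):
        """A pure Python node: the implementation lives directly in processChunk()."""
        inputs = []   # hypothetical: attribute descriptions omitted for brevity
        outputs = []

        def processChunk(self, chunk):
            # Unless meshroom_compute is called with --inCurrentEnv, NodeChunk.process()
            # routes this node through Node.processChunkInEnvironment(), which re-runs the
            # chunk in an isolated meshroom_compute subprocess (ExecMode.LOCAL_ISOLATED).
            print(f"Computing {chunk.node.name}")

    class MyTool(desc.CommandLineNode):
        """A node that only generates and runs an external command line."""
        commandLine = 'myTool {allParams}'  # hypothetical command-line template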
@@ -16,7 +16,7 @@ meshroom.setupEnvironment()
 import meshroom.core
 import meshroom.core.graph
-from meshroom.core.node import Status
+from meshroom.core.node import Status, ExecMode


 parser = argparse.ArgumentParser(description='Execute a Graph of processes.')
@@ -26,6 +26,8 @@ parser.add_argument('--node', metavar='NODE_NAME', type=str,
                     help='Process the node. It will generate an error if the dependencies are not already computed.')
 parser.add_argument('--toNode', metavar='NODE_NAME', type=str,
                     help='Process the node with its dependencies.')
+parser.add_argument('--inCurrentEnv', help='Execute process in current env without creating a dedicated runtime environment.',
+                    action='store_true')
 parser.add_argument('--forceStatus', help='Force computation if status is RUNNING or SUBMITTED.',
                     action='store_true')
 parser.add_argument('--forceCompute', help='Compute in all cases even if already computed.',
@@ -81,7 +83,11 @@ if args.node:
     chunks = node.chunks
     for chunk in chunks:
         if chunk.status.status in submittedStatuses:
-            print('Warning: Node is already submitted with status "{}". See file: "{}"'.format(chunk.status.status.name, chunk.statusFile))
+            # Particular case for the LOCAL_ISOLATED, the node status is set to RUNNING by the submitter directly.
+            # We ensure that no other instance has started to compute, by checking that the sessionUid is empty.
+            if chunk.status.execMode == ExecMode.LOCAL_ISOLATED and not chunk.status.sessionUid and chunk.status.submitterSessionUid:
+                continue
+            print(f'Warning: Node is already submitted with status "{chunk.status.status.name}". See file: "{chunk.statusFile}". ExecMode: {chunk.status.execMode.name}, SessionUid: {chunk.status.sessionUid}, submitterSessionUid: {chunk.status.submitterSessionUid}')
             # sys.exit(-1)

 if args.extern:
@@ -91,9 +97,9 @@ if args.node:
     node.preprocess()
     if args.iteration != -1:
         chunk = node.chunks[args.iteration]
-        chunk.process(args.forceCompute)
+        chunk.process(args.forceCompute, args.inCurrentEnv)
     else:
-        node.process(args.forceCompute)
+        node.process(args.forceCompute, args.inCurrentEnv)
     node.postprocess()
 else:
     if args.iteration != -1:
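For illustration, the isolated-computation path added further below builds a command line equivalent to the following invocations of this script (the project path and node name are hypothetical examples):

    python bin/meshroom_compute /path/to/project.mg --node Texturing_1 --extern --inCurrentEnv
    # For a parallelized node, one call per chunk:
    python bin/meshroom_compute /path/to/project.mg --node Texturing_1 --extern --inCurrentEnv --iteration 0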
@@ -21,36 +21,9 @@ from .computation import (
 )
 from .node import (
     AVCommandLineNode,
+    BaseNode,
     CommandLineNode,
     InitNode,
     InputNode,
     Node,
 )
-
-__all__ = [
-    # attribute
-    "Attribute",
-    "BoolParam",
-    "ChoiceParam",
-    "ColorParam",
-    "File",
-    "FloatParam",
-    "GroupAttribute",
-    "IntParam",
-    "ListAttribute",
-    "PushButtonParam",
-    "StringParam",
-    # computation
-    "DynamicNodeSize",
-    "Level",
-    "MultiDynamicNodeSize",
-    "Parallelization",
-    "Range",
-    "StaticNodeSize",
-    # node
-    "AVCommandLineNode",
-    "CommandLineNode",
-    "InitNode",
-    "InputNode",
-    "Node",
-]
@@ -1,16 +1,37 @@
 from inspect import getfile
 from pathlib import Path
+import logging
 import os
 import psutil
 import shlex
+import shutil
+import sys

 from .computation import Level, StaticNodeSize
 from .attribute import StringParam, ColorParam

+import meshroom
 from meshroom.core import cgroup


-class Node(object):
+_MESHROOM_ROOT = Path(meshroom.__file__).parent.parent
+_MESHROOM_COMPUTE = _MESHROOM_ROOT / "bin" / "meshroom_compute"
+
+
+def isNodeSaved(node):
+    """Returns whether a node is identical to its serialized counterpart in the current graph file."""
+    filepath = node.graph.filepath
+    if not filepath:
+        return False
+
+    from meshroom.core.graph import loadGraph
+    graphSaved = loadGraph(filepath)
+    nodeSaved = graphSaved.node(node.name)
+    if nodeSaved is None:
+        return False
+    return nodeSaved._uid == node._uid
+
+
+class BaseNode(object):
     """
     """
     cpu = Level.NORMAL
@@ -62,7 +83,7 @@ class Node(object):
     category = 'Other'

     def __init__(self):
-        super(Node, self).__init__()
+        super(BaseNode, self).__init__()
         self.hasDynamicOutputAttribute = any(output.isDynamicValue for output in self.outputs)
         self.sourceCodeFolder = Path(getfile(self.__class__)).parent.resolve().as_posix()
@@ -113,13 +134,102 @@ class Node(object):
         pass

     def stopProcess(self, chunk):
-        raise NotImplementedError('No stopProcess implementation on node: {}'.format(chunk.node.name))
+        logging.warning(f'No stopProcess implementation on node: {chunk.node.name}')

     def processChunk(self, chunk):
         raise NotImplementedError(f'No processChunk implementation on node: "{chunk.node.name}"')

+    def executeChunkCommandLine(self, chunk, cmd, env=None):
+        try:
+            with open(chunk.logFile, 'w') as logF:
+                chunk.status.commandLine = cmd
+                chunk.saveStatusFile()
+                cmdList = shlex.split(cmd)
+                # Resolve executable to full path
+                prog = shutil.which(cmdList[0], path=env.get('PATH') if env else None)
+
+                print(f"Starting Process for '{chunk.node.name}'")
+                print(f' - commandLine: {cmd}')
+                print(f' - logFile: {chunk.logFile}')
+                if prog:
+                    cmdList[0] = prog
+                    print(f' - command full path: {prog}')
+
+                # Change the process group to avoid Meshroom main process being killed if the subprocess
+                # gets terminated by the user or an Out Of Memory (OOM kill).
+                if sys.platform == "win32":
+                    platformArgs = {"creationflags": psutil.CREATE_NEW_PROCESS_GROUP}
+                    # Note: DETACHED_PROCESS means fully detached process.
+                    # We don't want a fully detached process to ensure that if Meshroom is killed,
+                    # the subprocesses are killed too.
+                else:
+                    platformArgs = {"start_new_session": True}
+                    # Note: "preexec_fn"=os.setsid is the old way before python-3.2
+
+                chunk.subprocess = psutil.Popen(
+                    cmdList,
+                    stdout=logF,
+                    stderr=logF,
+                    cwd=chunk.node.internalFolder,
+                    env=env,
+                    **platformArgs,
+                )
+
+                if hasattr(chunk, "statThread"):
+                    # We only have a statThread if the node is running in the current process
+                    # and not in a dedicated environment/process.
+                    chunk.statThread.proc = chunk.subprocess
+
+                stdout, stderr = chunk.subprocess.communicate()
+
+                chunk.status.returnCode = chunk.subprocess.returncode
+
+                if chunk.subprocess.returncode and chunk.subprocess.returncode < 0:
+                    signal_num = -chunk.subprocess.returncode
+                    logF.write(f"Process was killed by signal: {signal_num}")
+                    try:
+                        status = chunk.subprocess.status()
+                        logF.write(f"Process status: {status}")
+                    except:
+                        pass
+
+            if chunk.subprocess.returncode != 0:
+                with open(chunk.logFile, 'r') as logF:
+                    logContent = ''.join(logF.readlines())
+                raise RuntimeError('Error on node "{}":\nLog:\n{}'.format(chunk.name, logContent))
+        finally:
+            chunk.subprocess = None
+
+    def stopProcess(self, chunk):
+        # The same node could exists several times in the graph and
+        # only one would have the running subprocess; ignore all others
+        if not chunk.subprocess:
+            print(f"[{chunk.node.name}] stopProcess: no subprocess")
+            return
+
+        # Retrieve process tree
+        processes = chunk.subprocess.children(recursive=True) + [chunk.subprocess]
+        logging.debug(f"[{chunk.node.name}] Processes to stop: {len(processes)}")
+        for process in processes:
+            try:
+                # With terminate, the process has a chance to handle cleanup
+                process.terminate()
+            except psutil.NoSuchProcess:
+                pass
+
+        # If it is still running, force kill it
+        for process in processes:
+            try:
+                # Use is_running() instead of poll() as we use a psutil.Process object
+                if process.is_running():  # Check if process is still alive
+                    process.kill()  # Forcefully kill it
+            except psutil.NoSuchProcess:
+                logging.info(f"[{chunk.node.name}] Process already terminated.")
+            except psutil.AccessDenied:
+                logging.info(f"[{chunk.node.name}] Permission denied to kill the process.")
+
+
-class InputNode(Node):
+class InputNode(BaseNode):
     """
     Node that does not need to be processed, it is just a placeholder for inputs.
     """
@@ -130,7 +240,24 @@ class InputNode(Node):
         pass


-class CommandLineNode(Node):
+class Node(BaseNode):
+
+    def __init__(self):
+        super(Node, self).__init__()
+
+    def processChunkInEnvironment(self, chunk):
+        if not isNodeSaved(chunk.node):
+            raise RuntimeError("File must be saved before computing in isolated environment.")
+
+        meshroomComputeCmd = f"python {_MESHROOM_COMPUTE} {chunk.node.graph.filepath} --node {chunk.node.name} --extern --inCurrentEnv"
+        if len(chunk.node.getChunks()) > 1:
+            meshroomComputeCmd += f" --iteration {chunk.range.iteration}"
+
+        runtimeEnv = None
+        self.executeChunkCommandLine(chunk, meshroomComputeCmd, env=runtimeEnv)
+
+
+class CommandLineNode(BaseNode):
     """
     """
     commandLine = ''  # need to be defined on the node
@@ -143,14 +270,14 @@ class CommandLineNode(Node):
     def buildCommandLine(self, chunk):

         cmdPrefix = ''
-        # If rez available in env, we use it
-        if "REZ_ENV" in os.environ and chunk.node.packageVersion:
-            # If the node package is already in the environment, we don't need a new dedicated rez environment
-            alreadyInEnv = os.environ.get("REZ_{}_VERSION".format(chunk.node.packageName.upper()),
-                                          "").startswith(chunk.node.packageVersion)
-            if not alreadyInEnv:
-                cmdPrefix = '{rez} {packageFullName} -- '.format(rez=os.environ.get("REZ_ENV"),
-                                                                 packageFullName=chunk.node.packageFullName)
+        # # If rez available in env, we use it
+        # if "REZ_ENV" in os.environ and chunk.node.packageVersion:
+        #     # If the node package is already in the environment, we don't need a new dedicated rez environment
+        #     alreadyInEnv = os.environ.get("REZ_{}_VERSION".format(chunk.node.packageName.upper()),
+        #                                   "").startswith(chunk.node.packageVersion)
+        #     if not alreadyInEnv:
+        #         cmdPrefix = '{rez} {packageFullName} -- '.format(rez=os.environ.get("REZ_ENV"),
+        #                                                          packageFullName=chunk.node.packageFullName)

         cmdSuffix = ''
         if chunk.node.isParallelized and chunk.node.size > 1:
@@ -158,48 +285,10 @@ class CommandLineNode(Node):

         return cmdPrefix + chunk.node.nodeDesc.commandLine.format(**chunk.node._cmdVars) + cmdSuffix

-    def stopProcess(self, chunk):
-        # The same node could exists several times in the graph and
-        # only one would have the running subprocess; ignore all others
-        if not hasattr(chunk, "subprocess"):
-            return
-        if chunk.subprocess:
-            # Kill process tree
-            processes = chunk.subprocess.children(recursive=True) + [chunk.subprocess]
-            try:
-                for process in processes:
-                    process.terminate()
-            except psutil.NoSuchProcess:
-                pass
-
     def processChunk(self, chunk):
-        try:
-            with open(chunk.logFile, 'w') as logF:
-                cmd = self.buildCommandLine(chunk)
-                chunk.status.commandLine = cmd
-                chunk.saveStatusFile()
-                print(' - commandLine: {}'.format(cmd))
-                print(' - logFile: {}'.format(chunk.logFile))
-                chunk.subprocess = psutil.Popen(shlex.split(cmd), stdout=logF, stderr=logF, cwd=chunk.node.internalFolder)
-
-                # Store process static info into the status file
-                # chunk.status.env = node.proc.environ()
-                # chunk.status.createTime = node.proc.create_time()
-
-                chunk.statThread.proc = chunk.subprocess
-                stdout, stderr = chunk.subprocess.communicate()
-                chunk.subprocess.wait()
-
-                chunk.status.returnCode = chunk.subprocess.returncode
-
-            if chunk.subprocess.returncode != 0:
-                with open(chunk.logFile, 'r') as logF:
-                    logContent = ''.join(logF.readlines())
-                raise RuntimeError('Error on node "{}":\nLog:\n{}'.format(chunk.name, logContent))
-        except Exception:
-            raise
-        finally:
-            chunk.subprocess = None
+        cmd = self.buildCommandLine(chunk)
+        # TODO: Setup runtime env
+        self.executeChunkCommandLine(chunk, cmd)


 # Specific command line node for AliceVision apps
@@ -282,3 +371,4 @@ class InitNode(object):
         for attr in attributesDict:
             if node.hasAttribute(attr):
                 node.attribute(attr).value = attributesDict[attr]
+
@@ -57,6 +57,7 @@ class Status(Enum):
 class ExecMode(Enum):
     NONE = auto()
     LOCAL = auto()
+    LOCAL_ISOLATED = auto()
     EXTERN = auto()

@@ -67,20 +68,13 @@ class StatusData(BaseObject):

     def __init__(self, nodeName='', nodeType='', packageName='', packageVersion='', parent: BaseObject = None):
         super(StatusData, self).__init__(parent)
-        self.status = Status.NONE
-        self.execMode = ExecMode.NONE
-        self.nodeName = nodeName
-        self.nodeType = nodeType
-        self.packageName = packageName
-        self.packageVersion = packageVersion
-        self.graph = ''
-        self.commandLine = None
-        self.env = None
-        self.startDateTime = ""
-        self.endDateTime = ""
-        self.elapsedTime = 0
-        self.hostname = ""
-        self.sessionUid = meshroom.core.sessionUid
+        self.reset()
+        self.nodeName: str = nodeName
+        self.nodeType: str = nodeType
+        self.packageName: str = packageName
+        self.packageVersion: str = packageVersion
+        self.sessionUid: Optional[str] = meshroom.core.sessionUid
+        self.submitterSessionUid: Optional[str] = None

     def merge(self, other):
         self.startDateTime = min(self.startDateTime, other.startDateTime)
@@ -88,27 +82,44 @@ class StatusData(BaseObject):
         self.elapsedTime += other.elapsedTime

     def reset(self):
-        self.status = Status.NONE
-        self.execMode = ExecMode.NONE
-        self.graph = ''
-        self.commandLine = None
-        self.env = None
-        self.startDateTime = ""
-        self.endDateTime = ""
-        self.elapsedTime = 0
-        self.hostname = ""
-        self.sessionUid = meshroom.core.sessionUid
+        self.nodeName: str = ""
+        self.nodeType: str = ""
+        self.packageName: str = ""
+        self.packageVersion: str = ""
+        self.resetDynamicValues()
+
+    def resetDynamicValues(self):
+        self.status: Status = Status.NONE
+        self.execMode: ExecMode = ExecMode.NONE
+        self.graph = ""
+        self.commandLine: str = ""
+        self.env: str = ""
+        self._startTime: Optional[datetime.datetime] = None
+        self.startDateTime: str = ""
+        self.endDateTime: str = ""
+        self.elapsedTime: float = 0.0
+        self.hostname: str = ""

     def initStartCompute(self):
         import platform
         self.sessionUid = meshroom.core.sessionUid
         self.hostname = platform.node()
+        self._startTime = time.time()
         self.startDateTime = datetime.datetime.now().strftime(self.dateTimeFormatting)
         # to get datetime obj: datetime.datetime.strptime(obj, self.dateTimeFormatting)

+    def initSubmit(self):
+        ''' When submitting a node, we reset the status information to ensure that we do not keep outdated information.
+        '''
+        self.resetDynamicValues()
+        self.sessionUid = None
+        self.submitterSessionUid = meshroom.core.sessionUid
+
     def initEndCompute(self):
         self.sessionUid = meshroom.core.sessionUid
         self.endDateTime = datetime.datetime.now().strftime(self.dateTimeFormatting)
+        if self._startTime != None:
+            self.elapsedTime = time.time() - self._startTime

     @property
     def elapsedTimeStr(self):
@@ -118,9 +129,12 @@ class StatusData(BaseObject):
         d = self.__dict__.copy()
         d["elapsedTimeStr"] = self.elapsedTimeStr

-        # Skip non data attributes from BaseObject
+        # Skip some attributes (some are from BaseObject)
         d.pop("destroyed", None)
         d.pop("objectNameChanged", None)
+        d.pop("_parent", None)
+        d.pop("_startTime", None)

         return d

     def fromDict(self, d):
@@ -142,6 +156,7 @@ class StatusData(BaseObject):
         self.elapsedTime = d.get('elapsedTime', 0)
         self.hostname = d.get('hostname', '')
         self.sessionUid = d.get('sessionUid', '')
+        self.submitterSessionUid = d.get('submitterSessionUid', '')


 class LogManager:
@@ -251,9 +266,9 @@ class NodeChunk(BaseObject):
         super(NodeChunk, self).__init__(parent)
         self.node = node
         self.range = range
-        self.logManager = LogManager(self)
-        self._status = StatusData(node.name, node.nodeType, node.packageName, node.packageVersion)
-        self.statistics = stats.Statistics()
+        self.logManager: LogManager = LogManager(self)
+        self._status: StatusData = StatusData(node.name, node.nodeType, node.packageName, node.packageVersion)
+        self.statistics: stats.Statistics = stats.Statistics()
         self.statusFileLastModTime = -1
         self.subprocess = None
         # Notify update in filepaths when node's internal folder changes
@@ -298,6 +313,7 @@ class NodeChunk(BaseObject):
         try:
             with open(statusFile, 'r') as jsonFile:
                 statusData = json.load(jsonFile)
+            # logging.debug(f"updateStatusFromCache({self.node.name}): From status {self.status.status} to {statusData['status']}")
             self.status.fromDict(statusData)
             self.statusFileLastModTime = os.path.getmtime(statusFile)
         except Exception:
@@ -343,12 +359,9 @@ class NodeChunk(BaseObject):
         renameWritingToFinalPath(statusFilepathWriting, statusFilepath)

     def upgradeStatusTo(self, newStatus, execMode=None):
-        if newStatus.value <= self._status.status.value:
-            logging.warning("Downgrade status on node '{}' from {} to {}".
-                            format(self.name, self._status.status, newStatus))
-
-        if newStatus == Status.SUBMITTED:
-            self._status = StatusData(self.node.name, self.node.nodeType, self.node.packageName, self.node.packageVersion)
+        if newStatus.value < self._status.status.value:
+            logging.warning(f"Downgrade status on node '{self.name}' from {self._status.status} to {newStatus}")
+
         if execMode is not None:
             self._status.execMode = execMode
             self.execModeNameChanged.emit()
@@ -397,15 +410,22 @@ class NodeChunk(BaseObject):
     def isFinished(self):
         return self._status.status == Status.SUCCESS

-    def process(self, forceCompute=False):
+    def process(self, forceCompute=False, inCurrentEnv=False):
         if not forceCompute and self._status.status == Status.SUCCESS:
             logging.info("Node chunk already computed: {}".format(self.name))
             return

+        # Start the process environment for nodes running in isolation.
+        # This only happens once, when the node has the SUBMITTED status.
+        # The sub-process will go through this method again, but the node status will have been set to RUNNING.
+        if not inCurrentEnv and isinstance(self.node.nodeDesc, desc.Node):
+            self._processInIsolatedEnvironment()
+            return
+
         global runningProcesses
         runningProcesses[self.name] = self
         self._status.initStartCompute()
-        exceptionStatus = None
-        startTime = time.time()
+        executionStatus = None
         self.upgradeStatusTo(Status.RUNNING)
         self.statThread = stats.StatisticsThread(self)
         self.statThread.start()
@@ -413,18 +433,18 @@ class NodeChunk(BaseObject):
             self.node.nodeDesc.processChunk(self)
             # NOTE: this assumes saving the output attributes for each chunk
             self.node.saveOutputAttr()
+            executionStatus = Status.SUCCESS
         except Exception:
             if self._status.status != Status.STOPPED:
-                exceptionStatus = Status.ERROR
+                executionStatus = Status.ERROR
             raise
         except (KeyboardInterrupt, SystemError, GeneratorExit):
-            exceptionStatus = Status.STOPPED
+            executionStatus = Status.STOPPED
             raise
         finally:
             self._status.initEndCompute()
-            self._status.elapsedTime = time.time() - startTime
-            if exceptionStatus is not None:
-                self.upgradeStatusTo(exceptionStatus)
+            if executionStatus:
+                self.upgradeStatusTo(executionStatus)
             logging.info(" - elapsed time: {}".format(self._status.elapsedTimeStr))
             # Ask and wait for the stats thread to stop
             self.statThread.stopRequest()
@@ -432,19 +452,43 @@ class NodeChunk(BaseObject):
             self.statistics = stats.Statistics()
             del runningProcesses[self.name]

-        self.upgradeStatusTo(Status.SUCCESS)
+    def _processInIsolatedEnvironment(self):
+        """Process this node chunk in the isolated environment defined in the environment configuration."""
+        try:
+            self._status.initSubmit()
+            self.upgradeStatusTo(Status.RUNNING, execMode=ExecMode.LOCAL_ISOLATED)
+            self.node.nodeDesc.processChunkInEnvironment(self)
+        except:
+            # status should be already updated by meshroom_compute
+            self.updateStatusFromCache()
+            if self._status.status != Status.ERROR:
+                # If meshroom_compute has crashed or been killed, the status may have not been set to ERROR.
+                # In this particular case, we enforce it from here.
+                self.upgradeStatusTo(Status.ERROR)
+            raise
+        # Update the chunk status.
+        self.updateStatusFromCache()
+        # Update the output attributes, as any chunk may have modified them.
+        self.node.updateOutputAttr()

     def stopProcess(self):
-        if not self.isExtern():
-            if self._status.status == Status.RUNNING:
-                self.upgradeStatusTo(Status.STOPPED)
-            elif self._status.status == Status.SUBMITTED:
-                self.upgradeStatusTo(Status.NONE)
+        if self.isExtern():
+            return
+        if self._status.status != Status.RUNNING:
+            return
+
+        self.upgradeStatusTo(Status.STOPPED)
         self.node.nodeDesc.stopProcess(self)

     def isExtern(self):
-        return self._status.execMode == ExecMode.EXTERN or (
-            self._status.execMode == ExecMode.LOCAL and self._status.sessionUid != meshroom.core.sessionUid)
+        """ The computation is managed externally by another instance of Meshroom, or by meshroom_compute on renderfarm).
+        In the ambiguous case of an isolated environment, it is considered as local as we can stop it.
+        """
+        if self._status.execMode == ExecMode.LOCAL_ISOLATED:
+            # It is a local isolated node, check if it is submitted by our current session.
+            return self._status.submitterSessionUid != meshroom.core.sessionUid
+        return self._status.sessionUid != meshroom.core.sessionUid

     statusChanged = Signal()
     status = Property(Variant, lambda self: self._status, notify=statusChanged)
@@ -845,7 +889,13 @@ class BaseNode(BaseObject):
         Status will be reset to Status.NONE
         """
         if self.internalFolder and os.path.exists(self.internalFolder):
-            shutil.rmtree(self.internalFolder)
+            try:
+                shutil.rmtree(self.internalFolder)
+            except Exception as e:
+                # We could get some "Device or resource busy" on .nfs file while removing the folder on linux network.
+                # On windows, some output files may be open for visualization and the removal will fail.
+                # On both cases, we can ignore it.
+                logging.warning(f"Failed to remove internal folder: '{self.internalFolder}'. Error: {e}.")
             self.updateStatusFromCache()

     @Slot(result=str)
@@ -1063,6 +1113,7 @@
     def submit(self, forceCompute=False):
         for chunk in self._chunks:
             if forceCompute or chunk.status.status != Status.SUCCESS:
+                chunk._status.initSubmit()
                 chunk.upgradeStatusTo(Status.SUBMITTED, ExecMode.EXTERN)

     def beginSequence(self, forceCompute=False):
@@ -1077,9 +1128,9 @@
         # Invoke the Node Description's pre-process for the Client Node to prepare its processing
         self.nodeDesc.preprocess(self)

-    def process(self, forceCompute=False):
+    def process(self, forceCompute=False, inCurrentEnv=False):
         for chunk in self._chunks:
-            chunk.process(forceCompute)
+            chunk.process(forceCompute, inCurrentEnv)

     def postprocess(self):
         # Invoke the post process on Client Node to execute after the processing on the node is completed
@@ -1090,8 +1141,8 @@
             return
         if not self.nodeDesc.hasDynamicOutputAttribute:
             return
-        # logging.warning("updateOutputAttr: {}, status: {}".format(self.name, self.globalStatus))
-        if self.getGlobalStatus() == Status.SUCCESS:
+        # logging.warning(f"updateOutputAttr: {self.name}, status: {self.globalStatus}")
+        if Status.SUCCESS in [c._status.status for c in self.getChunks()]:
             self.loadOutputAttr()
         else:
             self.resetOutputAttr()
@@ -1339,19 +1390,33 @@
                 return False
         return True

+    def submitterStatusInThisSession(self):
+        if not self._chunks:
+            return False
+        for chunk in self._chunks:
+            if chunk.status.submitterSessionUid != meshroom.core.sessionUid:
+                return False
+        return True
+
     @Slot(result=bool)
     def canBeStopped(self):
         # Only locked nodes running in local with the same
         # sessionUid as the Meshroom instance can be stopped
-        return (self.locked and self.getGlobalStatus() == Status.RUNNING and
-                self.globalExecMode == "LOCAL" and self.statusInThisSession())
+        # logging.warning(f"[{self.name}] canBeStopped: globalExecMode={self.globalExecMode} globalStatus={self.getGlobalStatus()} statusInThisSession={self.statusInThisSession()}, submitterStatusInThisSession={self.submitterStatusInThisSession()}")
+        return (self.getGlobalStatus() == Status.RUNNING and
+                ((self.globalExecMode == ExecMode.LOCAL.name and self.statusInThisSession()) or
+                 (self.globalExecMode == ExecMode.LOCAL_ISOLATED.name and self.submitterStatusInThisSession())
+                 ))

     @Slot(result=bool)
     def canBeCanceled(self):
         # Only locked nodes submitted in local with the same
         # sessionUid as the Meshroom instance can be canceled
-        return (self.locked and self.getGlobalStatus() == Status.SUBMITTED and
-                self.globalExecMode == "LOCAL" and self.statusInThisSession())
+        # logging.warning(f"[{self.name}] canBeCanceled: globalExecMode={self.globalExecMode} globalStatus={self.getGlobalStatus()} statusInThisSession={self.statusInThisSession()}, submitterStatusInThisSession={self.submitterStatusInThisSession()}")
+        return (self.getGlobalStatus() == Status.SUBMITTED and
+                ((self.globalExecMode == ExecMode.LOCAL.name and self.statusInThisSession()) or
+                 (self.globalExecMode == ExecMode.LOCAL_ISOLATED.name and self.submitterStatusInThisSession())
+                 ))

     def hasImageOutputAttribute(self):
         """
@@ -168,6 +168,7 @@ class _NodeCreator:

     def _createNode(self) -> Node:
         logging.info(f"Creating node '{self.name}'")
+        # TODO: user inputs/outputs may conflicts with internal names (like position, uid)
         return Node(
             self.nodeType,
             position=self.position,
@@ -183,10 +183,11 @@ class ChunksMonitor(QObject):
             return self.statusFiles, self.monitorableChunks
         elif self.filePollerRefresh is PollerRefreshStatus.MINIMAL_ENABLED.value:
             for c in self.monitorableChunks:
-                # Only chunks that are run externally should be monitored; when run locally, status changes are already notified
-                if c.isExtern():
-                    # Chunks with an ERROR status may be re-submitted externally and should thus still be monitored
-                    if c._status.status in {Status.SUBMITTED, Status.RUNNING, Status.ERROR}:
+                # Only chunks that are run externally or local_isolated should be monitored,
+                # when run locally, status changes are already notified.
+                # Chunks with an ERROR status may be re-submitted externally and should thus still be monitored
+                if (c.isExtern() and c._status.status in (Status.SUBMITTED, Status.RUNNING, Status.ERROR)) or (
+                        (c._status.execMode is ExecMode.LOCAL_ISOLATED) and (c._status.status in (Status.SUBMITTED, Status.RUNNING))):
                     files.append(c.statusFile)
                     chunks.append(c)
             return files, chunks
|
@ -582,8 +583,8 @@ class UIGraph(QObject):
|
||||||
def updateGraphComputingStatus(self):
|
def updateGraphComputingStatus(self):
|
||||||
# update graph computing status
|
# update graph computing status
|
||||||
computingLocally = any([
|
computingLocally = any([
|
||||||
(ch.status.execMode == ExecMode.LOCAL and
|
(((ch.status.execMode == ExecMode.LOCAL and ch.status.sessionUid == sessionUid) or
|
||||||
ch.status.sessionUid == sessionUid and
|
ch.status.execMode == ExecMode.LOCAL_ISOLATED) and
|
||||||
ch.status.status in (Status.RUNNING, Status.SUBMITTED))
|
ch.status.status in (Status.RUNNING, Status.SUBMITTED))
|
||||||
for ch in self._sortedDFSChunks])
|
for ch in self._sortedDFSChunks])
|
||||||
submitted = any([ch.status.status == Status.SUBMITTED for ch in self._sortedDFSChunks])
|
submitted = any([ch.status.status == Status.SUBMITTED for ch in self._sortedDFSChunks])
|
||||||
|
|