"""Encapsulates everything necessary for a cluster job, like setting
it up, running it and restarting it"""

import os,sys,subprocess
from os import path,unlink
from threading import Thread,Lock,Timer

from PyFoam.Applications.Decomposer import Decomposer
from PyFoam.Applications.Runner import Runner
from PyFoam.Applications.SteadyRunner import SteadyRunner
from PyFoam.Applications.CloneCase import CloneCase
from PyFoam.FoamInformation import changeFoamVersion
from PyFoam.FoamInformation import foamVersion as getFoamVersion
from PyFoam.Error import error,warning
from PyFoam import configuration as config
from PyFoam.FoamInformation import oldAppConvention as oldApp
from PyFoam.RunDictionary.SolutionDirectory import SolutionDirectory

from PyFoam.ThirdParty.six import print_,iteritems


class ClusterJob(object):
    """All cluster-jobs are to be derived from this base-class

    The actual jobs are implemented by overriding methods

    There are a number of variables in this class that are used to
    'communicate' information between the various stages"""
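
    # A concrete job typically subclasses ClusterJob and overrides the hook
    # methods defined below (setup, run, cleanup, ...). Minimal sketch (the
    # class name and the OpenFOAM utilities used here are only illustrative):
    #
    #   class MyJob(ClusterJob):
    #       def setup(self,parameters):
    #           self.foamRun("blockMesh")
    #       def run(self,parameters):
    #           self.foamRun("simpleFoam",steady=True)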

    def __init__(self,
                 basename,
                 arrayJob=False,
                 hardRestart=False,
                 autoParallel=True,
                 doAutoReconstruct=None,
                 foamVersion=None,
                 compileOption=None,
                 useFoamMPI=False,
                 multiRegion=False,
                 parameters={},
                 isDecomposed=False):
        """Initializes the job
        @param basename: Base name of the job
        @param arrayJob: this job is a parameter variation. The tasks
        are identified by their task-id
        @param hardRestart: treat the job as restarted
        @param autoParallel: Parallelization is handled by the base-class
        @param doAutoReconstruct: Automatically reconstruct the case if
        autoParallel is set. If the value is None it is looked up from
        the configuration
        @param foamVersion: The OpenFOAM-version that is to be used
        @param compileOption: Forces the compile-option (usually 'Opt' or 'Debug')
        @param useFoamMPI: Use the OpenMPI supplied with OpenFOAM
        @param multiRegion: This job consists of multiple regions
        @param parameters: Dictionary with parameters that are passed to the Runner
        @param isDecomposed: Assume that the job is already decomposed"""

        if "JOB_ID" not in os.environ:
            error("Not an SGE-job. Environment variable JOB_ID is missing")
        self.jobID=int(os.environ["JOB_ID"])
        self.jobName=os.environ["JOB_NAME"]

        self.basename=path.join(path.abspath(path.curdir),basename)

        sgeRestarted=False
        if "RESTARTED" in os.environ:
            sgeRestarted=(int(os.environ["RESTARTED"])!=0)

        if sgeRestarted or hardRestart:
            self.restarted=True
        else:
            self.restarted=False

        if foamVersion is None:
            foamVersion=config().get("OpenFOAM","Version")

        changeFoamVersion(foamVersion,compileOption=compileOption)

        if "WM_PROJECT_VERSION" not in os.environ:
            error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

        self.autoParallel=autoParallel

        self.doAutoReconstruct=doAutoReconstruct
        if self.doAutoReconstruct is None:
            self.doAutoReconstruct=config().getboolean("ClusterJob","doAutoReconstruct")

        self.multiRegion=multiRegion

        self.parameters=parameters

        self.hostfile=None
        self.nproc=1

        if "NSLOTS" in os.environ:
            self.nproc=int(os.environ["NSLOTS"])
            self.message("Running on",self.nproc,"CPUs")
            if self.nproc>1:
                # SGE writes the machine-file for the parallel environment to $TMP
                self.hostfile=path.join(os.environ["TMP"],"machines")
                self.message("Using the machinefile",self.hostfile)
                self.message("Contents of the machinefile:",open(self.hostfile).readlines())

        self.ordinaryEnd=True
        self.listenToTimer=False

        self.taskID=None
        self.arrayJob=arrayJob

        if self.arrayJob:
            self.taskID=int(os.environ["SGE_TASK_ID"])

        if not useFoamMPI and foamVersion not in eval(config().get("ClusterJob","useFoamMPI",default='[]')):
            # prepend the cluster-specific MPI installation configured for PyFoam
            self.message("Adding Cluster-specific paths")
            os.environ["PATH"]=config().get("ClusterJob","path")+":"+os.environ["PATH"]
            os.environ["LD_LIBRARY_PATH"]=config().get("ClusterJob","ldpath")+":"+os.environ["LD_LIBRARY_PATH"]

        self.isDecomposed=isDecomposed

    def fullJobId(self):
        """Return a string with the full job-ID"""
        result=str(self.jobID)
        if self.arrayJob:
            result+=":"+str(self.taskID)
        return result

    def jobFile(self):
        """The file with the job information"""
        jobfile="%s.%d" % (self.jobName,self.jobID)
        if self.arrayJob:
            jobfile+=".%d" % self.taskID
        jobfile+=".pyFoam.clusterjob"
        jobfile=path.join(path.dirname(self.basename),jobfile)

        return jobfile

    def checkpointFile(self):
        """The file that makes the job write a checkpoint"""
        return self.jobFile()+".checkpoint"

    def stopFile(self):
        """The file that makes the job write a checkpoint and end"""
        return self.jobFile()+".stop"
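
    # Taken together these methods yield names like
    #   <JOB_NAME>.<JOB_ID>[.<TASK_ID>].pyFoam.clusterjob[.checkpoint|.stop]
    # next to the case; creating the .checkpoint or .stop file from outside is
    # what asks a running job to write a checkpoint or to stop.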

    def casedir(self):
        """Returns the actual directory of the case.
        To be overridden if appropriate"""
        if self.arrayJob:
            return "%s.%05d" % (self.basename,self.taskID)
        else:
            return self.basename

    def casename(self):
        """Returns just the name of the case"""
        return path.basename(self.casedir())

    def execute(self,cmd):
        """Executes a shell command in the case directory. No checking is done
        @param cmd: the command as a string"""
        oldDir=os.getcwd()
        self.message("Changing directory to",self.casedir())
        os.chdir(self.casedir())
        self.message("Executing",cmd)
        try:
            retcode=subprocess.call(cmd,shell=True)
            if retcode<0:
                self.message(cmd,"was terminated by signal",-retcode)
            else:
                self.message(cmd,"returned",retcode)
        except OSError:
            e=sys.exc_info()[1]
            self.message(cmd,"Execution failed:",e)

        self.message("Execution of",cmd,"ended")
        self.message("Changing directory back to",oldDir)
        os.chdir(oldDir)
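
    # Typical use from a setup() override (the copied directory is only an
    # illustrative example):
    #
    #   self.execute("cp -r 0.orig 0")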

    def foamRun(self,application,
                args=[],
                foamArgs=[],
                steady=False,
                multiRegion=None,
                progress=False,
                noLog=False):
        """Runs a Foam-application or utility on the case.
        If it is a parallel job and the grid has
        already been decomposed (and not yet reconstructed) it is run in
        parallel
        @param application: the Foam-Application that is to be run
        @param foamArgs: A list with the additional arguments for the
        Foam-Application
        @param args: A list with additional arguments for the Runner-object
        @param steady: Use the steady-runner
        @param multiRegion: Run this on multiple regions (if None: I don't have an opinion on this)
        @param progress: Only output the time and nothing else
        @param noLog: Do not generate a logfile"""
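
        # For e.g. application="interFoam" on four CPUs of an already decomposed
        # case the list assembled below ends up roughly like (illustrative values):
        #   ["--job-id=1234","--procnr=4","--machinefile=/tmp/machines",
        #    "interFoam","-case","myCase"]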
        arglist=args[:]
        arglist+=["--job-id=%s" % self.fullJobId()]
        for k,v in iteritems(self.parameters):
            arglist+=["--parameter=%s:%s" % (str(k),str(v))]

        if self.isDecomposed and self.nproc>1:
            arglist+=["--procnr=%d" % self.nproc,
                      "--machinefile=%s" % self.hostfile]

        if progress:
            arglist+=["--progress"]
        if noLog:
            arglist+=["--no-log"]

        if self.multiRegion:
            if multiRegion is None or multiRegion==True:
                arglist+=["--all-regions"]
        elif multiRegion:
            warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")

        if self.restarted:
            arglist+=["--restart"]

        arglist+=[application]
        if oldApp():
            arglist+=[".",self.casename()]
        else:
            arglist+=["-case",self.casename()]

        arglist+=foamArgs

        self.message("Executing",arglist)

        if steady:
            self.message("Running Steady")
            runner=SteadyRunner(args=arglist)
        else:
            runner=Runner(args=arglist)
334
336 """Automatically decomposes the grid with a metis-algorithm"""
337
338 if path.isdir(path.join(self.casedir(),"processor0")):
339 warning("A processor directory already exists. There might be a problem")
340
341 defaultMethod="metis"
342
343 if getFoamVersion()>=(1,6):
344 defaultMethod="scotch"
345
346 args=["--method="+defaultMethod,
347 "--clear",
348 self.casename(),
349 self.nproc,
350 "--job-id=%s" % self.fullJobId()]
351
352 if self.multiRegion:
353 args.append("--all-regions")
354
355 deco=Decomposer(args=args)
356

    def autoReconstruct(self):
        """Default reconstruction of a parallel run"""

        if self.doAutoReconstruct:
            self.isDecomposed=False

            self.foamRun("reconstructPar",
                         args=["--logname=ReconstructPar"])
        else:
            self.message("No reconstruction (because we were asked not to)")

    def setup(self,parameters):
        """Set up the job. Called in the beginning if the
        job has not been restarted

        Usual tasks include grid conversion/setup, mesh decomposition etc

        @param parameters: a dictionary with parameters"""

        pass

    def postDecomposeSetup(self,parameters):
        """Additional setup, to be executed when the grid is already decomposed

        Usually for tasks that can be done on a decomposed grid

        @param parameters: a dictionary with parameters"""

        pass

    def run(self,parameters):
        """Run the actual job. Usually the solver.
        @param parameters: a dictionary with parameters"""

        pass

    def preReconstructCleanup(self,parameters):
        """Additional cleanup, to be executed when the grid is still decomposed

        Usually for tasks that can be done on a decomposed grid

        @param parameters: a dictionary with parameters"""

        pass

    def cleanup(self,parameters):
        """Clean up after a job
        @param parameters: a dictionary with parameters"""

        pass

    def additionalReconstruct(self,parameters):
        """Additional reconstruction of parallel runs (stuff that the
        OpenFOAM-reconstructPar doesn't do)
        @param parameters: a dictionary with parameters"""

        pass

    def taskParameters(self,id):
        """Parameters for a specific task
        @param id: the id of the task
        @return: a dictionary with parameters for this task"""

        error("taskParameters not implemented. Not a parameterized job")

        return {}
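
    # An array job would typically override taskParameters(), e.g. (a sketch
    # with an invented parameter name):
    #
    #   def taskParameters(self,id):
    #       return {"inletVelocity":0.1*id}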

    def stopJob(self):
        if self.listenToTimer:
            self.ordinaryEnd=False
            # the content written here is irrelevant; the existence of the
            # 'stop' file in the case is the actual signal
            f=open(path.join(self.basename,"stop"),"w")
            f.write("Geh z'haus")
            f.close()
            unlink(self.stopFile())
        else:
            warning("I'm not listening to your callbacks")

class SolverJob(ClusterJob):
    """A Cluster-Job that executes a solver. It implements the run-function.
    If a template-case is specified, the case is copied"""

    def __init__(self,basename,solver,
                 template=None,
                 cloneParameters=[],
                 arrayJob=False,
                 hardRestart=False,
                 autoParallel=True,
                 doAutoReconstruct=None,
                 foamVersion=None,
                 compileOption=None,
                 useFoamMPI=False,
                 steady=False,
                 multiRegion=False,
                 parameters={},
                 progress=False,
                 solverProgress=False,
                 solverNoLog=False,
                 isDecomposed=False):
        """@param template: Name of the template-case. It is assumed that
        it resides in the same directory as the actual case
        @param cloneParameters: a list with additional parameters for the
        CloneCase-object that copies the template
        @param solverProgress: Only writes the current time of the solver"""

        ClusterJob.__init__(self,basename,
                            arrayJob=arrayJob,
                            hardRestart=hardRestart,
                            autoParallel=autoParallel,
                            doAutoReconstruct=doAutoReconstruct,
                            foamVersion=foamVersion,
                            compileOption=compileOption,
                            useFoamMPI=useFoamMPI,
                            multiRegion=multiRegion,
                            parameters=parameters,
                            isDecomposed=isDecomposed)
        self.solver=solver
        self.steady=steady
        if template is not None and not self.restarted:
            template=path.join(path.dirname(self.casedir()),template)
            if path.abspath(basename)==path.abspath(template):
                error("The basename",basename,"and the template",template,"are the same directory")
            if isDecomposed:
                # don't modify the caller's (or the default) list in place
                cloneParameters=cloneParameters+["--parallel"]
            clone=CloneCase(
                args=cloneParameters+[template,self.casedir(),"--follow-symlinks"])
        self.solverProgress=solverProgress
        self.solverNoLog=solverNoLog

    def run(self,parameters):
        self.foamRun(self.solver,
                     steady=self.steady,
                     multiRegion=False,
                     progress=self.solverProgress,
                     noLog=self.solverNoLog)
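
# Example of a small driver script submitted to SGE (a sketch: the case,
# template and solver names are made up, and the import assumes the usual
# location of this module inside PyFoam):
#
#   from PyFoam.Infrastructure.ClusterJob import SolverJob
#
#   job=SolverJob("damBreak",
#                 "interFoam",
#                 template="damBreak.template",
#                 foamVersion="2.3.1")
#
# The job object then drives the setup/run/cleanup stages through the parts
# of ClusterJob that are not shown in this excerpt.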