1
2 """Encapsulates all necessary things for a cluster-job, like setting
3 up, running, restarting"""
4
5 import os,sys
6 from os import path,unlink
7 from threading import Thread,Lock,Timer
8
9 from PyFoam.Applications.Decomposer import Decomposer
10 from PyFoam.Applications.Runner import Runner
11 from PyFoam.Applications.SteadyRunner import SteadyRunner
12 from PyFoam.Applications.CloneCase import CloneCase
13 from PyFoam.FoamInformation import changeFoamVersion
14 from PyFoam.Error import error,warning
15 from PyFoam import configuration as config
16 from PyFoam.FoamInformation import oldAppConvention as oldApp
17 from PyFoam.RunDictionary.SolutionDirectory import SolutionDirectory
18
32
33
35 """ All Cluster-jobs are to be derived from this base-class
36
37 The actual jobs are implemented by overriding methods
38
39 There is a number of variables in this class that are used to
40 'communicate' information between the various stages"""
41
    def __init__(self,
                 basename,
                 arrayJob=False,
                 hardRestart=False,
                 autoParallel=True,
                 doAutoReconstruct=None,
                 foamVersion=None,
                 compileOption=None,
                 useFoamMPI=False,
                 multiRegion=False,
                 isDecomposed=False):
        """Initializes the Job
        @param basename: Basis name of the job
        @param arrayJob: this job is a parameter variation. The tasks
        are identified by their task-id
        @param hardRestart: treat the job as restarted
        @param autoParallel: Parallelization is handled by the base-class
        @param doAutoReconstruct: Automatically reconstruct the case if
        autoParalellel is set. If the value is None then it is looked up from
        the configuration
        @param foamVersion: The foam-Version that is to be used
        @param compileOption: Forces compile-option (usually 'Opt' or 'Debug')
        @param useFoamMPI: Use the OpenMPI supplied with OpenFOAM
        @param multiRegion: This job consists of multiple regions
        @param isDecomposed: Assume that the job is already decomposed"""

        # SGE exports JOB_ID and JOB_NAME to every job it starts; without
        # them this is not running under the scheduler and we bail out
        if not os.environ.has_key("JOB_ID"):
            error("Not an SGE-job. Environment variable JOB_ID is missing")
        self.jobID=int(os.environ["JOB_ID"])
        self.jobName=os.environ["JOB_NAME"]

        # anchor the case name at the directory the job was started from
        self.basename=path.join(path.abspath(path.curdir),basename)

        # SGE sets RESTARTED to a non-zero value for restarted jobs
        sgeRestarted=False
        if os.environ.has_key("RESTARTED"):
            sgeRestarted=(int(os.environ["RESTARTED"])!=0)

        # restart either because the scheduler says so or the caller forces it
        if sgeRestarted or hardRestart:
            self.restarted=True
        else:
            self.restarted=False

        # fall back to the version configured under [OpenFOAM]/Version
        if foamVersion==None:
            foamVersion=config().get("OpenFOAM","Version")

        changeFoamVersion(foamVersion,compileOption=compileOption)

        # changeFoamVersion should have populated the OpenFOAM environment
        if not os.environ.has_key("WM_PROJECT_VERSION"):
            error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

        self.autoParallel=autoParallel

        self.doAutoReconstruct=doAutoReconstruct
        if self.doAutoReconstruct==None:
            self.doAutoReconstruct=config().getboolean("ClusterJob","doAutoReconstruct")

        self.multiRegion=multiRegion

        # defaults for a serial run; overwritten below if SGE granted slots
        self.hostfile=None
        self.nproc=1

        # NSLOTS is the number of CPU-slots granted by SGE; for parallel
        # runs the scheduler provides a machine-file in $TMP
        if os.environ.has_key("NSLOTS"):
            self.nproc=int(os.environ["NSLOTS"])
            self.message("Running on",self.nproc,"CPUs")
            if self.nproc>1:

                self.hostfile=path.join(os.environ["TMP"],"machines")
                self.message("Using the machinefile",self.hostfile)
                self.message("Contents of the machinefile:",open(self.hostfile).readlines())

        self.ordinaryEnd=True
        self.listenToTimer=False

        self.taskID=None
        self.arrayJob=arrayJob

        # array-jobs get their task-number from SGE
        if self.arrayJob:
            self.taskID=int(os.environ["SGE_TASK_ID"])

        # unless the OpenFOAM-bundled MPI is requested (or this foam-version
        # is configured to use it) prepend the cluster-specific MPI paths
        if not useFoamMPI and not foamVersion in eval(config().get("ClusterJob","useFoamMPI",default='[]')):

            self.message("Adding Cluster-specific paths")
            os.environ["PATH"]=config().get("ClusterJob","path")+":"+os.environ["PATH"]
            os.environ["LD_LIBRARY_PATH"]=config().get("ClusterJob","ldpath")+":"+os.environ["LD_LIBRARY_PATH"]

        self.isDecomposed=isDecomposed
132 """Return a string with the full job-ID"""
133 result=str(self.jobID)
134 if self.arrayJob:
135 result+=":"+str(self.taskID)
136 return result
137
139 print "=== CLUSTERJOB: ",
140 for t in txt:
141 print t,
142 print " ==="
143 sys.stdout.flush()
144
151
153 """The file with the job information"""
154 jobfile="%s.%d" % (self.jobName,self.jobID)
155 if self.arrayJob:
156 jobfile+=".%d" % self.taskID
157 jobfile+=".pyFoam.clusterjob"
158 jobfile=path.join(path.dirname(self.basename),jobfile)
159
160 return jobfile
161
163 """The file that makes the job write a checkpoint"""
164 return self.jobFile()+".checkpoint"
165
167 """The file that makes the job write a checkpoint and end"""
168 return self.jobFile()+".stop"
169
234
236 """Returns the actual directory of the case
237 To be overridden if appropriate"""
238 if self.arrayJob:
239 return "%s.%05d" % (self.basename,self.taskID)
240 else:
241 return self.basename
242
244 """Returns just the name of the case"""
245 return path.basename(self.casedir())
246
247 - def foamRun(self,application,
248 args=[],
249 foamArgs=[],
250 steady=False,
251 multiRegion=None,
252 progress=False,
253 noLog=False):
254 """Runs a foam utility on the case.
255 If it is a parallel job and the grid has
256 already been decomposed (and not yet reconstructed) it is run in
257 parallel
258 @param application: the Foam-Application that is to be run
259 @param foamArgs: A list if with the additional arguments for the
260 Foam-Application
261 @param args: A list with additional arguments for the Runner-object
262 @param steady: Use the steady-runner
263 @param multiRegion: Run this on multiple regions (if None: I don't have an opinion on this)
264 @param progress: Only output the time and nothing else
265 @param noLog: Do not generate a logfile"""
266
267 arglist=args[:]
268 arglist+=["--job-id=%s" % self.fullJobId()]
269
270 if self.isDecomposed and self.nproc>1:
271 arglist+=["--procnr=%d" % self.nproc,
272 "--machinefile=%s" % self.hostfile]
273
274 if progress:
275 arglist+=["--progress"]
276 if noLog:
277 arglist+=["--no-log"]
278
279 if self.multiRegion:
280 if multiRegion==None or multiRegion==True:
281 arglist+=["--all-regions"]
282 elif multiRegion and not self.multiRegion:
283 warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")
284
285 if self.restarted:
286 arglist+=["--restart"]
287
288 arglist+=[application]
289 if oldApp():
290 arglist+=[".",self.casename()]
291 else:
292 arglist+=["-case",self.casename()]
293
294 arglist+=foamArgs
295
296 self.message("Executing",arglist)
297
298 if steady:
299 self.message("Running Steady")
300 runner=SteadyRunner(args=arglist)
301 else:
302 runner=Runner(args=arglist)
303
305 """Automatically decomposes the grid with a metis-algorithm"""
306
307 if path.isdir(path.join(self.casedir(),"processor0")):
308 warning("A processor directory already exists. There might be a problem")
309 args=["--method=metis",
310 "--clear",
311 self.casename(),
312 self.nproc,
313 "--job-id=%s" % self.fullJobId()]
314
315 if self.multiRegion:
316 args.append("--all-regions")
317
318 deco=Decomposer(args=args)
319
321 """Default reconstruction of a parallel run"""
322
323 if self.doAutoReconstruct:
324 self.isDecomposed=False
325
326 self.foamRun("reconstructPar",
327 args=["--logname=ReconstructPar"])
328 else:
329 self.message("No reconstruction (because asked to)")
330
331 - def setup(self,parameters):
332 """Set up the job. Called in the beginning if the
333 job has not been restarted
334
335 Usual tasks include grid conversion/setup, mesh decomposition etc
336
337 @param parameters: a dictionary with parameters"""
338
339 pass
340
341 - def postDecomposeSetup(self,parameters):
342 """Additional setup, to be executed when the grid is already decomposed
343
344 Usually for tasks that can be done on a decomposed grid
345
346 @param parameters: a dictionary with parameters"""
347
348 pass
349
350 - def run(self,parameters):
351 """Run the actual job. Usually the solver.
352 @param parameters: a dictionary with parameters"""
353
354 pass
355
357 """Additional cleanup, to be executed when the grid is still decomposed
358
359 Usually for tasks that can be done on a decomposed grid
360
361 @param parameters: a dictionary with parameters"""
362
363 pass
364
366 """Clean up after a job
367 @param parameters: a dictionary with parameters"""
368
369 pass
370
372 """Additional reconstruction of parallel runs (Stuff that the
373 OpenFOAM-reconstructPar doesn't do
374 @param parameters: a dictionary with parameters"""
375
376 pass
377
379 """Parameters for a specific task
380 @param id: the id of the task
381 @return: a dictionary with parameters for this task"""
382
383 error("taskParameter not implemented. Not a parameterized job")
384
385 return {}
386
397
399 if self.listenToTimer:
400 self.ordinaryEnd=False
401 f=open(path.join(self.basename,"stop"),"w")
402 f.write("Geh z'haus")
403 f.close()
404 unlink(self.stopFile())
405 else:
406 warning("I'm not listening to your callbacks")
407
409 """A Cluster-Job that executes a solver. It implements the run-function.
410 If a template-case is specified, the case is copied"""
411
412 - def __init__(self,basename,solver,
413 template=None,
414 cloneParameters=[],
415 arrayJob=False,
416 hardRestart=False,
417 autoParallel=True,
418 doAutoReconstruct=None,
419 foamVersion=None,
420 compileOption=None,
421 useFoamMPI=False,
422 steady=False,
423 multiRegion=False,
424 progress=False,
425 solverProgress=False,
426 solverNoLog=False,
427 isDecomposed=False):
428 """@param template: Name of the template-case. It is assumed that
429 it resides in the same directory as the actual case
430 @param cloneParameters: a list with additional parameters for the
431 CloneCase-object that copies the template
432 @param solverProgress: Only writes the current time of the solver"""
433
434 ClusterJob.__init__(self,basename,
435 arrayJob=arrayJob,
436 hardRestart=hardRestart,
437 autoParallel=autoParallel,
438 doAutoReconstruct=doAutoReconstruct,
439 foamVersion=foamVersion,
440 compileOption=compileOption,
441 useFoamMPI=useFoamMPI,
442 multiRegion=multiRegion,
443 isDecomposed=isDecomposed)
444 self.solver=solver
445 self.steady=steady
446 if template!=None and not self.restarted:
447 template=path.join(path.dirname(self.casedir()),template)
448 if path.abspath(basename)==path.abspath(template):
449 error("The basename",basename,"and the template",template,"are the same directory")
450 if isDecomposed:
451 cloneParameters+=["--parallel"]
452 clone=CloneCase(
453 args=cloneParameters+[template,self.casedir(),"--follow-symlinks"])
454 self.solverProgress=solverProgress
455 self.solverNoLog=solverNoLog
456
457 - def run(self,parameters):
458 self.foamRun(self.solver,
459 steady=self.steady,
460 multiRegion=False,
461 progress=self.solverProgress,
462 noLog=self.solverNoLog)
463