"""Encapsulates everything necessary for a cluster job: setting up, running and restarting"""

import os
from os import path,unlink
from threading import Thread,Lock,Timer

from PyFoam.Applications.Decomposer import Decomposer
from PyFoam.Applications.Runner import Runner
from PyFoam.Applications.SteadyRunner import SteadyRunner
from PyFoam.Applications.CloneCase import CloneCase
from PyFoam.FoamInformation import changeFoamVersion
from PyFoam.Error import error,warning
from PyFoam import configuration as config
from PyFoam.FoamInformation import oldAppConvention as oldApp

class ClusterJob(object):
    """All Cluster-jobs are to be derived from this base-class

    The actual jobs are implemented by overriding methods

    There are a number of variables in this class that are used to
    'communicate' information between the various stages"""

    def __init__(self,basename,arrayJob=False,hardRestart=False,autoParallel=True,foamVersion=None,multiRegion=False):
        """Initializes the Job
        @param basename: Base name of the job
        @param arrayJob: this job is a parameter variation. The tasks
        are identified by their task-id
        @param hardRestart: treat the job as restarted
        @param autoParallel: Parallelization is handled by the base-class
        @param foamVersion: The foam-Version that is to be used
        @param multiRegion: This job consists of multiple regions"""

        if not os.environ.has_key("JOB_ID"):
            error("Not an SGE-job. Environment variable JOB_ID is missing")
        self.jobID=int(os.environ["JOB_ID"])
        self.jobName=os.environ["JOB_NAME"]

        self.basename=path.join(path.abspath(path.curdir),basename)

        sgeRestarted=False
        if os.environ.has_key("RESTARTED"):
            sgeRestarted=(int(os.environ["RESTARTED"])!=0)

        if sgeRestarted or hardRestart:
            self.restarted=True
        else:
            self.restarted=False

        if foamVersion==None:
            foamVersion=config().get("OpenFOAM","Version")

        changeFoamVersion(foamVersion)

        if not os.environ.has_key("WM_PROJECT_VERSION"):
            error("No OpenFOAM-Version seems to be configured. Set the foamVersion-parameter")

        self.autoParallel=autoParallel
        self.multiRegion=multiRegion

        self.hostfile=None
        self.nproc=1

        if os.environ.has_key("NSLOTS"):
            self.nproc=int(os.environ["NSLOTS"])
            if self.nproc>1:
                self.hostfile=path.join(os.environ["TMP"],"machines")

        self.ordinaryEnd=True
        self.listenToTimer=False

        self.taskID=None
        self.arrayJob=arrayJob

        if self.arrayJob:
            self.taskID=int(os.environ["SGE_TASK_ID"])

        os.environ["PATH"]=config().get("ClusterJob","path")+":"+os.environ["PATH"]
        os.environ["LD_LIBRARY_PATH"]=config().get("ClusterJob","ldpath")+":"+os.environ["LD_LIBRARY_PATH"]

        self.isDecomposed=False

    def message(self,*txt):
        """Prints a short status message, marked as coming from the cluster job"""
        print "=== CLUSTERJOB: ",
        for t in txt:
            print t,
        print " ==="

    def setState(self,txt):
        """Writes the current job state to the ClusterJobState file in the case directory"""
        self.message("Setting Job state to",txt)
        fName=path.join(self.casedir(),"ClusterJobState")
        f=open(fName,"w")
        f.write(txt+"\n")
        f.close()

    def jobFile(self):
        """The file with the job information"""
        # built as "<jobName>.<jobID>[.<taskID>].pyFoam.clusterjob" in the
        # directory that holds the case
        jobfile="%s.%d" % (self.jobName,self.jobID)
        if self.arrayJob:
            jobfile+=".%d" % self.taskID
        jobfile+=".pyFoam.clusterjob"
        jobfile=path.join(path.dirname(self.basename),jobfile)

        return jobfile
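
    # The two files below implement a simple file-based signalling protocol:
    # creating them (for instance from an SGE checkpointing or termination hook)
    # tells a job that listens for them to write a checkpoint or to checkpoint
    # and stop.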

    def checkpointFile(self):
        """The file that makes the job write a checkpoint"""
        return self.jobFile()+".checkpoint"

    def stopFile(self):
        """The file that makes the job write a checkpoint and end"""
        return self.jobFile()+".stop"

    def casedir(self):
        """Returns the actual directory of the case
        To be overridden if appropriate"""
        if self.arrayJob:
            return "%s.%05d" % (self.basename,self.taskID)
        else:
            return self.basename
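
    # Example (hypothetical names): for an array job with basename ".../damBreak"
    # and SGE task-ID 3 casedir() is ".../damBreak.00003"; for a plain job it is
    # just the basename itself.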

    def casename(self):
        """Returns just the name of the case"""
        return path.basename(self.casedir())

    def foamRun(self,application,args=[],foamArgs=[],steady=False,multiRegion=None):
        """Runs a foam utility on the case.
        If it is a parallel job and the grid has
        already been decomposed (and not yet reconstructed) it is run in
        parallel
        @param application: the Foam-Application that is to be run
        @param foamArgs: A list with the additional arguments for the
        Foam-Application
        @param args: A list with additional arguments for the Runner-object
        @param steady: Use the steady-runner
        @param multiRegion: Run this on multiple regions (if None: I don't have an opinion on this)"""

        arglist=args[:]
        if self.isDecomposed and self.nproc>1:
            arglist+=["--procnr=%d" % self.nproc,
                      "--machinefile=%s" % self.hostfile]

        if self.multiRegion:
            if multiRegion==None or multiRegion==True:
                arglist+=["--all-regions"]
        elif multiRegion and not self.multiRegion:
            warning("This is not a multi-region case, so trying to run stuff multi-region won't do any good")

        if self.restarted:
            arglist+=["--restart"]

        arglist+=[application]
        if oldApp():
            arglist+=[".",self.casename()]
        else:
            arglist+=["-case",self.casename()]

        arglist+=foamArgs

        self.message("Executing",arglist)

        if steady:
            self.message("Running Steady")
            runner=SteadyRunner(args=arglist)
        else:
            runner=Runner(args=arglist)
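
    # Subclasses usually drive OpenFOAM utilities through foamRun(); an
    # illustrative call (utility and log name chosen arbitrarily) would be
    #   self.foamRun("checkMesh",args=["--logname=CheckMesh"])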

    def autoDecompose(self):
        """Automatically decomposes the grid with a metis-algorithm"""

        if path.isdir(path.join(self.casedir(),"processor0")):
            warning("A processor directory already exists. There might be a problem")
        args=["--method=metis",
              "--clear",
              self.casename(),
              self.nproc]

        if self.multiRegion:
            args.append("--all-regions")

        deco=Decomposer(args=args)
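
    # Decomposer is the application class behind the pyFoamDecompose.py utility,
    # so the call above corresponds roughly to running
    #   pyFoamDecompose.py --method=metis --clear <case> <nproc>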

    def autoReconstruct(self):
        """Default reconstruction of a parallel run"""

        self.foamRun("reconstructPar",
                     args=["--logname=ReconstructPar"])

    def setup(self,parameters):
        """Set up the job. Called at the beginning if the
        job has not been restarted

        Usual tasks include grid conversion/setup, mesh decomposition etc

        @param parameters: a dictionary with parameters"""

        pass

    def postDecomposeSetup(self,parameters):
        """Additional setup, to be executed when the grid is already decomposed

        Usually for tasks that can be done on a decomposed grid

        @param parameters: a dictionary with parameters"""

        pass

    def run(self,parameters):
        """Run the actual job. Usually the solver.
        @param parameters: a dictionary with parameters"""

        pass

    def preReconstructCleanup(self,parameters):
        """Additional cleanup, to be executed while the grid is still decomposed

        Usually for tasks that can be done on a decomposed grid

        @param parameters: a dictionary with parameters"""

        pass

    def cleanup(self,parameters):
        """Clean up after a job
        @param parameters: a dictionary with parameters"""

        pass

    def additionalReconstruct(self,parameters):
        """Additional reconstruction of parallel runs (stuff that the
        OpenFOAM-reconstructPar doesn't do)
        @param parameters: a dictionary with parameters"""

        pass

    def taskParameters(self,id):
        """Parameters for a specific task
        @param id: the id of the task
        @return: a dictionary with parameters for this task"""

        error("taskParameters not implemented. Not a parameterized job")

        return {}
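
    # A parameterized (array) job would override taskParameters(), for instance
    # returning something like {"inletVelocity":0.1*id} (a purely hypothetical
    # parameter) so that every task gets its own settings.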

    def stopJob(self):
        if self.listenToTimer:
            self.ordinaryEnd=False
            f=open(path.join(self.basename,"stop"),"w")
            f.write("Geh z'haus")
            f.close()
            unlink(self.stopFile())
        else:
            warning("I'm not listening to your callbacks")

class SolverJob(ClusterJob):
    """A Cluster-Job that executes a solver. It implements the run-function.
    If a template-case is specified, the case is copied"""

    def __init__(self,basename,solver,template=None,cloneParameters=[],arrayJob=False,hardRestart=False,autoParallel=True,foamVersion=None,steady=False,multiRegion=False):
        """@param template: Name of the template-case. It is assumed that
        it resides in the same directory as the actual case
        @param cloneParameters: a list with additional parameters for the
        CloneCase-object that copies the template"""

        ClusterJob.__init__(self,basename,arrayJob=arrayJob,hardRestart=hardRestart,autoParallel=autoParallel,foamVersion=foamVersion,multiRegion=multiRegion)
        self.solver=solver
        self.steady=steady
        if template!=None and not self.restarted:
            template=path.join(path.dirname(self.casedir()),template)
            if path.abspath(basename)==path.abspath(template):
                error("The basename",basename,"and the template",template,"are the same directory")
            clone=CloneCase(args=cloneParameters+[template,self.casedir()])

    def run(self,parameters):
        self.foamRun(self.solver,steady=self.steady,multiRegion=False)
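
# Example usage (a minimal sketch; the case, template and solver names are
# purely hypothetical, and the import assumes the module's usual location in
# the PyFoam tree). A small SGE job script built on SolverJob could look like:
#
#   from PyFoam.Infrastructure.ClusterJob import SolverJob
#
#   job=SolverJob("damBreak",                      # base name of the case
#                 "interFoam",                     # solver to execute
#                 template="damBreak.template",    # copied on the first start
#                 steady=False,
#                 multiRegion=False)
#
# More specialized jobs derive from ClusterJob directly and override the hook
# methods above (setup, postDecomposeSetup, run, cleanup, ...) to describe the
# individual stages.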