1
2 """
3 Data that can go into a spreadsheet (title line and rectangular data)
4 """
5
6 import numpy,copy
7
8 from PyFoam.Error import error,FatalErrorPyFoamException,warning
9
13
15 """
16 Collects data that could go into a spreadsheet. The focus of this class is on
17 storing all the data at once
18 """
def __init__(self,
             timeName=None,
             validData=None,
             csvName=None,
             txtName=None,
             data=None,
             names=None,
             title=None):
    """Either this is constructed from a file or from the data and the column headers

    @param timeName: the data column that is to be considered the time in this file.
    If it is not given the first column is used
    @param validData: names of the valid data columns (all others are discarded,
    the time column is always kept)
    @param csvName: name of the CSV-file the data should be constructed from
    (the first line holds the comma-separated column names)
    @param txtName: name of a whitespace-separated file the data should be
    constructed from (the first line holds the column names)
    @param data: the actual data to use (a sequence of rows)
    @param names: the names for the column header
    @param title: a name that is used to make unique header names. It is
    prepended to the name of every column except the time column"""

    self.title=title

    # Explicit None/len test: the truth value of a numpy array is ambiguous
    if (csvName or txtName) and data is not None and len(data)>0:
        error("SpreadsheetData is either constructed from data or from a file")

    if csvName:
        try:
            rec=numpy.recfromcsv(csvName)
            # atleast_1d: a file with a single data row is read as a 0-d array
            data=[tuple(float(x) for x in i) for i in numpy.atleast_1d(rec)]
            names=list(rec.dtype.names)
        except AttributeError:
            # numpy without recfromcsv: read the numbers and the header separately.
            # ndmin=2 keeps a single data row from collapsing to a 1-d array
            data=[tuple(row) for row in numpy.loadtxt(csvName,
                                                      delimiter=',',
                                                      skiprows=1,
                                                      ndmin=2)]
            with open(csvName) as f:
                names=f.readline().strip().split(',')

        self.data=numpy.array(data,dtype=[(n,'f8') for n in names])
    elif txtName:
        try:
            rec=numpy.recfromtxt(txtName,names=True)
            data=[tuple(float(x) for x in i) for i in numpy.atleast_1d(rec)]
            names=list(rec.dtype.names)
        except AttributeError:
            # numpy without recfromtxt: read the numbers and the header separately
            data=[tuple(row) for row in numpy.loadtxt(txtName,ndmin=2)]
            with open(txtName) as f:
                # The first token of the header line is dropped
                # (presumably the comment marker)
                names=f.readline().strip().split()[1:]

        self.data=numpy.array(data,dtype=[(n,'f8') for n in names])
    else:
        # "is"-comparison: "!=" on a numpy array compares element-wise
        if data is not None and names is None:
            error("No names given for the data")

        # Lists instead of map()/zip(): on Python 3 those are lazy iterators
        # that numpy can not convert
        self.data=numpy.array([tuple(row) for row in data],
                              dtype=[(n,'f8') for n in names])

    if timeName:
        try:
            index=list(self.data.dtype.names).index(timeName)
        except ValueError:
            error("Time name",timeName,"not in",self.data.dtype.names)
    else:
        index=0
    self.time=self.data.dtype.names[index]

    if validData:
        # Keep the time and the requested columns in their original order
        usedNames=[n for n in self.data.dtype.names
                   if n==self.time or n in validData]
        columns=[self.data[n] for n in usedNames]
        self.data=numpy.array(list(zip(*columns)),
                              dtype=[(n,'f8') for n in usedNames])
        index=list(self.data.dtype.names).index(self.time)

    if self.title is not None:
        # Prefix every column but the time with the title
        self.data.dtype.names=tuple(
            n if i==index else self.title+" "+n
            for i,n in enumerate(self.data.dtype.names))
96
99
101 return self.data.size
102
def writeCSV(self,fName,
             delimiter=","):
    """Write data to a CSV-file. The first line holds the column names,
    the following lines hold one row of the data each
    @param fName: Name of the file
    @param delimiter: Delimiter to be used in the CSV-file"""

    # Context manager: the file is flushed and closed even if writing fails
    with open(fName,"w") as f:
        f.write(delimiter.join(self.names())+"\n")
        numpy.savetxt(f,self.data,delimiter=delimiter)
112
114 """Return the range of times
115 @param time: name of the time. If None the first column is used"""
116 if time==None:
117 time=self.time
118 t=self.data[time]
119
120 return (t[0],t[-1])
121
def join(self,other,time=None,prefix=None):
    """Join this object with another. Assume that they have the same
    amount of rows and that they have one column that designates the
    time and is called the same and has the same values
    @param other: the other array
    @param time: name of the time. If None the time column of this object is used
    @param prefix: String that is prepended to the name of a column of the
    other data if that name is already used in this data. If none is given
    then the title of the other data followed by "_" is used ("other_" if
    the other data has no title)
    @return: a new SpreadsheetData with the columns of this object followed
    by the columns of the other one (without its time column)
    @raise WrongDataSize: if the number of rows differs"""
    if time is None:
        time=self.time
    if prefix is None:
        prefix=other.title
        if prefix is None:
            prefix="other_"
        else:
            prefix+="_"

    t1=self.data[time]
    t2=other.data[time]
    if len(t1)!=len(t2):
        raise WrongDataSize()
    # max() of an empty sequence raises ValueError, but two tables
    # without rows trivially have the same times
    if len(t1)>0 and max(abs(t1-t2))>1e-10:
        error("Times do not have the same values")

    # Fetch the own names once instead of once per column of the other data
    ownNames=self.names()
    names=list(ownNames)
    data=[self.data[n] for n in ownNames]

    for n in other.names():
        if n!=time:
            if n in ownNames:
                # Name clash: make the column of the other data unique
                names.append(prefix+n)
            else:
                names.append(n)
            data.append(other.data[n])

    return SpreadsheetData(names=names,
                           data=numpy.array(data).transpose())
162
164 """Convinience function for joining data"""
165 return self.join(other)
166
def append(self,
           name,
           data,
           allowDuplicates=False):
    """Add another column to the data. Assumes that the number of rows is right
    @param name: the name of the column
    @param data: the actual data
    @param allowDuplicates: If the name already exists make it unique by appending _1, _2 ...
    If this is False an already existing name is passed on to numpy unchanged
    (NOTE(review): numpy is expected to reject the duplicate field name - confirm)"""

    arr = numpy.asarray(data)
    existing=self.names()
    newname=name
    if newname in existing and allowDuplicates:
        cnt=1
        while newname in existing:
            newname="%s_%d" % (name,cnt)
            cnt+=1
        warning("Changing name",name,"to",newname,"bacause it already exists in the data")
    # Same fields as before plus one additional double column
    newdtype = numpy.dtype(self.data.dtype.descr + [(newname, 'f8')])
    newrec = numpy.empty(self.data.shape, dtype=newdtype)
    for field in self.data.dtype.fields:
        newrec[field] = self.data[field]
    # Fill the column under its final name: using the requested name here
    # would overwrite the existing column after a rename and leave the new
    # column uninitialised
    newrec[newname] = arr

    self.data=newrec
191
def __call__(self,
             t,
             name,
             time=None,
             invalidExtend=False,
             noInterpolation=False):
    """'Evaluate' the data at a specific time by linear interpolation
    @param t: the time at which the data should be evaluated
    @param name: name of the data column to be evaluated
    @param time: name of the time column. If none is given then the time
    column of this object is used. The values in that column are assumed
    to be in ascending order (they are searched by bisection)
    @param invalidExtend: if t is out of the valid range then use the smallest or the biggest value. If False use nan
    @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
    @return: the (interpolated) value or nan"""

    if time is None:
        time=self.time

    x=self.data[time]
    y=self.data[name]

    # Outside of the covered range: clamp to the nearest end or give up
    if t<x[0]:
        if invalidExtend:
            return y[0]
        else:
            return float('nan')
    elif t>x[-1]:
        if invalidExtend:
            return y[-1]
        else:
            return float('nan')

    if noInterpolation:
        if t==x[0]:
            return y[0]
        elif t==x[-1]:
            return y[-1]

    # Bisection: narrow [iLow,iHigh] down to the two neighbours of t
    iLow=0
    iHigh=len(x)-1

    while (iHigh-iLow)>1:
        # Floor division: "/" gives a float on Python 3 which is no valid index
        iNew = iLow + (iHigh-iLow)//2

        if x[iNew]==t:
            # Exact hit: no interpolation needed
            return y[iNew]
        elif t < x[iNew]:
            iHigh=iNew
        else:
            iLow=iNew
    if noInterpolation:
        return float('nan')
    elif iHigh==iLow:
        # Only one row: there is no interval to interpolate on
        # (the formula below would divide by zero)
        if t==x[iLow]:
            return y[iLow]
        else:
            return float('nan')
    else:
        return y[iLow] + (y[iHigh]-y[iLow])*(t-x[iLow])/(x[iHigh]-x[iLow])
247
def addTimes(self,times,time=None,interpolate=False,invalidExtend=False):
    """Extend the data so that all new times are represented (add rows
    if they are not there). The existing rows are kept. The existing and
    the new times are merged in one pass, so both are expected to be in
    ascending order
    @param time: the name of the column with the time. If None the time
    column of this object is used
    @param times: the times that should be there
    @param interpolate: interpolate the data in new rows. Otherwise
    insert 'nan'
    @param invalidExtend: if t is out of the valid range then use
    the smallest or the biggest value. If False use nan"""

    if time is None:
        time=self.time

    oldTimes=self.data[time]
    nOld=len(oldTimes)

    if len(times)==nOld:
        same=True
        for i in range(nOld):
            if times[i]!=oldTimes[i]:
                same=False
                break
        if same:
            # All the requested times are already there: nothing to do
            return

    names=self.names()
    newData=[]
    otherI=0
    originalI=0
    while otherI<len(times):
        t=times[otherI]

        # Copy the existing rows that come before the requested time
        while originalI<nOld and t>oldTimes[originalI]:
            newData.append(self.data[originalI])
            originalI+=1

        if originalI<nOld and t==oldTimes[originalI]:
            # The requested time already exists: keep the original row
            newData.append(self.data[originalI])
            originalI+=1
        else:
            # self.data is only replaced at the very end, so the
            # interpolation always works on the original rows
            newRow=[]
            for n in names:
                if n==time:
                    newRow.append(t)
                elif interpolate:
                    newRow.append(self(t,n,time=time,invalidExtend=invalidExtend))
                else:
                    newRow.append(float('nan'))
            newData.append(newRow)
        otherI+=1

    # Existing rows after the last requested time
    while originalI<nOld:
        newData.append(self.data[originalI])
        originalI+=1

    # A list instead of map(): on Python 3 map() is a lazy iterator that
    # numpy can not convert
    self.data=numpy.array([tuple(row) for row in newData],dtype=self.data.dtype)
307
def resample(self,
             other,
             name,
             otherName=None,
             time=None,
             invalidExtend=False,
             extendData=False,
             noInterpolation=False):
    """Calculate values from another dataset at the same times as in this data-set
    @param other: the other data-set
    @param name: name of the data column to be evaluated
    @param otherName: name of the column in the other data-set. If none is
    given then name is used
    @param time: name of the time column. If none is given then the time
    column of this object is used
    @param invalidExtend: see __call__
    @param extendData: if the other data-set covers times before the first or
    after the last time of this data-set then rows for these times (all other
    columns set to nan) are added to this data-set before resampling
    @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
    @return: list with the values of the other data-set, one for every time
    in this data-set"""
    if time is None:
        time=self.time

    if extendData and (
        self.data[time][0] > other.data[time][0] or
        self.data[time][-1] < other.data[time][-1]):
        names=self.names()
        otherTimes=other.data[time]

        def nanRow(t):
            # Row that only has a value in the time column
            return tuple(t if n==time else float('nan') for n in names)

        # Leading times of the other data-set before our first time
        first=self.data[time][0]
        pre=[]
        for t in otherTimes:
            if not t<first:
                break
            pre.append(nanRow(t))
        if len(pre)>0:
            self.data=numpy.concatenate((numpy.array(pre,dtype=self.data.dtype),self.data))

        # Trailing times of the other data-set after our last time
        # (collected back to front)
        last=self.data[time][-1]
        post=[]
        for t in otherTimes[::-1]:
            if not t>last:
                break
            post.append(nanRow(t))

        post.reverse()
        if len(post)>0:
            self.data=numpy.concatenate((self.data,numpy.array(post,dtype=self.data.dtype)))

    nm=name
    if otherName:
        nm=otherName

    return [other(t,nm,
                  time=time,
                  invalidExtend=invalidExtend,
                  noInterpolation=noInterpolation) for t in self.data[time]]
376
def compare(self,other,name,otherName=None,time=None,common=False):
    """Compare this data-set with another. The time-points of this dataset are used as
    a reference. Returns a dictionary with a number of norms: maximum absolute
    difference ("max") and the time where it occurs ("maxPos"), average absolute
    difference on all timepoints ("average"), average absolute difference
    weighted by time ("wAverage") and the compared time range ("tMin", "tMax").
    All entries are None if there is no time-point to compare
    @param other: the other data-set
    @param name: name of the data column to be evaluated
    @param otherName: name of the column in the other data-set. If none is
    given then name is used
    @param time: name of the time column. If none is given then the time
    column of this object is used
    @param common: cut off the parts where not both data sets are defined"""

    if time is None:
        time=self.time

    x=self.data[time]
    y=self.data[name]
    y2=self.resample(other,name,otherName=otherName,time=time,invalidExtend=True)

    minT,maxT=None,None
    if common:
        # Overlap of the two time ranges ...
        minTmp=max(x[0],other.data[time][0])
        maxTmp=min(x[-1],other.data[time][-1])
        # ... narrowed to the first and last of our own times inside of it
        for t in x:
            if minTmp<=t:
                minT=t
                break
        for t in x[::-1]:
            if maxTmp>=t:
                maxT=t
                break
    else:
        minT,maxT=x[0],x[-1]

    # minT>maxT: none of our times lies in the common range. Without this
    # test the average below would divide by zero
    if minT is None or maxT is None or minT>maxT:
        return { "max" : None,
                 "maxPos" : None,
                 "average" : None,
                 "wAverage" : None,
                 "tMin": None,
                 "tMax": None }

    maxDiff=0
    maxPos=x[0]
    sumDiff=0
    sumWeighted=0
    cnt=0

    for i,t in enumerate(x):
        if t<minT or t>maxT:
            continue
        cnt+=1

        diff=abs(y[i]-y2[i])
        if diff>maxDiff:
            maxDiff=diff
            maxPos=x[i]
        sumDiff+=diff
        # Every point is weighted with half of the distance to each of
        # its neighbours inside of the compared range
        weight=0
        if t>minT:
            weight+=(t-x[i-1])/2
        if t<maxT:
            weight+=(x[i+1]-t)/2
        sumWeighted+=weight*diff

    average=sumDiff/cnt
    if maxT>minT:
        wAverage=sumWeighted/(maxT-minT)
    else:
        # The range is a single time: no interval to weight with
        # (this would be 0/0), so use the plain average
        wAverage=average

    return { "max" : maxDiff,
             "maxPos" : maxPos,
             "average" : average,
             "wAverage" : wAverage,
             "tMin": minT,
             "tMax": maxT}
449
451 """Calculates the metrics for a data set. Returns a dictionary
452 with a number of norms: minimum, maximum, average, average weighted by time
453 @param name: name of the data column to be evaluated. Assumes that that column
454 is ordered in ascending order
455 @param time: name of the time column. If none is given then the first column is assumed"""
456
457 if time==None:
458 time=self.time
459
460 x=self.data[time]
461 y=self.data[name]
462
463 minVal=1e40
464 maxVal=-1e40
465 sum=0
466 sumWeighted=0
467
468 for i,t in enumerate(x):
469 val=y[i]
470 maxVal=max(val,maxVal)
471 minVal=min(val,minVal)
472 sum+=val
473 weight=0
474 if i>0:
475 weight+=(t-x[i-1])/2
476 if i<(len(x)-1):
477 weight+=(x[i+1]-t)/2
478 sumWeighted+=weight*val
479
480 return { "max" : maxVal,
481 "min" : minVal,
482 "average" : sum/len(x),
483 "wAverage" : sumWeighted/(x[-1]-x[0]),
484 "tMin": x[0],
485 "tMax": x[-1]}
486