1
2 """
3 Data that can go into a spreadsheet (title line and rectangular data)
4 """
5
6 try:
7 import numpy
8 except ImportError:
9
10 import numpypy
11 import numpy
12
13 import copy
14
15 from PyFoam.Error import error,FatalErrorPyFoamException,warning
16
17 from PyFoam.ThirdParty.six import PY3
18 from PyFoam.ThirdParty.six import b as toByte
19
23
25 """
26 Collects data that could go into a spreadsheet. The focus of this class is on
27 storing all the data at once
28 """
def __init__(self,
             timeName=None,
             validData=None,
             csvName=None,
             txtName=None,
             data=None,
             names=None,
             title=None):
    """Either this is constructed from a file or from the data and the column headers

    @param timeName: the data column that is to be considered the time in this file.
    If not given the first column is used
    @param validData: names of the valid data columns (all others are discarded,
    the time column is always kept)
    @param csvName: name of the CSV-file the data should be constructed from
    (first line holds the column names)
    @param txtName: name of a whitespace separated file the data should be
    constructed from (first line holds the column names)
    @param data: the actual data to use (a sequence of rows)
    @param names: the names for the column header
    @param title: a name that is used to make unique header names. It is
    prepended to every column name except the time"""

    def floatDType(columnNames):
        # every column is stored as a double precision float
        return [(n,'f8') for n in columnNames]

    def firstLine(fileName):
        # 'with' makes sure that the file is closed again
        with open(fileName) as f:
            return f.readline().strip()

    def isGiven(value):
        # The truth value of a numpy-array with more than one element
        # is ambiguous (raises ValueError), so the length is tested instead
        if value is None:
            return False
        try:
            return len(value)>0
        except TypeError:
            return True

    self.title=title

    if (csvName or txtName) and isGiven(data):
        error("SpreadsheetData is either constructed from data or from a file")

    if csvName:
        try:
            rec=numpy.recfromcsv(csvName)
            data=[tuple(float(x) for x in i) for i in rec]
            names=list(rec.dtype.names)
        except AttributeError:
            # fallback for numpy-variants that do not offer recfromcsv
            data=list(map(tuple,numpy.loadtxt(csvName,
                                              delimiter=',',
                                              skiprows=1)))
            names=firstLine(csvName).split(',')
    elif txtName:
        try:
            rec=numpy.recfromtxt(txtName,names=True)
            data=[tuple(float(x) for x in i) for i in rec]
            names=list(rec.dtype.names)
        except AttributeError:
            # fallback for numpy-variants that do not offer recfromtxt.
            # The first word of the header line is dropped
            data=list(map(tuple,numpy.loadtxt(txtName)))
            names=firstLine(txtName).split()[1:]
    else:
        # identity tests: 'data!=None' is evaluated element-wise for
        # numpy-arrays and can't be used as a condition
        if data is not None and names is None:
            error("No names given for the data")
        data=list(map(tuple,data))

    self.data=numpy.array(data,dtype=floatDType(names))

    if timeName:
        try:
            index=list(self.data.dtype.names).index(timeName)
        except ValueError:
            error("Time name",timeName,"not in",self.data.dtype.names)
    else:
        index=0
    self.time=self.data.dtype.names[index]

    if validData:
        # keep the time and the requested columns in their original order
        usedNames=[n for n in self.data.dtype.names
                   if n==self.time or n in validData]
        usedData=numpy.array([tuple(self.data[n]) for n in usedNames]).transpose()
        self.data=numpy.array(list(map(tuple,usedData)),
                              dtype=floatDType(usedNames))
        index=usedNames.index(self.time)

    if self.title is not None:
        # prefix all columns but the time with the title
        self.data.dtype.names=tuple(n if i==index else self.title+" "+n
                                    for i,n in enumerate(self.data.dtype.names))
110
113
115 return self.data.size
116
def writeCSV(self,fName,
             delimiter=","):
    """Write data to a CSV-file. The first line holds the column names,
    the following lines the data
    @param fName: Name of the file
    @param delimiter: Delimiter to be used in the CSV-file"""

    header=delimiter.join(self.names())+"\n"
    if PY3:
        # the file is opened in binary mode so the text has to be converted
        header=toByte(header)

    # 'with' guarantees that the file is flushed and closed, even if
    # writing the data fails
    with open(fName,"wb") as f:
        f.write(header)
        numpy.savetxt(f,self.data,delimiter=delimiter)
130
132 """Return the range of times
133 @param time: name of the time. If None the first column is used"""
134 if time==None:
135 time=self.time
136 t=self.data[time]
137
138 return (t[0],t[-1])
139
def join(self,other,time=None,prefix=None):
    """Join this object with another. Assume that they have the same
    amount of rows and that they have one column that designates the
    time and is called the same and has the same values
    @param other: the other array
    @param time: name of the time. If None the time column of this object is used
    @param prefix: String that is added to those names of the other data
    that already exist in this data. If none is given then the title of
    the other data followed by '_' is used ('other_' if there is no title)
    @return: a new SpreadsheetData with the columns of both objects
    @raise WrongDataSize: if the number of rows differs"""
    if time is None:
        time=self.time
    if prefix is None:
        prefix=other.title
        if prefix is None:
            prefix="other_"
        else:
            prefix+="_"

    t1=self.data[time]
    t2=other.data[time]
    if len(t1)!=len(t2):
        raise WrongDataSize()
    # max() fails on an empty sequence. Two empty time columns
    # trivially have the same values
    if len(t1)>0 and max(abs(t1-t2))>1e-10:
        error("Times do not have the same values")

    # names() is only queried once instead of once per column
    ownNames=list(self.names())
    taken=set(ownNames)

    names=list(ownNames)
    data=[self.data[n] for n in ownNames]

    for n in other.names():
        if n==time:
            continue
        if n in taken:
            names.append(prefix+n)
        else:
            names.append(n)
        data.append(other.data[n])

    # Hand the rows over as plain tuples: a numpy-array has no
    # unambiguous truth value and can not be compared to None, which
    # breaks simple "is there data" tests on the receiving side
    return SpreadsheetData(names=names,
                           data=list(zip(*data)))
180
182 """Convinience function for joining data"""
183 return self.join(other)
184
def append(self,
           name,
           data,
           allowDuplicates=False):
    """Add another column to the data. Assumes that the number of rows is right
    @param name: the name of the column
    @param data: the actual data
    @param allowDuplicates: If the name already exists make it unique by appending _1, _2 ..."""

    arr = numpy.asarray(data)
    newname=name
    existing=set(self.names())
    if allowDuplicates and newname in existing:
        cnt=1
        while newname in existing:
            newname="%s_%d" % (name,cnt)
            cnt+=1
        warning("Changing name",name,"to",newname,"bacause it already exists in the data")

    # build a new record array that has one more column
    newdtype = numpy.dtype(self.data.dtype.descr + [(newname, 'f8')])
    newrec = numpy.empty(self.data.shape, dtype=newdtype)
    for field in self.data.dtype.fields:
        newrec[field] = self.data[field]
    # The new column is stored under the (possibly changed) unique name.
    # Using the requested name here would overwrite the existing column
    # and leave the new column uninitialized
    newrec[newname] = arr

    self.data=newrec
209
def __call__(self,
             t,
             name,
             time=None,
             invalidExtend=False,
             noInterpolation=False):
    """'Evaluate' the data at a specific time by linear interpolation
    @param t: the time at which the data should be evaluated
    @param name: name of the data column to be evaluated
    @param time: name of the time column. If none is given then the time
    column of this object is used. The values in that column have to be
    in ascending order (they are searched by bisection)
    @param invalidExtend: if t is out of the valid range then use the smallest or the biggest value. If False use nan
    @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
    @return: the value at t or 'nan'"""

    if time is None:
        time=self.time

    x=self.data[time]
    y=self.data[name]

    # outside of the range covered by the data
    if t<x[0]:
        return y[0] if invalidExtend else float('nan')
    if t>x[-1]:
        return y[-1] if invalidExtend else float('nan')

    # Exact hits on the end points need no interpolation. This also
    # covers data with only one row where the interpolation formula
    # below would evaluate 0/0
    if t==x[0]:
        return y[0]
    if t==x[-1]:
        return y[-1]

    # bisection for the interval that contains t
    iLow=0
    iHigh=len(x)-1

    while (iHigh-iLow)>1:
        # integer division: a float is no valid array index in Python 3
        iNew = iLow + (iHigh-iLow)//2

        if x[iNew]==t:
            # exact hit of a data point
            return y[iNew]
        elif t < x[iNew]:
            iHigh=iNew
        else:
            iLow=iNew

    if noInterpolation:
        return float('nan')

    return y[iLow] + (y[iHigh]-y[iLow])*(t-x[iLow])/(x[iHigh]-x[iLow])
265
def addTimes(self,times,time=None,interpolate=False,invalidExtend=False):
    """Make sure that every requested time has a row in the data. Rows
    that already exist are kept, missing ones are inserted at the
    position given by their time
    @param times: the times that should be present afterwards
    (walked through in the given order together with the existing rows)
    @param time: the name of the column with the time. If None the time
    column of this object is used
    @param interpolate: fill the new rows by evaluating this object at
    the new time. Otherwise the new rows hold 'nan'
    @param invalidExtend: passed on to the evaluation: if t is out of
    the valid range then use the smallest or the biggest value. If
    False use nan"""

    if time is None:
        time=self.time

    stamps=self.data[time]
    nOld=len(stamps)
    nWanted=len(times)

    # nothing to do if the requested times are exactly the present ones
    if nWanted==nOld and not any(times[k]!=stamps[k] for k in range(nWanted)):
        return

    def freshRow(t):
        # a row for a time that is not yet in the data
        row=[]
        for col in self.names():
            if col==time:
                row.append(t)
            elif interpolate:
                row.append(self(t,col,time=time,invalidExtend=invalidExtend))
            else:
                row.append(float('nan'))
        return row

    merged=[]
    src=0
    for wanted in times:
        # copy all the existing rows that come before the wanted time
        while src<nOld and wanted>stamps[src]:
            merged.append(self.data[src])
            src+=1

        if src<nOld and wanted==stamps[src]:
            # the time is already there: keep the existing row
            merged.append(self.data[src])
            src+=1
            continue

        merged.append(freshRow(wanted))

    # existing rows after the last wanted time
    merged.extend(self.data[k] for k in range(src,nOld))

    self.data=numpy.array([tuple(row) for row in merged],dtype=self.data.dtype)
325
def resample(self,
             other,
             name,
             otherName=None,
             time=None,
             invalidExtend=False,
             extendData=False,
             noInterpolation=False):
    """Evaluate another data-set at the times of this data-set
    @param other: the other data-set. It is called once for every time
    in this data-set
    @param name: name of the data column to be evaluated
    @param otherName: name of the column in the other data-set. If
    unset then name is used
    @param time: name of the time column. If none is given then the
    time column of this object is used
    @param invalidExtend: handed to the evaluation of the other data-set
    @param extendData: if the other data-set starts earlier or ends
    later than this one then rows for those times are added to this
    data-set (all columns but the time are 'nan') before resampling
    @param noInterpolation: handed to the evaluation of the other data-set
    @return: list with one value for every time in this data-set"""
    if time is None:
        time=self.time

    def nanRow(stamp):
        # row that only holds a time. Everything else is 'nan'
        return tuple(stamp if col==time else float('nan')
                     for col in self.names())

    if extendData and (self.data[time][0] > other.data[time][0]
                       or self.data[time][-1] < other.data[time][-1]):
        otherTimes=other.data[time]

        # times of the other data-set before the start of this one
        first=self.data[time][0]
        pre=[]
        for stamp in otherTimes:
            if not stamp<first:
                break
            pre.append(nanRow(stamp))
        if pre:
            head=numpy.array(pre,dtype=self.data.dtype)
            self.data=numpy.concatenate((head,self.data))

        # times of the other data-set after the end of this one,
        # collected back to front
        last=self.data[time][-1]
        post=[]
        for stamp in reversed(otherTimes):
            if not stamp>last:
                break
            post.append(nanRow(stamp))
        post.reverse()
        if post:
            tail=numpy.array(post,dtype=self.data.dtype)
            self.data=numpy.concatenate((self.data,tail))

    column=otherName if otherName else name

    return [other(t,column,
                  time=time,
                  invalidExtend=invalidExtend,
                  noInterpolation=noInterpolation)
            for t in self.data[time]]
397
def compare(self,
            other,
            name,
            otherName=None,
            time=None,
            common=False,
            minTime=None,
            maxTime=None):
    """Compare this data-set with another. The time-points of this dataset are used as
    a reference. Returns a dictionary with a number of norms: maximum absolute
    difference (and the time where it occurs), average absolute difference
    on all timepoints, average absolute difference weighted by time and
    the time range that was compared. All entries are None if there is
    nothing to compare
    @param other: the other data-set
    @param name: name of the data column to be evaluated
    @param otherName: name of the data column in the other data-set. If
    unset then name is used
    @param time: name of the time column. If none is given then the
    time column of this object is used. Assumes that this column is
    ordered in ascending order
    @param common: cut off the parts where not both data sets are defined
    @param minTime: first time which should be compared
    @param maxTime: last time to compare"""

    if time is None:
        time=self.time

    x=self.data[time]
    y=self.data[name]
    y2=self.resample(other,name,otherName=otherName,time=time,invalidExtend=True)

    result = { "max" : None,
               "maxPos" : None,
               "average" : None,
               "wAverage" : None,
               "tMin": None,
               "tMax": None }

    minT,maxT=minTime,maxTime
    if common:
        # first and last time-point of this data-set that lies inside
        # the range covered by both data-sets
        minTmp,maxTmp=max(x[0],other.data[time][0]),min(x[-1],other.data[time][-1])
        for val in x:
            if minTmp<=val:
                minT=val
                break
        for val in x[::-1]:
            if maxTmp>=val:
                maxT=val
                break
    else:
        minT,maxT=x[0],x[-1]

    if minT is None or maxT is None:
        return result

    # explicit test for None: a limit of 0 is a valid time
    if minTime is not None and minTime>minT:
        minT=minTime

    if maxTime is not None and maxTime<maxT:
        maxT=maxTime

    if maxT<minT:
        return result

    maxDiff=0
    maxPos=x[0]
    sumDiff=0
    sumWeighted=0
    cnt=0
    last=len(x)-1

    for i,t in enumerate(x):
        if t<minT or t>maxT:
            continue
        cnt+=1

        diff=abs(y[i]-y2[i])
        if diff>maxDiff:
            maxDiff=diff
            maxPos=t
        sumDiff+=diff
        # every point represents half of the interval to each neighbour
        # as long as that neighbour side is inside the compared range
        weight=0
        if i>0 and t>minT:
            weight+=(t-x[i-1])/2
        if i<last and t<maxT:
            weight+=(x[i+1]-t)/2
        sumWeighted+=weight*diff

    if cnt==0:
        # no time-point inside the range: nothing to average
        return result

    average=sumDiff/cnt
    span=maxT-minT
    # a range of length zero has no time to weight with: use the
    # plain average instead of dividing by zero
    wAverage=sumWeighted/span if span>0 else average

    return { "max" : maxDiff,
             "maxPos" : maxPos,
             "average" : average,
             "wAverage" : wAverage,
             "tMin": minT,
             "tMax": maxT}
492
def metrics(self,
            name,
            time=None,
            minTime=None,
            maxTime=None):
    """Calculates the metrics for a data set. Returns a dictionary
    with a number of norms: minimum, maximum, average, average weighted by time.
    The entries tMin and tMax are the first and the last time of the
    whole data (not of the range selected with minTime and maxTime)
    @param name: name of the data column to be evaluated
    @param time: name of the time column. If none is given then the
    time column of this object is used. Assumes that this column is
    ordered in ascending order
    @param minTime: first time to take metrics from
    @param maxTime: latest time to take metrics from"""

    if time is None:
        time=self.time

    x=self.data[time]
    y=self.data[name]

    # start values that every real value replaces. They are returned
    # unchanged if no time-point is inside the range
    minVal=1e40
    maxVal=-1e40
    total=0
    sumWeighted=0

    minT,maxT=x[0],x[-1]

    # explicit test for None: a limit of 0 is a valid time
    if minTime is not None and minTime>minT:
        minT=minTime

    if maxTime is not None and maxTime<maxT:
        maxT=maxTime

    cnt=0
    last=len(x)-1

    for i,t in enumerate(x):
        if t<minT or t>maxT:
            continue
        cnt+=1
        val=y[i]
        maxVal=max(val,maxVal)
        minVal=min(val,minVal)
        total+=val
        # Every point represents half of the interval to each neighbour,
        # but only on the sides that are inside the selected range.
        # Otherwise the weights add up to more than the length of the
        # range and the weighted average of a restricted range is too big
        weight=0
        if i>0 and t>minT:
            weight+=(t-x[i-1])/2
        if i<last and t<maxT:
            weight+=(x[i+1]-t)/2
        sumWeighted+=weight*val

    average=total/max(cnt,1)
    span=maxT-minT
    # a range of length zero has no time to weight with: use the
    # plain average instead of dividing by zero
    wAverage=sumWeighted/span if span>0 else average

    return { "max" : maxVal,
             "min" : minVal,
             "average" : average,
             "wAverage" : wAverage,
             "tMin": x[0],
             "tMax": x[-1]}
550
551
552