1
2 """
3 Data that can go into a spreadsheet (title line and rectangular data)
4 """
5
6 try:
7 import numpy
8 except ImportError:
9
10 import numpypy
11 import numpy
12
13 import copy
14 import re
15
16 from PyFoam.Error import error,FatalErrorPyFoamException,warning
17
18 from PyFoam.ThirdParty.six import PY3
19 from PyFoam.ThirdParty.six import b as toByte
20
24
26 """
27 Collects data that could go into a spreadsheet. The focus of this class is on
28 storing all the data at once
29 """
def __init__(self,
             timeName=None,
             validData=None,
             validMatchRegexp=False,
             csvName=None,
             txtName=None,
             excelName=None,
             data=None,
             names=None,
             title=None):
    """Either this is constructed from a file or from the data and the column headers

    @param timeName: the data column that is to be considered the time in this file
    @param validData: names of the valid data columns (all others should be discarded)
    @param validMatchRegexp: Should the validData be interpreted as regular expressions
    @param csvName: name of the CSV-file the data should be constructed from,
    @param txtName: name of a file the data should be constructed from,
    @param excelName: name of a Excel-file the data should be constructed from (uses the first sheet in the file),
    @param data: the actual data to use
    @param names: the names for the column header
    @param title: a name that is used to make unique header names"""

    def floatTable(rows,columns):
        # Structured array in which every column is stored as a 64-bit float
        return numpy.array(rows,dtype=[(c,'f8') for c in columns])

    self.title=title

    nrFileSpec=len([1 for i in [csvName,txtName,excelName] if i is not None])

    if (nrFileSpec>0) and data is not None:
        error("SpreadsheetData is either constructed from data or from a file")

    if data is None and nrFileSpec>1:
        error("Only one file specification allowed")

    if csvName:
        try:
            rec=numpy.recfromcsv(csvName)
            data=[tuple(float(x) for x in i) for i in rec]
            names=list(rec.dtype.names)
        except AttributeError:
            # fallback if numpy.recfromcsv is not available
            data=list(map(tuple,numpy.loadtxt(csvName,
                                              delimiter=',',
                                              skiprows=1)))
            # read the header line and close the file again
            with open(csvName) as header:
                names=header.readline().strip().split(',')

        # rebuild the array to make sure that every column is a float
        self.data=floatTable(data,names)
    elif txtName:
        try:
            rec=numpy.recfromtxt(txtName,names=True)
            data=[tuple(float(x) for x in i) for i in rec]
            if names is None:
                names=list(rec.dtype.names)
            else:
                nr=len(list(rec.dtype.names))
                if title is None:
                    # the leading surplus names form the title, the
                    # first data column is called "index"
                    off=len(names)-nr+1
                    self.title="_".join(names[:off])
                    names=names[:off]+["index"]+names[off:]
                names=names[-nr:]
        except AttributeError:
            # fallback if numpy.recfromtxt is not available
            data=list(map(tuple,numpy.loadtxt(txtName)))
            # first token of the header line is skipped
            with open(txtName) as header:
                names=header.readline().strip().split()[1:]

        # rebuild the array to make sure that every column is a float
        self.data=floatTable(data,names)
    elif excelName:
        import pandas
        rec=pandas.read_excel(excelName).to_records()
        data=[tuple(float(x) for x in i) for i in rec]
        names=list(rec.dtype.names)

        self.data=floatTable(data,names)
    else:
        if data is None:
            error("Neither data nor a file to read the data from was specified")
        if data is not None and names is None:
            error("No names given for the data")

        self.data=floatTable(list(map(tuple,data)),names)

    if timeName:
        try:
            index=list(self.data.dtype.names).index(timeName)
        except ValueError:
            error("Time name",timeName,"not in",self.data.dtype.names)
    else:
        # without an explicit name the first column is the time
        index=0
    self.time=self.data.dtype.names[index]

    if validData:
        usedData=[]
        usedNames=[]

        # the time column is always kept
        for n in self.data.dtype.names:
            if n==self.time or self.validName(n,validData,validMatchRegexp):
                usedData.append(tuple(self.data[n]))
                usedNames.append(n)

        # columns were collected; transpose to get rows again
        usedData=numpy.array(usedData).transpose()
        self.data=floatTable(list(map(tuple,usedData)),usedNames)
        index=list(self.data.dtype.names).index(self.time)

    if self.title is not None:
        # prefix every column except the time column with the title
        self.data.dtype.names=[n if i==index else self.title+" "+n
                               for i,n in enumerate(self.data.dtype.names)]
136
def validName(self,n,validData,validMatchRegexp=False):
    """Check whether a column name is one of the valid names

    @param n: the name to check
    @param validData: the valid names (or patterns)
    @param validMatchRegexp: if True the entries of validData are also
    tried as regular expressions that are searched in the name
    @return: True if the name is in validData or matches one of the patterns"""
    if n in validData:
        return True
    if not validMatchRegexp:
        return False

    for r in validData:
        try:
            exp=re.compile(r)
        except Exception:
            # Entries that are no valid regular expression are skipped.
            # Exception (instead of a bare except) lets KeyboardInterrupt
            # and SystemExit pass through
            continue
        if exp.search(n):
            return True
    return False
151
154
156 return self.data.size
157
def writeCSV(self,fName,
             delimiter=","):
    """Write data to a CSV-file
    @param fName: Name of the file
    @param delimiter: Delimiter to be used in the CSV-file"""

    header=delimiter.join(self.names())+"\n"
    if PY3:
        # the file is opened in binary mode, so the header must be bytes
        header=toByte(header)

    # the with-statement makes sure that the file is closed (and
    # flushed) even if writing the data fails
    with open(fName,"wb") as f:
        f.write(header)
        numpy.savetxt(f,self.data,delimiter=delimiter)
171
173 """Return the range of times
174 @param time: name of the time. If None the first column is used"""
175 if time==None:
176 time=self.time
177 t=self.data[time]
178
179 return (t[0],t[-1])
180
def join(self,other,time=None,prefix=None):
    """Join this object with another. Assume that they have the same
    amount of rows and that they have one column that designates the
    time and is called the same and has the same values
    @param other: the other array
    @param time: name of the time. If None the first column is used
    @param prefix: String that is added to the other names. If none is given then
    the title is used
    @return: a new SpreadsheetData with the columns of both objects
    @raise WrongDataSize: if the number of rows differs"""
    if time is None:
        time=self.time
    if prefix is None:
        prefix=other.title
        if prefix is None:
            prefix="other_"
        else:
            prefix+="_"

    t1=self.data[time]
    t2=other.data[time]
    if len(t1)!=len(t2):
        raise WrongDataSize()
    # max() of an empty sequence raises, so only compare if there are rows
    if len(t1)>0 and max(abs(t1-t2))>1e-10:
        error("Times do not have the same values")

    ownNames=list(self.names())
    names=list(ownNames)
    data=[self.data[n] for n in ownNames]

    for n in other.names():
        if n==time:
            # the time column is only taken from this object
            continue
        # names that clash with one of our own names get the prefix
        names.append(prefix+n if n in ownNames else n)
        data.append(other.data[n])

    # the columns were collected; transpose to get rows
    return SpreadsheetData(names=names,
                           data=numpy.array(data).transpose())
221
223 """Convinience function for joining data"""
224 return self.join(other)
225
227 """Recalc or add a column to the data
228 @param name: the colum (must exist if it is not created. Otherwise it must not exist)
229 @param expr: the expression to calculate. All present column names are usable as variables.
230 There is also a variable data for subscripting if the data is not a valid variable name. If
231 the column is not create then there is also a variable this that is an alias for the name
232 @param create: whether a new data item should be created"""
233 if create and name in self.names():
234 error("Item",name,"already exists in names",self.names())
235 elif not create and not name in self.names():
236 error("Item",name,"not in names",self.names())
237
238 result=eval(expr,dict([(n,self.data[n]) for n in self.names()]+[("data",self.data)]+
239 ([("this",self.data[name] if not create else [])])))
240
241 if not create:
242 self.data[name]=result
243 else:
244 self.append(name,result)
245
def append(self,
           name,
           data,
           allowDuplicates=False):
    """Add another column to the data. Assumes that the number of rows is right
    @param name: the name of the column
    @param data: the actual data
    @param allowDuplicates: If the name already exists make it unique by appending _1, _2 ..."""

    arr = numpy.asarray(data)
    existing=self.names()
    newname=name
    if newname in existing and allowDuplicates:
        cnt=1
        while newname in existing:
            newname="%s_%d" % (name,cnt)
            cnt+=1
        warning("Changing name",name,"to",newname,"bacause it already exists in the data")

    # build a new record array with one additional float column
    newdtype = numpy.dtype(self.data.dtype.descr + [(newname, 'f8')])
    newrec = numpy.empty(self.data.shape, dtype=newdtype)
    for field in self.data.dtype.fields:
        newrec[field] = self.data[field]
    # store under the (possibly renamed) column; using the original name
    # would overwrite the existing column and leave the new one unset
    newrec[newname] = arr

    self.data=newrec
270
def __call__(self,
             t,
             name,
             time=None,
             invalidExtend=False,
             noInterpolation=False):
    """'Evaluate' the data at a specific time by linear interpolation
    @param t: the time at which the data should be evaluated
    @param name: name of the data column to be evaluated. Assumes that that column
    is ordered in ascending order
    @param time: name of the time column. If none is given then the first column is assumed
    @param invalidExtend: if t is out of the valid range then use the smallest or the biggest value. If False use nan
    @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
    @return: the (interpolated) value or 'nan'"""

    if time is None:
        time=self.time

    x=self.data[time]
    y=self.data[name]

    if len(x)==0:
        # nothing to evaluate
        return float('nan')

    if t<x[0]:
        return y[0] if invalidExtend else float('nan')
    if t>x[-1]:
        return y[-1] if invalidExtend else float('nan')

    if noInterpolation:
        if t==x[0]:
            return y[0]
        elif t==x[-1]:
            return y[-1]

    # bisection for the interval that contains t
    iLow=0
    iHigh=len(x)-1

    while (iHigh-iLow)>1:
        # integer division: a float is not a valid index in Python 3
        iNew = iLow + (iHigh-iLow)//2

        if x[iNew]==t:
            # exact hit on a data point
            return y[iNew]
        elif t < x[iNew]:
            iHigh=iNew
        else:
            iLow=iNew

    if noInterpolation:
        return float('nan')

    width=x[iHigh]-x[iLow]
    if width==0:
        # single row or duplicate times: avoid 0/0
        return y[iLow] if t==x[iLow] else float('nan')
    return y[iLow] + (y[iHigh]-y[iLow])*(t-x[iLow])/width
326
def addTimes(self,times,time=None,interpolate=False,invalidExtend=False):
    """Make sure that every requested time has a row in the data. Rows
    for times that are missing are inserted at the right position
    @param time: the name of the column with the time
    @param times: the times that should be there
    @param interpolate: fill the new rows with interpolated data. Otherwise
    they are filled with 'nan'
    @param invalidExtend: passed on to the interpolation: if a time is
    outside the valid range use the smallest or the biggest value
    instead of nan"""

    if time is None:
        time=self.time

    oldTimes=self.data[time]
    nrOld=len(oldTimes)
    nrWanted=len(times)

    # nothing to do if the requested times are exactly the present ones
    if nrWanted==nrOld and all(times[i]==oldTimes[i] for i in range(nrWanted)):
        return

    def freshRow(t):
        # a row for a time that is not yet in the data
        row=[]
        for n in self.names():
            if n==time:
                row.append(t)
            elif interpolate:
                row.append(self(t,n,time=time,invalidExtend=invalidExtend))
            else:
                row.append(float('nan'))
        return row

    merged=[]
    posOld=0
    for posWanted in range(nrWanted):
        wanted=times[posWanted]

        # first take over all the present rows that lie before this time
        while posOld<nrOld and wanted>oldTimes[posOld]:
            merged.append(self.data[posOld])
            posOld+=1

        if posOld<nrOld and wanted==oldTimes[posOld]:
            # the time is already there: keep the present row
            merged.append(self.data[posOld])
            posOld+=1
        else:
            merged.append(freshRow(wanted))

    # the present rows after the last requested time
    merged.extend(self.data[posOld:])

    self.data=numpy.array(list(map(tuple,merged)),dtype=self.data.dtype)
386
def resample(self,
             other,
             name,
             otherName=None,
             time=None,
             invalidExtend=False,
             extendData=False,
             noInterpolation=False):
    """Evaluate another data-set at the times of this data-set
    @param other: the other data-set
    @param name: name of the data column to be evaluated. Assumes that that column
    is ordered in ascending order
    @param otherName: name of the column in the other data-set (if it differs from name)
    @param time: name of the time column. If none is given then the first column is assumed
    @param invalidExtend: see __call__
    @param extendData: if the other data-set covers a bigger time range then add
    rows (filled with 'nan') for the additional times to this data before resampling
    @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
    @return: list with the values of the other data-set at the times of this one"""
    if time is None:
        time=self.time

    if extendData and (
            self.data[time][0] > other.data[time][0] or
            self.data[time][-1] < other.data[time][-1]):

        def blankRow(t):
            # row that only carries the time, everything else is 'nan'
            return tuple(t if n==time else float('nan') for n in self.names())

        otherTimes=other.data[time]

        # times of the other data-set before our first time
        first=self.data[time][0]
        pre=[]
        for t in otherTimes:
            if not t < first:
                break
            pre.append(blankRow(t))
        if pre:
            self.data=numpy.concatenate((numpy.array(pre,dtype=self.data.dtype),
                                         self.data))

        # times of the other data-set after our last time (collected
        # from the end backwards)
        last=self.data[time][-1]
        post=[]
        for t in otherTimes[::-1]:
            if not t > last:
                break
            post.append(blankRow(t))
        post.reverse()
        if post:
            self.data=numpy.concatenate((self.data,
                                         numpy.array(post,dtype=self.data.dtype)))

    column=otherName if otherName else name

    return [other(t,column,
                  time=time,
                  invalidExtend=invalidExtend,
                  noInterpolation=noInterpolation)
            for t in self.data[time]]
458
def compare(self,
            other,
            name,
            otherName=None,
            time=None,
            common=False,
            minTime=None,
            maxTime=None):
    """Compare this data-set with another. The time-points of this dataset are used as
    a reference. Returns a dictionary with a number of norms: maximum absolute
    difference, average absolute difference
    on all timepoints, average absolute difference weighted by time
    @param other: the other data-set
    @param name: name of the data column to be evaluated. Assumes that that column
    is ordered in ascending order
    @param otherName: name of the column in the other data-set (if it differs from name)
    @param time: name of the time column. If none is given then the first column is assumed
    @param common: cut off the parts where not both data sets are defined
    @param minTime: first time which should be compared
    @param maxTime: last time to compare
    @return: dictionary with the keys max, maxPos, average, wAverage, tMin and
    tMax. All values are None if there is nothing to compare"""

    if time is None:
        time=self.time

    x=self.data[time]
    y=self.data[name]
    y2=self.resample(other,name,otherName=otherName,time=time,invalidExtend=True)

    minT,maxT=minTime,maxTime
    if common:
        # restrict to the range in which both data-sets are defined
        minTmp,maxTmp=max(x[0],other.data[time][0]),min(x[-1],other.data[time][-1])
        # first of our times inside the common range
        for val in x:
            if minTmp<=val:
                minT=val
                break
        # last of our times inside the common range
        for val in x[::-1]:
            if maxTmp>=val:
                maxT=val
                break
    else:
        minT,maxT=x[0],x[-1]

    result = { "max" : None,
               "maxPos" : None,
               "average" : None,
               "wAverage" : None,
               "tMin": None,
               "tMax": None }

    if minT is None or maxT is None:
        return result

    # explicit test for None: a limit of 0 is a valid time
    if minTime is not None and minTime>minT:
        minT=minTime

    if maxTime is not None and maxTime<maxT:
        maxT=maxTime

    if maxT<minT:
        return result

    maxDiff=0
    maxPos=x[0]
    sumDiff=0
    sumWeighted=0
    cnt=0

    for i,t in enumerate(x):
        if t<minT or t>maxT:
            continue
        cnt+=1

        diff=abs(y[i]-y2[i])
        if diff>maxDiff:
            maxDiff=diff
            maxPos=x[i]
        sumDiff+=diff
        # every point is weighted with half of the distance to its
        # neighbours (only on the sides that are inside the range)
        weight=0
        if t>minT:
            weight+=(t-x[i-1])/2
        if t<maxT:
            weight+=(x[i+1]-t)/2
        sumWeighted+=weight*diff

    if cnt==0:
        # no time-point inside the range: nothing to average
        return result

    return { "max" : maxDiff,
             "maxPos" : maxPos,
             "average" : sumDiff/cnt,
             "wAverage" : sumWeighted/(maxT-minT),
             "tMin": minT,
             "tMax": maxT}
553
def metrics(self,
            name,
            time=None,
            minTime=None,
            maxTime=None):
    """Calculates the metrics for a data set. Returns a dictionary
    with a number of norms: minimum, maximum, average, average weighted by time
    @param name: name of the data column to be evaluated. Assumes that that column
    is ordered in ascending order
    @param time: name of the time column. If none is given then the first column is assumed
    @param minTime: first time to take metrics from
    @param maxTime: latest time to take metrics from
    @return: dictionary with the keys max, min, average, wAverage, tMin and tMax"""

    if time is None:
        time=self.time

    x=self.data[time]
    y=self.data[name]

    minVal=1e40
    maxVal=-1e40
    total=0
    sumWeighted=0

    minT,maxT=x[0],x[-1]

    # explicit test for None: a limit of 0 is a valid time
    if minTime is not None and minTime>minT:
        minT=minTime

    if maxTime is not None and maxTime<maxT:
        maxT=maxTime

    cnt=0

    for i,t in enumerate(x):
        if t<minT or t>maxT:
            continue
        cnt+=1
        val=y[i]
        maxVal=max(val,maxVal)
        minVal=min(val,minVal)
        total+=val
        # every point is weighted with half of the distance to the
        # neighbouring data points
        weight=0
        if i>0:
            weight+=(t-x[i-1])/2
        if i<(len(x)-1):
            weight+=(x[i+1]-t)/2
        sumWeighted+=weight*val

    # NOTE(review): tMin/tMax report the full range of the data, not the
    # range restricted by minTime/maxTime - confirm that this is intended
    return { "max" : maxVal,
             "min" : minVal,
             "average" : total/max(cnt,1),
             "wAverage" : sumWeighted/(maxT-minT),
             "tMin": x[0],
             "tMax": x[-1]}
611
613 """Return a dictionary of the data in the DataFrame format of pandas
614 @param: drop duplicate times (setting it to False might break certain Pandas-operations)"""
615 try:
616 from PyFoam.Wrappers.Pandas import PyFoamDataFrame
617 except ImportError:
618 warning("No pandas-library installed. Returning None")
619 return None
620
621 return PyFoamDataFrame(self.getSeries(reindex=reindex))
622
624 """Return a dictionary of the data-columns in the Series format of pandas
625 @param: drop duplicate times (setting it to False might break certain Pandas-operations)"""
626 try:
627 import pandas
628 except ImportError:
629 warning("No pandas-library installed. Returning None")
630 return None
631 data={}
632
633 if reindex:
634 realindex=numpy.unique(self.data[self.time])
635
636 for n in self.names():
637 if n!=self.time:
638 data[n]=pandas.Series(self.data[n],
639 index=self.data[self.time],
640 name=n)
641 if reindex:
642 if len(data[n])!=len(realindex):
643 data[n].axes[0].is_unique=True
644 data[n]=data[n].reindex_axis(realindex)
645
646 return data
647
648
649