Package PyFoam :: Package Basics :: Module SpreadsheetData
[hide private]
[frames] | [no frames]

Source Code for Module PyFoam.Basics.SpreadsheetData

  1  #  ICE Revision: $Id: $ 
  2  """ 
  3  Data that can go into a spreadsheet (title line and rectangular data) 
  4  """ 
  5   
  6  try: 
  7      import numpy 
  8  except ImportError: 
  9      # assume this is pypy and retry 
 10      import numpypy 
 11      import numpy 
 12   
 13  import copy 
 14   
 15  from PyFoam.Error import error,FatalErrorPyFoamException,warning 
 16   
 17  from PyFoam.ThirdParty.six import PY3 
 18  from PyFoam.ThirdParty.six import b as toByte 
 19   
class WrongDataSize(FatalErrorPyFoamException):
    """Exception for data sets whose arrays do not have the same size
    (raised by SpreadsheetData.join when the time columns differ in length)"""

    def __init__(self):
        message = "Size of the arrays differs"
        FatalErrorPyFoamException.__init__(self, message)
23
class SpreadsheetData(object):
    """
    Collects data that could go into a spreadsheet (a title line and
    rectangular data). The focus of this class is on storing all the data
    at once.

    The data is held in the numpy record array C{self.data} in which every
    column is stored as 'f8'. C{self.time} is the name of the column that is
    treated as the time, C{self.title} the (optional) title of the data set.
    """

    @staticmethod
    def _floatDtype(names):
        """Build the record dtype that stores every named column as 'f8'"""
        return [(n, 'f8') for n in names]

    @staticmethod
    def _firstLine(fName):
        """Return the stripped first line of a file (the file is closed afterwards)"""
        with open(fName) as f:
            return f.readline().strip()

    @staticmethod
    def _hasData(data):
        """Truth test for the data argument that also works for numpy arrays
        (whose plain truth value is ambiguous)"""
        if data is None:
            return False
        try:
            return len(data) > 0
        except TypeError:
            # objects without a length: fall back to the plain truth value
            return bool(data)

    def __init__(self,
                 timeName=None,
                 validData=None,
                 csvName=None,
                 txtName=None,
                 data=None,
                 names=None,
                 title=None):
        """Either this is constructed from a file or from the data and the column headers

        @param timeName: the data column that is to be considered the time in this file.
        If unset the first column is used
        @param validData: names of the valid data columns (all others are discarded,
        the time column is always kept)
        @param csvName: name of the CSV-file the data should be constructed from,
        @param txtName: name of a file the data should be constructed from,
        @param data: the actual data to use (a sequence of rows)
        @param names: the names for the column header
        @param title: a name that is used to make unique header names. It is
        prepended to every column name except the time column"""

        self.title = title

        if (csvName or txtName) and self._hasData(data):
            error("SpreadsheetData is either constructed from data or from a file")

        if csvName:
            try:
                rec = numpy.recfromcsv(csvName)
                data = [tuple(float(x) for x in i) for i in rec]
                names = list(rec.dtype.names)
            except AttributeError:
                # for numpy-versions that do not offer recfromcsv
                data = list(map(tuple, numpy.loadtxt(csvName,
                                                     delimiter=',',
                                                     skiprows=1)))
                names = self._firstLine(csvName).split(',')

            # redo this to make sure that everything is float
            self.data = numpy.array(data, dtype=self._floatDtype(names))
        elif txtName:
            try:
                rec = numpy.recfromtxt(txtName, names=True)
                data = [tuple(float(x) for x in i) for i in rec]
                names = list(rec.dtype.names)
            except AttributeError:
                # for numpy-versions that do not offer recfromtxt
                data = list(map(tuple, numpy.loadtxt(txtName)))
                # the first word of the header line is skipped
                names = self._firstLine(txtName).split()[1:]

            # redo this to make sure that everything is float
            self.data = numpy.array(data, dtype=self._floatDtype(names))
        else:
            # identity tests: '!=None' is evaluated elementwise for numpy arrays
            if data is not None and names is None:
                error("No names given for the data")

            self.data = numpy.array(list(map(tuple, data)),
                                    dtype=self._floatDtype(names))

        if timeName:
            try:
                index = list(self.data.dtype.names).index(timeName)
            except ValueError:
                error("Time name", timeName, "not in", self.data.dtype.names)
        else:
            index = 0
        self.time = self.data.dtype.names[index]

        if validData:
            # keep only the time and the requested columns (in the original order)
            usedNames = [n for n in self.data.dtype.names
                         if n == self.time or n in validData]
            usedData = numpy.array([tuple(self.data[n])
                                    for n in usedNames]).transpose()
            self.data = numpy.array(list(map(tuple, usedData)),
                                    dtype=self._floatDtype(usedNames))
            index = usedNames.index(self.time)

        if self.title is not None:
            # prefix everything but the time column with the title
            self.data.dtype.names = [n if i == index else self.title + " " + n
                                     for i, n in enumerate(self.data.dtype.names)]

    def names(self):
        """Return a copy of the column names"""
        return copy.copy(self.data.dtype.names)

    def size(self):
        """Return the size of the record array (the number of rows)"""
        return self.data.size

    def writeCSV(self, fName,
                 delimiter=","):
        """Write data to a CSV-file
        @param fName: Name of the file
        @param delimiter: Delimiter to be used in the CSV-file"""

        header = delimiter.join(self.names()) + "\n"

        # the with-statement makes sure that the file is flushed and closed
        with open(fName, "wb") as f:
            if PY3:
                f.write(toByte(header))
            else:
                f.write(header)

            numpy.savetxt(f, self.data, delimiter=delimiter)

    def tRange(self, time=None):
        """Return the range of times as a tuple (first,last)
        @param time: name of the time. If None the time column of this object is used"""
        if time is None:
            time = self.time
        t = self.data[time]

        return (t[0], t[-1])

    def join(self, other, time=None, prefix=None):
        """Join this object with another. Assume that they have the same
        amount of rows and that they have one column that designates the
        time and is called the same and has the same values
        @param other: the other array
        @param time: name of the time. If None the time column of this object is used
        @param prefix: String that is added to the names of the other data that
        already exist in this data. If none is given then the title of the
        other data (or 'other') followed by an underscore is used
        @return: a new SpreadsheetData with the columns of both objects
        @raise WrongDataSize: if the time columns differ in length"""
        if time is None:
            time = self.time
        if prefix is None:
            prefix = other.title
            if prefix is None:
                prefix = "other_"
            else:
                prefix += "_"

        t1 = self.data[time]
        t2 = other.data[time]
        if len(t1) != len(t2):
            raise WrongDataSize()
        # max() of an empty sequence raises, so only compare if there are rows
        if len(t1) > 0 and max(abs(t1 - t2)) > 1e-10:
            error("Times do not have the same values")

        ownNames = self.names()
        names = list(ownNames)
        data = [self.data[n] for n in ownNames]

        for n in other.names():
            if n != time:
                if n in ownNames:
                    names.append(prefix + n)
                else:
                    names.append(n)
                data.append(other.data[n])

        return SpreadsheetData(names=names,
                               data=numpy.array(data).transpose())

    def __add__(self, other):
        """Convenience function for joining data"""
        return self.join(other)

    def append(self,
               name,
               data,
               allowDuplicates=False):
        """Add another column to the data. Assumes that the number of rows is right
        @param name: the name of the column
        @param data: the actual data
        @param allowDuplicates: If the name already exists make it unique by appending _1, _2 ..."""

        arr = numpy.asarray(data)
        existing = self.names()
        newname = name
        if newname in existing and allowDuplicates:
            cnt = 1
            while newname in existing:
                newname = "%s_%d" % (name, cnt)
                cnt += 1
            warning("Changing name", name, "to", newname, "bacause it already exists in the data")
        newdtype = numpy.dtype(self.data.dtype.descr + [(newname, 'f8')])
        newrec = numpy.empty(self.data.shape, dtype=newdtype)
        for field in self.data.dtype.fields:
            newrec[field] = self.data[field]
        # use the (possibly changed) name: writing to 'name' would overwrite
        # the existing column and leave the new one uninitialized
        newrec[newname] = arr

        self.data = newrec

    def __call__(self,
                 t,
                 name,
                 time=None,
                 invalidExtend=False,
                 noInterpolation=False):
        """'Evaluate' the data at a specific time by linear interpolation
        @param t: the time at which the data should be evaluated
        @param name: name of the data column to be evaluated. Assumes that the time column
        is ordered in ascending order
        @param time: name of the time column. If none is given then the time column of this object is assumed
        @param invalidExtend: if t is out of the valid range then use the smallest or the biggest value. If False use nan
        @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
        @return: the value at t ('nan' if there is no data at all)"""

        if time is None:
            time = self.time

        x = self.data[time]
        y = self.data[name]

        if len(x) == 0:
            # nothing to evaluate and nothing to extend
            return float('nan')

        # get extremes
        if t < x[0]:
            if invalidExtend:
                return y[0]
            else:
                return float('nan')
        elif t > x[-1]:
            if invalidExtend:
                return y[-1]
            else:
                return float('nan')

        if noInterpolation:
            if t == x[0]:
                return y[0]
            elif t == x[-1]:
                return y[-1]

        # bisection for the interval that contains t
        iLow = 0
        iHigh = len(x) - 1

        while (iHigh - iLow) > 1:
            # integer division: under Python 3 '/' would produce a float
            # that can not be used as an index
            iNew = iLow + (iHigh - iLow) // 2

            if x[iNew] == t:
                # we got lucky
                return y[iNew]
            elif t < x[iNew]:
                iHigh = iNew
            else:
                iLow = iNew

        if noInterpolation:
            return float('nan')

        dx = x[iHigh] - x[iLow]
        if dx == 0:
            # only one row or identical times: no interval to interpolate on
            return y[iLow]
        return y[iLow] + (y[iHigh] - y[iLow]) * (t - x[iLow]) / dx

    def addTimes(self, times, time=None, interpolate=False, invalidExtend=False):
        """Extend the data so that all new times are represented (add rows
        if they are not there)
        @param time: the name of the column with the time
        @param times: the times that should be there (in ascending order)
        @param interpolate: interpolate the data in new rows. Otherwise
        insert 'nan'
        @param invalidExtend: if t is out of the valid range then use
        the smallest or the biggest value. If False use nan"""

        if time is None:
            time = self.time

        # self.data is only replaced at the very end, so this stays valid
        tOrig = self.data[time]
        nOrig = len(tOrig)

        if len(times) == nOrig and all(times[i] == tOrig[i] for i in range(nOrig)):
            # No difference between the times
            return

        columns = self.names()
        newData = []
        otherI = 0
        originalI = 0
        while otherI < len(times):
            # copy all the existing rows that are before the next new time
            while originalI < nOrig and times[otherI] > tOrig[originalI]:
                newData.append(self.data[originalI])
                originalI += 1

            if originalI < nOrig and times[otherI] == tOrig[originalI]:
                # time is already there: keep the existing row
                newData.append(self.data[originalI])
                originalI += 1
            else:
                t = times[otherI]
                newRow = []
                for n in columns:
                    if n == time:
                        newRow.append(t)
                    elif interpolate:
                        newRow.append(self(t, n, time=time, invalidExtend=invalidExtend))
                    else:
                        newRow.append(float('nan'))
                newData.append(newRow)
            otherI += 1

        # existing rows after the last new time
        while originalI < nOrig:
            newData.append(self.data[originalI])
            originalI += 1

        self.data = numpy.array(list(map(tuple, newData)), dtype=self.data.dtype)

    def _nanRow(self, t, time):
        """Build a row that has t in the time column and 'nan' everywhere else"""
        return [t if n == time else float('nan') for n in self.names()]

    def resample(self,
                 other,
                 name,
                 otherName=None,
                 time=None,
                 invalidExtend=False,
                 extendData=False,
                 noInterpolation=False):
        """Calculate values from another dataset at the same times as in this data-set
        @param other: the other data-set
        @param name: name of the data column to be evaluated. Assumes that the time column
        is ordered in ascending order
        @param otherName: name of the column in the other data-set. If unset name is used
        @param time: name of the time column. If none is given then the time column of this object is assumed
        @param invalidExtend: see __call__
        @param extendData: if the time range of the other data is bigger than
        the range of this data then extend this data (with rows of 'nan') before resampling
        @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
        @return: list with the values of the other data at the times of this data"""
        if time is None:
            time = self.time

        otherT = other.data[time]

        if extendData and (self.data[time][0] > otherT[0] or
                           self.data[time][-1] < otherT[-1]):
            # rows for the times of the other data before the start of this data
            first = self.data[time][0]
            pre = []
            for tOther in otherT:
                if not tOther < first:
                    break
                pre.append(self._nanRow(tOther, time))
            if len(pre) > 0:
                self.data = numpy.concatenate((numpy.array(list(map(tuple, pre)),
                                                           dtype=self.data.dtype),
                                               self.data))

            # rows for the times of the other data after the end of this data
            # (collected backwards and reversed afterwards)
            last = self.data[time][-1]
            post = []
            for tOther in otherT[::-1]:
                if not tOther > last:
                    break
                post.append(self._nanRow(tOther, time))

            post.reverse()
            if len(post) > 0:
                self.data = numpy.concatenate((self.data,
                                               numpy.array(list(map(tuple, post)),
                                                           dtype=self.data.dtype)))

        nm = otherName if otherName else name

        return [other(t, nm,
                      time=time,
                      invalidExtend=invalidExtend,
                      noInterpolation=noInterpolation)
                for t in self.data[time]]

    def compare(self,
                other,
                name,
                otherName=None,
                time=None,
                common=False,
                minTime=None,
                maxTime=None):
        """Compare this data-set with another. The time-points of this dataset are used as
        a reference. Returns a dictionary with a number of norms: maximum absolute
        difference, average absolute difference
        on all timepoints, average absolute difference weighted by time.
        All entries are None if there is nothing to compare
        @param other: the other data-set
        @param name: name of the data column to be evaluated. Assumes that the time column
        is ordered in ascending order
        @param otherName: name of the column in the other data-set. If unset name is used
        @param time: name of the time column. If none is given then the time column of this object is assumed
        @param common: cut off the parts where not both data sets are defined
        @param minTime: first time which should be compared
        @param maxTime: last time to compare"""

        if time is None:
            time = self.time

        x = self.data[time]
        y = self.data[name]
        y2 = self.resample(other, name, otherName=otherName, time=time, invalidExtend=True)

        minT, maxT = minTime, maxTime
        if common:
            minTmp = max(x[0], other.data[time][0])
            maxTmp = min(x[-1], other.data[time][-1])
            # first and last time of this data inside the common range
            for val in x:
                if minTmp <= val:
                    minT = val
                    break
            for val in x[::-1]:
                if maxTmp >= val:
                    maxT = val
                    break
        else:
            minT, maxT = x[0], x[-1]

        result = {"max": None,
                  "maxPos": None,
                  "average": None,
                  "wAverage": None,
                  "tMin": None,
                  "tMax": None}

        if minT is None or maxT is None:
            return result

        # 'is not None' instead of truthiness: 0 is a valid time limit
        if minTime is not None:
            if minTime > minT:
                minT = minTime

        if maxTime is not None:
            if maxTime < maxT:
                maxT = maxTime

        if maxT < minT:
            return result

        maxDiff = 0
        maxPos = x[0]
        sumDiff = 0
        sumWeighted = 0
        cnt = 0
        lastI = len(x) - 1

        for i, t in enumerate(x):
            if t < minT or t > maxT:
                continue
            cnt += 1

            diff = abs(y[i] - y2[i])
            if diff > maxDiff:
                maxDiff = diff
                maxPos = x[i]
            sumDiff += diff
            # half of the intervals to the neighbours (as far as they are
            # inside the compared range and exist)
            weight = 0
            if t > minT and i > 0:
                weight += (t - x[i - 1]) / 2
            if t < maxT and i < lastI:
                weight += (x[i + 1] - t) / 2
            sumWeighted += weight * diff

        if cnt == 0:
            # no time-point inside the range: nothing was compared
            return result

        span = maxT - minT
        if span == 0:
            # a time-weighted average is undefined on a range of length zero
            wAverage = float('nan')
        else:
            wAverage = sumWeighted / span

        return {"max": maxDiff,
                "maxPos": maxPos,
                "average": sumDiff / cnt,
                "wAverage": wAverage,
                "tMin": minT,
                "tMax": maxT}

    def metrics(self,
                name,
                time=None,
                minTime=None,
                maxTime=None):
        """Calculates the metrics for a data set. Returns a dictionary
        with a number of norms: minimum, maximum, average, average weighted by time.
        The entries tMin and tMax are the first and the last time of the data
        (not of the evaluated range)
        @param name: name of the data column to be evaluated. Assumes that the time column
        is ordered in ascending order
        @param time: name of the time column. If none is given then the time column of this object is assumed
        @param minTime: first time to take metrics from
        @param maxTime: latest time to take metrics from"""

        if time is None:
            time = self.time

        x = self.data[time]
        y = self.data[name]

        minVal = 1e40
        maxVal = -1e40
        total = 0
        sumWeighted = 0

        minT, maxT = x[0], x[-1]

        # 'is not None' instead of truthiness: 0 is a valid time limit
        if minTime is not None:
            if minTime > minT:
                minT = minTime

        if maxTime is not None:
            if maxTime < maxT:
                maxT = maxTime

        cnt = 0
        lastI = len(x) - 1

        for i, t in enumerate(x):
            if t < minT or t > maxT:
                continue
            cnt += 1
            val = y[i]
            maxVal = max(val, maxVal)
            minVal = min(val, minVal)
            total += val
            # only intervals inside the evaluated range contribute (same as
            # in compare). Otherwise the weights would not fit (maxT-minT)
            weight = 0
            if i > 0 and t > minT:
                weight += (t - x[i - 1]) / 2
            if i < lastI and t < maxT:
                weight += (x[i + 1] - t) / 2
            sumWeighted += weight * val

        span = maxT - minT
        if span == 0:
            # a time-weighted average is undefined on a range of length zero
            wAverage = float('nan')
        else:
            wAverage = sumWeighted / span

        return {"max": maxVal,
                "min": minVal,
                "average": total / max(cnt, 1),
                "wAverage": wAverage,
                "tMin": x[0],
                "tMax": x[-1]}
550 551 # Should work with Python3 and Python2 552