Package PyFoam :: Package Basics :: Module SpreadsheetData
[hide private]
[frames | no frames]

Source Code for Module PyFoam.Basics.SpreadsheetData

  1  #  ICE Revision: $Id: $  
  2  """ 
  3  Data that can go into a spreadsheet (title line and rectangular data) 
  4  """ 
  5   
  6  import numpy,copy 
  7   
  8  from PyFoam.Error import error,FatalErrorPyFoamException,warning 
  9   
class WrongDataSize(FatalErrorPyFoamException):
    """Error that signals that two arrays do not have the same size"""

    def __init__(self):
        message = "Size of the arrays differs"
        FatalErrorPyFoamException.__init__(self, message)
13
class SpreadsheetData(object):
    """
    Collects data that could go into a spreadsheet. The focus of this class is on
    storing all the data at once

    The data is stored in C{self.data}, a one-dimensional structured numpy-array
    with one C{'f8'}-field per column. C{self.time} is the name of the column
    that is used as the time and C{self.title} is the (optional) title
    """

    @staticmethod
    def _floatDtype(names):
        """Build the dtype-description for a table where every column is a float
        @param names: the names of the columns"""
        return [(n, 'f8') for n in names]

    @staticmethod
    def _toRecords(rows, dtype):
        """Convert a sequence of rows to a structured array
        @param rows: iterable with the rows (tuples, lists, records, array-rows)
        @param dtype: the dtype of the resulting array"""
        # numpy only accepts tuples as the rows of a structured array
        return numpy.array([tuple(r) for r in rows], dtype=dtype)

    @staticmethod
    def _readFile(fName, delimiter=None):
        """Read the column names and the data from a text file
        @param fName: name of the file
        @param delimiter: the column separator. If None the columns are separated
        by whitespace and the header line starts with a comment marker
        @return: a tuple with the list of rows and the list of names"""
        reader = getattr(numpy, "genfromtxt", None)
        if reader is not None:
            # these are the calls that numpy.recfromcsv/numpy.recfromtxt
            # (removed in numpy 2) used to make
            if delimiter is None:
                rec = reader(fName, names=True, dtype=None)
            else:
                rec = reader(fName, names=True, dtype=None,
                             delimiter=delimiter, case_sensitive="lower")
            # a file with only one row gives a 0-d array that can't be iterated
            rec = numpy.atleast_1d(rec)
            rows = [tuple(float(v) for v in row) for row in rec]
            return rows, list(rec.dtype.names)

        # for old numpy-versions
        if delimiter is None:
            raw = numpy.loadtxt(fName)
        else:
            raw = numpy.loadtxt(fName, delimiter=delimiter, skiprows=1)
        with open(fName) as f:
            header = f.readline().strip()
        if delimiter is None:
            # the first entry is the comment marker
            names = header.split()[1:]
        else:
            names = header.split(delimiter)
        return [tuple(r) for r in raw], names

    def __init__(self,
                 timeName=None,
                 validData=None,
                 csvName=None,
                 txtName=None,
                 data=None,
                 names=None,
                 title=None):
        """Either this is constructed from a file or from the data and the column headers

        @param timeName: the data column that is to be considered the time in this file.
        If None the first column is used
        @param validData: names of the valid data columns (all others should be discarded)
        @param csvName: name of the CSV-file the data should be constructed from,
        @param txtName: name of a file the data should be constructed from,
        @param data: the actual data to use (a sequence of rows)
        @param names: the names for the column header
        @param title: a name that is used to make unique header names. It is
        prepended to the names of all columns except the time"""

        self.title = title

        # data may be a numpy-array: only compare it with None by identity
        if (csvName or txtName) and data is not None:
            error("SpreadsheetData is either constructed from data or from a file")

        if csvName:
            data, names = self._readFile(csvName, delimiter=',')
        elif txtName:
            data, names = self._readFile(txtName)
        elif data is not None and names is None:
            error("No names given for the data")

        # make sure that everything is float
        self.data = self._toRecords(data, self._floatDtype(names))

        if timeName:
            if timeName not in self.data.dtype.names:
                error("Time name", timeName, "not in", self.data.dtype.names)
            self.time = timeName
        else:
            self.time = self.data.dtype.names[0]

        if validData:
            usedNames = [n for n in self.data.dtype.names
                         if n == self.time or n in validData]
            # zip turns the selected columns back into rows
            self.data = self._toRecords(zip(*[self.data[n] for n in usedNames]),
                                        self._floatDtype(usedNames))

        if self.title is not None:
            renamed = [n if n == self.time else self.title + " " + n
                       for n in self.data.dtype.names]
            # same memory layout, only the field names differ
            self.data = self.data.view(numpy.dtype(self._floatDtype(renamed)))

    def names(self):
        """Return a tuple with the names of the columns"""
        return tuple(self.data.dtype.names)

    def size(self):
        """Return the number of rows"""
        return self.data.size

    def writeCSV(self, fName,
                 delimiter=","):
        """Write data to a CSV-file
        @param fName: Name of the file
        @param delimiter: Delimiter to be used in the CSV-file"""

        with open(fName, "w") as f:
            f.write(delimiter.join(self.names()) + "\n")
            numpy.savetxt(f, self.data, delimiter=delimiter)

    def tRange(self, time=None):
        """Return the range of times as a tuple (first,last)
        @param time: name of the time. If None the time column of this object is used"""
        if time is None:
            time = self.time
        t = self.data[time]

        return (t[0], t[-1])

    def join(self, other, time=None, prefix=None):
        """Join this object with another. Assume that they have the same
        amount of rows and that they have one column that designates the
        time and is called the same and has the same values
        @param other: the other array
        @param time: name of the time. If None the time column of this object is used
        @param prefix: String that is added to the names of the other data set
        that are already present in this one. If none is given then the title
        of the other data set (or 'other') followed by an underscore is used
        @return: a new SpreadsheetData with the columns of both data sets
        @raise WrongDataSize: if the number of rows differs"""
        if time is None:
            time = self.time
        if prefix is None:
            if other.title is None:
                prefix = "other_"
            else:
                prefix = other.title + "_"

        t1 = self.data[time]
        t2 = other.data[time]
        if len(t1) != len(t2):
            raise WrongDataSize()
        # the maximum of an empty array is undefined
        if len(t1) > 0 and numpy.max(numpy.abs(t1 - t2)) > 1e-10:
            error("Times do not have the same values")

        ownNames = self.names()
        names = list(ownNames)
        columns = [self.data[n] for n in ownNames]

        for n in other.names():
            if n == time:
                continue
            if n in ownNames:
                names.append(prefix + n)
            else:
                names.append(n)
            columns.append(other.data[n])

        # keep the time column of this object even if it is not the first column
        return SpreadsheetData(names=names,
                               data=numpy.array(columns).transpose(),
                               timeName=self.time)

    def __add__(self, other):
        """Convenience function for joining data"""
        return self.join(other)

    def append(self,
               name,
               data,
               allowDuplicates=False):
        """Add another column to the data. Assumes that the number of rows is right
        @param name: the name of the column
        @param data: the actual data
        @param allowDuplicates: If the name already exists make it unique by appending _1, _2 ..."""

        arr = numpy.asarray(data)
        existing = self.names()
        newname = name
        if newname in existing and allowDuplicates:
            cnt = 1
            while newname in existing:
                newname = "%s_%d" % (name, cnt)
                cnt += 1
            warning("Changing name", name, "to", newname, "bacause it already exists in the data")
        newdtype = numpy.dtype(self.data.dtype.descr + [(newname, 'f8')])
        newrec = numpy.empty(self.data.shape, dtype=newdtype)
        for field in existing:
            newrec[field] = self.data[field]
        # write to the (possibly renamed) new column, not to the original one
        newrec[newname] = arr

        self.data = newrec

    def __call__(self,
                 t,
                 name,
                 time=None,
                 invalidExtend=False,
                 noInterpolation=False):
        """'Evaluate' the data at a specific time by linear interpolation
        @param t: the time at which the data should be evaluated
        @param name: name of the data column to be evaluated
        @param time: name of the time column. If none is given then the time column
        of this object is used. Assumes that that column is ordered in ascending order
        @param invalidExtend: if t is out of the valid range then use the smallest or the biggest value. If False use nan
        @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'"""

        if time is None:
            time = self.time

        x = self.data[time]
        y = self.data[name]
        nan = float('nan')

        if len(x) == 0:
            return nan

        # get extremes
        if t < x[0]:
            if invalidExtend:
                return y[0]
            return nan
        if t > x[-1]:
            if invalidExtend:
                return y[-1]
            return nan

        # exact hits on the end-points need no interpolation (this also
        # avoids 0/0 for data with only one row)
        if t == x[0]:
            return y[0]
        if t == x[-1]:
            return y[-1]

        if len(x) < 2:
            # only reached if t is nan
            return nan

        iLow = 0
        iHigh = len(x) - 1

        while (iHigh - iLow) > 1:
            # integer division: the result is used as an index
            iNew = iLow + (iHigh - iLow) // 2

            if x[iNew] == t:
                # we got lucky
                return y[iNew]
            elif t < x[iNew]:
                iHigh = iNew
            else:
                iLow = iNew

        if noInterpolation:
            return nan
        return y[iLow] + (y[iHigh] - y[iLow]) * (t - x[iLow]) / (x[iHigh] - x[iLow])

    def addTimes(self, times, time=None, interpolate=False, invalidExtend=False):
        """Extend the data so that all new times are represented (add rows
        if they are not there)
        @param time: the name of the column with the time
        @param times: the times that should be there (in ascending order)
        @param interpolate: interpolate the data in new rows. Otherwise
        insert 'nan'
        @param invalidExtend: if t is out of the valid range then use
        the smallest or the biggest value. If False use nan"""

        if time is None:
            time = self.time

        own = self.data[time]
        nOwn = len(own)

        if len(times) == nOwn and all(a == b for a, b in zip(times, own)):
            # No difference between the times
            return

        names = self.names()
        rows = []
        iOwn = 0
        for t in times:
            # copy the original rows that lie before the requested time
            while iOwn < nOwn and t > own[iOwn]:
                rows.append(tuple(self.data[iOwn]))
                iOwn += 1

            if iOwn < nOwn and t == own[iOwn]:
                # the time is already there
                rows.append(tuple(self.data[iOwn]))
                iOwn += 1
                continue

            newRow = []
            for n in names:
                if n == time:
                    newRow.append(t)
                elif interpolate:
                    # self.data is still the original data at this point
                    newRow.append(self(t, n, time=time, invalidExtend=invalidExtend))
                else:
                    newRow.append(float('nan'))
            rows.append(tuple(newRow))

        # original rows after the last requested time
        rows.extend(tuple(r) for r in self.data[iOwn:])

        self.data = numpy.array(rows, dtype=self.data.dtype)

    def resample(self,
                 other,
                 name,
                 otherName=None,
                 time=None,
                 invalidExtend=False,
                 extendData=False,
                 noInterpolation=False):
        """Calculate values from another dataset at the same times as in this data-set
        @param other: the other data-set
        @param name: name of the data column to be evaluated
        @param otherName: name of the column in the other data-set. If none is
        given then name is used
        @param time: name of the time column (in both data-sets). If none is given
        then the time column of this object is used
        @param invalidExtend: see __call__
        @param extendData: if the time range of the other data-set is bigger than the
        range of this one then extend this data-set (with rows of 'nan') before resampling
        @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
        @return: list with one value of the other data-set for every row of this data-set"""
        if time is None:
            time = self.time

        if extendData:
            names = self.names()
            otherTimes = other.data[time]

            def nanRow(t):
                # a row that only has a value in the time column
                return tuple(t if n == time else float('nan') for n in names)

            first = self.data[time][0]
            pre = []
            for t in otherTimes:
                if not t < first:
                    break
                pre.append(nanRow(t))
            if pre:
                self.data = numpy.concatenate(
                    (numpy.array(pre, dtype=self.data.dtype), self.data))

            last = self.data[time][-1]
            post = []
            for t in otherTimes[::-1]:
                if not t > last:
                    break
                post.append(nanRow(t))
            # collected from the back: restore the ascending order
            post.reverse()
            if post:
                self.data = numpy.concatenate(
                    (self.data, numpy.array(post, dtype=self.data.dtype)))

        column = name
        if otherName:
            column = otherName

        return [other(t, column,
                      time=time,
                      invalidExtend=invalidExtend,
                      noInterpolation=noInterpolation)
                for t in self.data[time]]

    def compare(self, other, name, otherName=None, time=None, common=False):
        """Compare this data-set with another. The time-points of this dataset are used as
        a reference. Returns a dictionary with a number of norms: maximum absolute
        difference ('max') and the time where it occurs ('maxPos'), average absolute
        difference on all timepoints ('average'), average absolute difference weighted
        by time ('wAverage') and the limits of the compared range ('tMin', 'tMax').
        All entries are None if there is no time-point that can be compared
        @param other: the other data-set
        @param name: name of the data column to be evaluated
        @param otherName: name of the column in the other data-set. If none is
        given then name is used
        @param time: name of the time column. If none is given then the time column
        of this object is used
        @param common: cut off the parts where not both data sets are defined"""

        if time is None:
            time = self.time

        x = self.data[time]
        y = self.data[name]
        y2 = self.resample(other, name, otherName=otherName, time=time, invalidExtend=True)

        nothing = {"max": None,
                   "maxPos": None,
                   "average": None,
                   "wAverage": None,
                   "tMin": None,
                   "tMax": None}

        if len(x) == 0:
            return nothing

        minT, maxT = None, None
        if common:
            lower = max(x[0], other.data[time][0])
            upper = min(x[-1], other.data[time][-1])
            # first and last own time-point inside the common range
            for t in x:
                if lower <= t:
                    minT = t
                    break
            for t in x[::-1]:
                if upper >= t:
                    maxT = t
                    break
        else:
            minT, maxT = x[0], x[-1]

        # minT>maxT means that no own time-point lies in the common range
        if minT is None or maxT is None or minT > maxT:
            return nothing

        maxDiff = 0
        maxPos = minT
        sumDiff = 0
        sumWeighted = 0
        cnt = 0

        for i, t in enumerate(x):
            if t < minT or t > maxT:
                continue
            cnt += 1

            diff = abs(y[i] - y2[i])
            if diff > maxDiff:
                maxDiff = diff
                maxPos = t
            sumDiff += diff
            # every point is weighted with half the distance to its neighbours
            weight = 0
            if t > minT:
                weight += (t - x[i - 1]) / 2
            if t < maxT:
                weight += (x[i + 1] - t) / 2
            sumWeighted += weight * diff

        average = sumDiff / cnt
        span = maxT - minT
        if span > 0:
            wAverage = sumWeighted / span
        else:
            # only one time-point: no time to weight with
            wAverage = average

        return {"max": maxDiff,
                "maxPos": maxPos,
                "average": average,
                "wAverage": wAverage,
                "tMin": minT,
                "tMax": maxT}

    def metrics(self, name, time=None):
        """Calculates the metrics for a data set. Returns a dictionary
        with a number of norms: minimum, maximum (both ignore 'nan'), average,
        average weighted by time and the first and the last time. All entries
        are None if there is no data
        @param name: name of the data column to be evaluated
        @param time: name of the time column. If none is given then the time column
        of this object is used"""

        if time is None:
            time = self.time

        x = self.data[time]
        y = self.data[name]

        if len(x) == 0:
            return {"max": None,
                    "min": None,
                    "average": None,
                    "wAverage": None,
                    "tMin": None,
                    "tMax": None}

        # the builtin min/max give order-dependent results if there are nan
        valid = y[~numpy.isnan(y)]
        if len(valid) > 0:
            maxVal = valid.max()
            minVal = valid.min()
        else:
            maxVal = minVal = float('nan')

        total = 0
        sumWeighted = 0
        iLast = len(x) - 1

        for i, t in enumerate(x):
            val = y[i]
            total += val
            # every point is weighted with half the distance to its neighbours
            weight = 0
            if i > 0:
                weight += (t - x[i - 1]) / 2
            if i < iLast:
                weight += (x[i + 1] - t) / 2
            sumWeighted += weight * val

        average = total / len(x)
        span = x[-1] - x[0]
        if span > 0:
            wAverage = sumWeighted / span
        else:
            # only one time-point: no time to weight with
            wAverage = average

        return {"max": maxVal,
                "min": minVal,
                "average": average,
                "wAverage": wAverage,
                "tMin": x[0],
                "tMax": x[-1]}
486