1
2 """
3 Data that can go into a spreadsheet (title line and rectangular data)
4 """
5
6 import numpy,copy
7
8 from PyFoam.Error import error,FatalErrorPyFoamException,warning
9
13
15 """
16 Collects data that could go into a spreadsheet. The focus of this class is on
17 storing all the data at once
18 """
def __init__(self,
             csvName=None,
             txtName=None,
             data=None,
             names=None,
             title=None):
    """Either this is constructed from a file or from the data and the column headers

    All columns are stored as 8-byte floats in one structured numpy-array
    (self.data) that has one field per column

    @param csvName: name of the CSV-file the data should be constructed from,
    @param txtName: name of a file the data should be constructed from,
    @param data: the actual data to use (a sequence of rows)
    @param names: the names for the column header
    @param title: a name that is used to make unique header names. It is
    prepended to every column name except the first one"""

    self.title=title

    # data may be a numpy-array whose truth value is ambiguous. Therefore
    # it is compared with None instead of being tested for truthiness
    if csvName and data is not None:
        error("SpreadsheetData is either constructed from data or from a file")

    if csvName:
        try:
            # atleast_1d: a file with only one data row would otherwise
            # give a 0-dimensional record that can not be iterated
            rec=numpy.atleast_1d(numpy.recfromcsv(csvName))
            data=[tuple(float(x) for x in i) for i in rec]
            names=list(rec.dtype.names)
        except AttributeError:
            # fallback if numpy.recfromcsv can not be used: the header line
            # and the numbers are read separately
            with open(csvName) as header:
                names=header.readline().strip().split(',')
            raw=numpy.loadtxt(csvName,delimiter=',',skiprows=1)
            # reshape makes sure that there is one row per line even if
            # the file has only one data row
            data=[tuple(row) for row in raw.reshape(-1,len(names))]
    elif txtName:
        try:
            rec=numpy.atleast_1d(numpy.recfromtxt(txtName,names=True))
            data=[tuple(float(x) for x in i) for i in rec]
            names=list(rec.dtype.names)
        except AttributeError:
            # the first word of the header line is skipped
            # (presumably the comment character - TODO confirm)
            with open(txtName) as header:
                names=header.readline().strip().split()[1:]
            raw=numpy.loadtxt(txtName)
            data=[tuple(row) for row in raw.reshape(-1,len(names))]
    else:
        if data is not None and names is None:
            error("No names given for the data")

        data=[tuple(row) for row in data]

    # real lists are needed here: the lazy map/zip objects of newer
    # python-versions are not understood by numpy
    self.data=numpy.array(data,dtype=list(zip(names,['f8']*len(names))))

    if self.title is not None:
        columns=self.data.dtype.names
        self.data.dtype.names=(columns[0],)+tuple(self.title+" "+x for x in columns[1:])
70
73
76
def writeCSV(self,fName,
             delimiter=","):
    """Write data to a CSV-file. The first line holds the column names,
    every following line one row of the data
    @param fName: Name of the file
    @param delimiter: Delimiter to be used in the CSV-file"""

    # the with-block makes sure that the file is flushed and closed
    # even if writing the data fails
    with open(fName,"w") as f:
        f.write(delimiter.join(self.names())+"\n")
        numpy.savetxt(f,self.data,delimiter=delimiter)
86
88 """Return the range of times
89 @param time: name of the time. If None the first column is used"""
90 if time==None:
91 time=self.names()[0]
92 t=self.data[time]
93
94 return (t[0],t[-1])
95
def join(self,other,time=None,prefix=None):
    """Build a new data set from the columns of this object and of another
    one. Both are expected to have the same number of rows and a time
    column with the same name and the same values
    @param other: the other array
    @param time: name of the time. If None the first column is used
    @param prefix: String that is added to those names of the other data
    that already exist in this data. If none is given then the title of
    the other data is used
    @return: a new SpreadsheetData with the columns of both data sets"""
    if time is None:
        time=self.names()[0]
    if prefix is None:
        # NOTE(review): the indentation of this file was lost. It is assumed
        # that the "_" is only appended to the title and not to a prefix
        # given by the caller - confirm
        prefix="other_" if other.title is None else other.title+"_"

    ownTimes=self.data[time]
    otherTimes=other.data[time]
    if len(ownTimes)!=len(otherTimes):
        raise WrongDataSize()
    if max(abs(ownTimes-otherTimes))>1e-10:
        error("Times do not have the same values")

    # all the columns of this data set go first
    columnNames=list(self.names())
    columns=[self.data[n] for n in columnNames]

    # then the columns of the other one (without the time)
    for n in other.names():
        if n==time:
            continue
        # only names that clash with a name in this data get the prefix
        columnNames.append(prefix+n if n in self.names() else n)
        columns.append(other.data[n])

    return SpreadsheetData(names=columnNames,
                           data=numpy.array(columns).transpose())
136
138 """Convinience function for joining data"""
139 return self.join(other)
140
def append(self,
           name,
           data,
           allowDuplicates=False):
    """Add another column to the data. Assumes that the number of rows is right
    @param name: the name of the column
    @param data: the actual data
    @param allowDuplicates: If the name already exists make it unique by appending _1, _2 ..."""

    arr = numpy.asarray(data)
    newname=name
    if newname in self.names() and allowDuplicates:
        cnt=1
        while newname in self.names():
            newname="%s_%d" % (name,cnt)
            cnt+=1
        warning("Changing name",name,"to",newname,"bacause it already exists in the data")

    # new array with one additional float column at the end
    newdtype = numpy.dtype(self.data.dtype.descr + [(newname, 'f8')])
    newrec = numpy.empty(self.data.shape, dtype=newdtype)
    for field in self.data.dtype.fields:
        newrec[field] = self.data[field]

    # the new data has to go to the (possibly renamed) new column. Writing
    # to 'name' would overwrite the existing column and leave the new one
    # uninitialized
    newrec[newname] = arr

    self.data=newrec
165
def __call__(self,
             t,
             name,
             time=None,
             invalidExtend=False,
             noInterpolation=False):
    """'Evaluate' the data at a specific time by linear interpolation
    @param t: the time at which the data should be evaluated
    @param name: name of the data column to be evaluated
    @param time: name of the time column. If none is given then the first
    column is assumed. The bisection below only works if the values in
    this column are in ascending order
    @param invalidExtend: if t is out of the valid range then use the smallest or the biggest value. If False use nan
    @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
    @return: the value of the column at the time t"""

    if time is None:
        time=self.names()[0]

    x=self.data[time]
    y=self.data[name]

    # outside of the range of the data
    if t<x[0]:
        if invalidExtend:
            return y[0]
        else:
            return float('nan')
    elif t>x[-1]:
        if invalidExtend:
            return y[-1]
        else:
            return float('nan')

    # exact hits on the first or the last data point. The bisection never
    # tests these and interpolating would divide by zero if there is only
    # one row
    if t==x[0]:
        return y[0]
    elif t==x[-1]:
        return y[-1]

    # bisection until iLow and iHigh are neighbours that enclose t
    iLow=0
    iHigh=len(x)-1

    while (iHigh-iLow)>1:
        # integer division: the result is used as an index
        iNew = iLow + (iHigh-iLow)//2

        if x[iNew]==t:
            # exact hit on a data point
            return y[iNew]
        elif t < x[iNew]:
            iHigh=iNew
        else:
            iLow=iNew

    if noInterpolation:
        return float('nan')
    else:
        return y[iLow] + (y[iHigh]-y[iLow])*(t-x[iLow])/(x[iHigh]-x[iLow])
221
def addTimes(self,times,time=None,interpolate=False,invalidExtend=False):
    """Extend the data so that all new times are represented (add rows
    if they are not there). The rows that are already there are kept
    @param time: the name of the column with the time
    @param times: the times that should be there. The merging below
    goes through them in order together with the existing times
    @param interpolate: interpolate the data in new rows. Otherwise
    insert 'nan'
    @param invalidExtend: if t is out of the valid range then use
    the smallest or the biggest value. If False use nan"""

    if time is None:
        time=self.names()[0]

    if len(times)==len(self.data[time]):
        same=True
        for i in range(len(times)):
            if times[i]!=self.data[time][i]:
                same=False
                break
        if same:
            # no difference between the times: nothing to do
            return

    newData=[]
    otherI=0
    originalI=0
    while otherI<len(times):
        # copy all the original rows that come before the next new time
        goOn=originalI<len(self.data[time])
        while goOn and times[otherI]>self.data[time][originalI]:
            newData.append(self.data[originalI])
            originalI+=1
            goOn=originalI<len(self.data[time])

        append=True
        if originalI<len(self.data[time]):
            if times[otherI]==self.data[time][originalI]:
                # this time is already there: keep the original row
                newData.append(self.data[originalI])
                originalI+=1
                otherI+=1
                append=False

        if append:
            # build a new row for a time that was not there
            t=times[otherI]
            newRow=[]
            for n in self.names():
                if n==time:
                    newRow.append(t)
                elif interpolate:
                    newRow.append(self(t,n,time=time,invalidExtend=invalidExtend))
                else:
                    newRow.append(float('nan'))
            newData.append(newRow)
            otherI+=1

    # the original rows after the last new time
    while originalI<len(self.data[time]):
        newData.append(self.data[originalI])
        originalI+=1

    # a real list of tuples is needed: a lazy map-object is not
    # understood by numpy
    self.data=numpy.array([tuple(row) for row in newData],dtype=self.data.dtype)
281
def resample(self,
             other,
             name,
             time=None,
             invalidExtend=False,
             extendData=False,
             noInterpolation=False):
    """Calculate values from another dataset at the same times as in this data-set
    @param other: the other data-set
    @param name: name of the data column to be evaluated
    @param time: name of the time column. If none is given then the first column is assumed
    @param invalidExtend: see __call__
    @param extendData: if the time range of the other data-set is bigger
    than the range of this one then extend this data-set before
    resampling. This modifies self.data: rows with the additional times
    are added and all their other columns are set to 'nan'
    @param noInterpolation: if t doesn't exactly fit a data-point return 'nan'
    @return: list with one value of the other data-set for every time
    in this data-set"""
    if time is None:
        time=self.names()[0]

    if extendData and (
        self.data[time][0] > other.data[time][0] or \
        self.data[time][-1] < other.data[time][-1]):
        otherTimes=other.data[time]
        columns=self.names()

        # times of the other data-set before the first time of this one
        first=self.data[time][0]
        pre=[]
        for t in otherTimes:
            if not (t<first):
                break
            pre.append(tuple(t if n==time else float('nan') for n in columns))
        if len(pre)>0:
            # lists of tuples: lazy map-objects are not understood by numpy
            self.data=numpy.concatenate((numpy.array(pre,dtype=self.data.dtype),self.data))

        # times of the other data-set after the last time of this one.
        # Collected from the end and then brought into ascending order
        last=self.data[time][-1]
        post=[]
        for t in otherTimes[::-1]:
            if not (t>last):
                break
            post.append(tuple(t if n==time else float('nan') for n in columns))

        post.reverse()
        if len(post)>0:
            self.data=numpy.concatenate((self.data,numpy.array(post,dtype=self.data.dtype)))

    result=[]

    for t in self.data[time]:
        result.append(other(t,name,
                            time=time,
                            invalidExtend=invalidExtend,
                            noInterpolation=noInterpolation))

    return result
346
def compare(self,other,name,time=None):
    """Compare this data-set with another. The time-points of this dataset are used as
    a reference. Returns a dictionary with a number of norms: maximum absolute
    difference ("max"), average absolute difference on all timepoints
    ("average"), average absolute difference weighted by time ("wAverage")
    and the first and the last time ("tMin", "tMax")
    @param other: the other data-set
    @param name: name of the data column to be evaluated
    @param time: name of the time column. If none is given then the first column is assumed"""

    if time is None:
        time=self.names()[0]

    x=self.data[time]
    y=self.data[name]
    y2=self.resample(other,name,time=time,invalidExtend=True)

    maxDiff=0
    sumDiff=0
    sumWeighted=0

    for i,t in enumerate(x):
        diff=abs(y[i]-y2[i])
        maxDiff=max(diff,maxDiff)
        sumDiff+=diff
        # every point is weighted with half of the distance to each of
        # its neighbours
        weight=0
        if i>0:
            weight+=(t-x[i-1])/2
        if i<(len(x)-1):
            weight+=(x[i+1]-t)/2
        sumWeighted+=weight*diff

    average=sumDiff/len(x)
    span=x[-1]-x[0]
    if span!=0:
        wAverage=sumWeighted/span
    else:
        # no time interval to weight with (for instance only one row):
        # use the plain average instead of dividing by zero
        wAverage=average

    return { "max" : maxDiff,
             "average" : average,
             "wAverage" : wAverage,
             "tMin": x[0],
             "tMax": x[-1]}
386
388 """Calculates the metrics for a data set. Returns a dictionary
389 with a number of norms: minimum, maximum, average, average weighted by time
390 @param name: name of the data column to be evaluated. Assumes that that column
391 is ordered in ascending order
392 @param time: name of the time column. If none is given then the first column is assumed"""
393
394 if time==None:
395 time=self.names()[0]
396
397 x=self.data[time]
398 y=self.data[name]
399
400 minVal=1e40
401 maxVal=-1e40
402 sum=0
403 sumWeighted=0
404
405 for i,t in enumerate(x):
406 val=y[i]
407 maxVal=max(val,maxVal)
408 minVal=min(val,minVal)
409 sum+=val
410 weight=0
411 if i>0:
412 weight+=(t-x[i-1])/2
413 if i<(len(x)-1):
414 weight+=(x[i+1]-t)/2
415 sumWeighted+=weight*val
416
417 return { "max" : maxVal,
418 "min" : minVal,
419 "average" : sum/len(x),
420 "wAverage" : sumWeighted/(x[-1]-x[0]),
421 "tMin": x[0],
422 "tMax": x[-1]}
423