Skip to content

Commit e5d64a1

Browse files
committed
Numba support
The constraints of Numba meant that datetime conversion cannot occur with the main block processing loop (_process_groups function). This commit replaces the previous use of python datetime object for a bit approach in which date components are stored in a single integer who individual bits can be parsed into the time step components. Conversion to a datetime object now occurs outside of the processing loop prior to output to HDF.
1 parent 8b701e9 commit e5d64a1

1 file changed

Lines changed: 106 additions & 53 deletions

File tree

HSP2tools/readWDM.py

Lines changed: 106 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -102,42 +102,39 @@ def readWDM(wdmfile, hdffile, compress_output=True):
102102
dattr[name] = ''.join([_inttostr(iarray[k]) for k in range(ptr, ptr + length//4)]).strip()
103103

104104
# Get timeseries timebase data
105-
groups = []
105+
records = []
106+
offsets = []
106107
for i in range(pdat+1, pdatv-1):
107108
a = iarray[index+i]
108109
if a != 0:
109-
groups.append(_splitposition(a))
110-
if len(groups) == 0:
110+
record, offset = _splitposition(a)
111+
records.append(record)
112+
offsets.append(offset)
113+
if len(records) == 0:
111114
continue
112115

113-
srec, soffset = groups[0]
114-
start = _splitdate(iarray[srec*512 + soffset])
115-
116-
sprec, spoffset = _splitposition(frepos)
117-
finalindex = sprec * 512 + spoffset
118-
119116
# calculate number of data points in each group, tindex is final index for storage
120117
tgroup = dattr['TGROUP']
121118
tstep = dattr['TSSTEP']
122119
tcode = dattr['TCODE']
123120

124-
#PRT - this code was done to preallocate a numpy array, however WDM file can contain timeseries with irregular timesteps
125-
#so calculating a single steps and preallocating a nparray will lead to issues.
126-
cindex = pd.date_range(start=start, periods=len(groups)+1, freq=freq[tgroup])
127-
tindex = pd.date_range(start=start, end=cindex[-1], freq=str(tstep) + freq[tcode])
128-
counts = np.diff(np.searchsorted(tindex, cindex))
129-
130-
## Write to HDF5 file
131-
dates, values = _process_groups(iarray, farray, groups, tgroup)
121+
records = np.asarray(records)
122+
offsets = np.asarray(offsets)
123+
dates, values = _process_groups(iarray, farray, records, offsets, tgroup)
132124
series = pd.Series(values, index=dates)
125+
index = series.index.to_series()
126+
series.index = index.apply(lambda x: datetime.datetime(*bits_to_date(x)))
127+
133128
dsname = f'TIMESERIES/TS{dsn:03d}'
134129
if compress_output:
135130
series.to_hdf(store, dsname, complib='blosc', complevel=9)
136131
else:
137132
series.to_hdf(store, dsname, format='t', data_columns=True)
138133

139-
data = [str(tindex[0]), str(tindex[-1]), str(tstep) + freq[tcode],
140-
len(series), dattr['TSTYPE'], dattr['TFILL']]
134+
data = [
135+
str(series.index[0]), str(series.index[-1]), str(tstep) + freq[tcode],
136+
len(series), dattr['TSTYPE'], dattr['TFILL']
137+
]
141138
columns = ['Start', 'Stop', 'Freq','Length', 'TSTYPE', 'TFILL']
142139
for x in columns_to_add:
143140
if x in dattr:
@@ -151,77 +148,130 @@ def readWDM(wdmfile, hdffile, compress_output=True):
151148
store.put('TIMESERIES/SUMMARY',dfsummary, format='t', data_columns=True)
152149
return dfsummary
153150

154-
def _todatetime(yr=1900, mo=1, dy=1, hr=0):
155-
if hr == 24:
156-
return datetime.datetime(yr, mo, dy, 23) + pd.Timedelta(1,'h')
157-
else:
158-
return datetime.datetime(yr, mo, dy, hr)
159-
151+
@njit
160152
def _splitdate(x):
161-
return _todatetime(x >> 14, x >> 10 & 0xF, x >> 5 & 0x1F, x & 0x1F) # args: year, month, day, hour
153+
year = int(x >> 14)
154+
month = int(x >> 10 & 0xF)
155+
day = int(x >> 5 & 0x1F)
156+
hour = int(x & 0x1F)
157+
return correct_date(year, month, day, hour, 0,0)
162158

159+
@njit
163160
def _splitcontrol(x):
164161
nval = x >> 16
165-
ltstep = int(x >> 10 & 0x3f) #relative_delta doesn't handle numpy.32bitInt correctly so convert to python
166-
ltcode = int(x >> 7 & 0x7)
162+
ltstep = x >> 10 & 0x3f
163+
ltcode = x >> 7 & 0x7
167164
comp = x >> 5 & 0x3
168165
qual = x & 0x1f
169166
return nval, ltstep, ltcode, comp, qual
170167

168+
@njit
171169
def _splitposition(x):
172170
return((x>>9) - 1, (x&0x1FF) - 1) #args: record, offset
173171

172+
@njit
174173
def _inttostr(i):
175174
return chr(i & 0xFF) + chr(i>>8 & 0xFF) + chr(i>>16 & 0xFF) + chr(i>>24 & 0xFF)
176175

177-
# @jit(nopython=True, cache=True)
178-
def _leap_year(y):
179-
if y % 400 == 0:
176+
@njit
177+
def bits_to_date(x):
178+
year = x >> 26
179+
month = x >> 22 & 0xf
180+
day = x >> 17 & 0x1f
181+
hour = x >> 12 & 0x1f
182+
minute = x >> 6 & 0x3f
183+
second = x & 0x3f
184+
return year, month, day, hour, minute, second
185+
186+
@njit
187+
def date_to_bits(year, month, day, hour, minute, second):
188+
x = year << 26 | month << 22 | day << 17 | hour << 12 | minute << 6 | second
189+
return x
190+
191+
@njit
192+
def increment_date(date, timecode, timestep):
193+
year, month, day, hour, minute, second = bits_to_date(date)
194+
195+
if timecode == 7: year += 100 * timestep
196+
elif timecode == 6 : year += timestep
197+
elif timecode == 5 : month += timestep
198+
elif timecode == 4 : day += timestep
199+
elif timecode == 3 : hour += timestep
200+
elif timecode == 2 : minute += timestep
201+
elif timecode == 1 : second += timestep
202+
203+
return correct_date(year, month, day, hour, minute, second)
204+
205+
@njit
206+
def correct_date(year, month, day, hour, minute, second):
207+
while second >= 60:
208+
second -= 60
209+
minute += 1
210+
while minute >= 60:
211+
minute -= 60
212+
hour += 1
213+
while hour >= 24:
214+
hour -= 24
215+
day += 1
216+
while day > _days_in_month(year, month):
217+
day -= _days_in_month(year, month)
218+
month += 1
219+
while month > 12:
220+
month -= 12
221+
year += 1
222+
return date_to_bits(year, month, day, hour, minute, second)
223+
224+
@njit
225+
def _days_in_month(year, month):
226+
if month > 12: month %= 12
227+
228+
if month in (1,3,5,7,8,10,12):
229+
return 31
230+
elif month in (4,6,9,11):
231+
return 30
232+
elif month == 2:
233+
if _is_leapyear(year): return 29
234+
else: return 28
235+
236+
@njit
237+
def _is_leapyear(year):
238+
if year % 400 == 0:
180239
return True
181-
if y % 100 == 0:
240+
if year % 100 == 0:
182241
return False
183-
if y % 4 == 0:
242+
if year % 4 == 0:
184243
return True
185244
else:
186245
return False
187246

188-
def _deltatime(ltstep, ltcode):
189-
deltat = relativedelta(
190-
years= ltstep if ltcode == 6 else 0, #if need to support 100yrs modify this line
191-
months= ltstep if ltcode == 5 else 0,
192-
days= ltstep if ltcode == 4 else 0,
193-
hours= ltstep if ltcode == 3 else 0,
194-
minutes= ltstep if ltcode == 2 else 0,
195-
seconds= ltstep if ltcode == 1 else 0)
196-
return deltat
197-
198-
def _process_groups(iarray, farray, groups, tgroup):
247+
@njit
248+
def _process_groups(iarray, farray, records, offsets, tgroup):
249+
date_array = [0] #need initialize with a type for numba
250+
value_array = [0.0]
199251

200-
date_array = []
201-
value_array = []
202-
203-
for record, offset in groups:
252+
for i in range(0,len(records)):
253+
record = records[i]
254+
offset = offsets[i]
204255
index = record * 512 + offset
205256
pscfwr = iarray[record * 512 + 3] #should be 0 for last record in timeseries
206257
current_date = _splitdate(iarray[index])
207-
lGroupEndDate = current_date + _deltatime(1, tgroup)
258+
group_enddate = increment_date(current_date, tgroup, 1)
208259
offset +=1
209260
index +=1
210261

211-
while current_date < lGroupEndDate:
262+
while current_date < group_enddate:
212263
nval, ltstep, ltcode, comp, qual = _splitcontrol(iarray[index])
213-
deltat = _deltatime(ltstep, ltcode)
214264
#compressed - only has single value which applies to full range
215265
if comp == 1:
216266
for i in range(0, nval, 1):
217-
current_date = current_date + deltat
267+
current_date = increment_date(current_date, ltcode, ltstep)
218268
date_array.append(current_date)
219269
value_array.append(farray[index + 1])
220270
index += 2
221271
offset +=2
222272
else:
223273
for i in range(0, nval, 1):
224-
current_date = current_date + deltat
274+
current_date = increment_date(current_date, ltcode, ltstep)
225275
date_array.append(current_date)
226276
value_array.append(farray[index + 1 + i])
227277
index += 1 + nval
@@ -233,4 +283,7 @@ def _process_groups(iarray, farray, groups, tgroup):
233283
record = pscfwr
234284
pscfwr = iarray[(record - 1) * 512 + 3] #should be 0 for last record in timeseries
235285

286+
date_array = date_array[1:]
287+
value_array = value_array[1:]
288+
236289
return date_array, value_array

0 commit comments

Comments
 (0)