@@ -102,42 +102,39 @@ def readWDM(wdmfile, hdffile, compress_output=True):
102102 dattr [name ] = '' .join ([_inttostr (iarray [k ]) for k in range (ptr , ptr + length // 4 )]).strip ()
103103
104104 # Get timeseries timebase data
105- groups = []
105+ records = []
106+ offsets = []
106107 for i in range (pdat + 1 , pdatv - 1 ):
107108 a = iarray [index + i ]
108109 if a != 0 :
109- groups .append (_splitposition (a ))
110- if len (groups ) == 0 :
110+ record , offset = _splitposition (a )
111+ records .append (record )
112+ offsets .append (offset )
113+ if len (records ) == 0 :
111114 continue
112115
113- srec , soffset = groups [0 ]
114- start = _splitdate (iarray [srec * 512 + soffset ])
115-
116- sprec , spoffset = _splitposition (frepos )
117- finalindex = sprec * 512 + spoffset
118-
119116 # calculate number of data points in each group, tindex is final index for storage
120117 tgroup = dattr ['TGROUP' ]
121118 tstep = dattr ['TSSTEP' ]
122119 tcode = dattr ['TCODE' ]
123120
124- #PRT - this code was done to preallocate a numpy array, however WDM file can contain timeseries with irregular timesteps
125- #so calculating a single steps and preallocating a nparray will lead to issues.
126- cindex = pd .date_range (start = start , periods = len (groups )+ 1 , freq = freq [tgroup ])
127- tindex = pd .date_range (start = start , end = cindex [- 1 ], freq = str (tstep ) + freq [tcode ])
128- counts = np .diff (np .searchsorted (tindex , cindex ))
129-
130- ## Write to HDF5 file
131- dates , values = _process_groups (iarray , farray , groups , tgroup )
121+ records = np .asarray (records )
122+ offsets = np .asarray (offsets )
123+ dates , values = _process_groups (iarray , farray , records , offsets , tgroup )
132124 series = pd .Series (values , index = dates )
125+ index = series .index .to_series ()
126+ series .index = index .apply (lambda x : datetime .datetime (* bits_to_date (x )))
127+
133128 dsname = f'TIMESERIES/TS{ dsn :03d} '
134129 if compress_output :
135130 series .to_hdf (store , dsname , complib = 'blosc' , complevel = 9 )
136131 else :
137132 series .to_hdf (store , dsname , format = 't' , data_columns = True )
138133
139- data = [str (tindex [0 ]), str (tindex [- 1 ]), str (tstep ) + freq [tcode ],
140- len (series ), dattr ['TSTYPE' ], dattr ['TFILL' ]]
134+ data = [
135+ str (series .index [0 ]), str (series .index [- 1 ]), str (tstep ) + freq [tcode ],
136+ len (series ), dattr ['TSTYPE' ], dattr ['TFILL' ]
137+ ]
141138 columns = ['Start' , 'Stop' , 'Freq' ,'Length' , 'TSTYPE' , 'TFILL' ]
142139 for x in columns_to_add :
143140 if x in dattr :
@@ -151,77 +148,130 @@ def readWDM(wdmfile, hdffile, compress_output=True):
151148 store .put ('TIMESERIES/SUMMARY' ,dfsummary , format = 't' , data_columns = True )
152149 return dfsummary
153150
154- def _todatetime (yr = 1900 , mo = 1 , dy = 1 , hr = 0 ):
155- if hr == 24 :
156- return datetime .datetime (yr , mo , dy , 23 ) + pd .Timedelta (1 ,'h' )
157- else :
158- return datetime .datetime (yr , mo , dy , hr )
159-
151+ @njit
160152def _splitdate (x ):
161- return _todatetime (x >> 14 , x >> 10 & 0xF , x >> 5 & 0x1F , x & 0x1F ) # args: year, month, day, hour
153+ year = int (x >> 14 )
154+ month = int (x >> 10 & 0xF )
155+ day = int (x >> 5 & 0x1F )
156+ hour = int (x & 0x1F )
157+ return correct_date (year , month , day , hour , 0 ,0 )
162158
159+ @njit
163160def _splitcontrol (x ):
164161 nval = x >> 16
165- ltstep = int ( x >> 10 & 0x3f ) #relative_delta doesn't handle numpy.32bitInt correctly so convert to python
166- ltcode = int ( x >> 7 & 0x7 )
162+ ltstep = x >> 10 & 0x3f
163+ ltcode = x >> 7 & 0x7
167164 comp = x >> 5 & 0x3
168165 qual = x & 0x1f
169166 return nval , ltstep , ltcode , comp , qual
170167
168+ @njit
171169def _splitposition (x ):
172170 return ((x >> 9 ) - 1 , (x & 0x1FF ) - 1 ) #args: record, offset
173171
172+ @njit
174173def _inttostr (i ):
175174 return chr (i & 0xFF ) + chr (i >> 8 & 0xFF ) + chr (i >> 16 & 0xFF ) + chr (i >> 24 & 0xFF )
176175
177- # @jit(nopython=True, cache=True)
178- def _leap_year (y ):
179- if y % 400 == 0 :
176+ @njit
177+ def bits_to_date (x ):
178+ year = x >> 26
179+ month = x >> 22 & 0xf
180+ day = x >> 17 & 0x1f
181+ hour = x >> 12 & 0x1f
182+ minute = x >> 6 & 0x3f
183+ second = x & 0x3f
184+ return year , month , day , hour , minute , second
185+
186+ @njit
187+ def date_to_bits (year , month , day , hour , minute , second ):
188+ x = year << 26 | month << 22 | day << 17 | hour << 12 | minute << 6 | second
189+ return x
190+
191+ @njit
192+ def increment_date (date , timecode , timestep ):
193+ year , month , day , hour , minute , second = bits_to_date (date )
194+
195+ if timecode == 7 : year += 100 * timestep
196+ elif timecode == 6 : year += timestep
197+ elif timecode == 5 : month += timestep
198+ elif timecode == 4 : day += timestep
199+ elif timecode == 3 : hour += timestep
200+ elif timecode == 2 : minute += timestep
201+ elif timecode == 1 : second += timestep
202+
203+ return correct_date (year , month , day , hour , minute , second )
204+
205+ @njit
206+ def correct_date (year , month , day , hour , minute , second ):
207+ while second >= 60 :
208+ second -= 60
209+ minute += 1
210+ while minute >= 60 :
211+ minute -= 60
212+ hour += 1
213+ while hour >= 24 :
214+ hour -= 24
215+ day += 1
216+ while day > _days_in_month (year , month ):
217+ day -= _days_in_month (year , month )
218+ month += 1
219+ while month > 12 :
220+ month -= 12
221+ year += 1
222+ return date_to_bits (year , month , day , hour , minute , second )
223+
224+ @njit
225+ def _days_in_month (year , month ):
226+ if month > 12 : month %= 12
227+
228+ if month in (1 ,3 ,5 ,7 ,8 ,10 ,12 ):
229+ return 31
230+ elif month in (4 ,6 ,9 ,11 ):
231+ return 30
232+ elif month == 2 :
233+ if _is_leapyear (year ): return 29
234+ else : return 28
235+
236+ @njit
237+ def _is_leapyear (year ):
238+ if year % 400 == 0 :
180239 return True
181- if y % 100 == 0 :
240+ if year % 100 == 0 :
182241 return False
183- if y % 4 == 0 :
242+ if year % 4 == 0 :
184243 return True
185244 else :
186245 return False
187246
188- def _deltatime (ltstep , ltcode ):
189- deltat = relativedelta (
190- years = ltstep if ltcode == 6 else 0 , #if need to support 100yrs modify this line
191- months = ltstep if ltcode == 5 else 0 ,
192- days = ltstep if ltcode == 4 else 0 ,
193- hours = ltstep if ltcode == 3 else 0 ,
194- minutes = ltstep if ltcode == 2 else 0 ,
195- seconds = ltstep if ltcode == 1 else 0 )
196- return deltat
197-
198- def _process_groups (iarray , farray , groups , tgroup ):
247+ @njit
248+ def _process_groups (iarray , farray , records , offsets , tgroup ):
249+ date_array = [0 ] #need initialize with a type for numba
250+ value_array = [0.0 ]
199251
200- date_array = []
201- value_array = []
202-
203- for record , offset in groups :
252+ for i in range (0 ,len (records )):
253+ record = records [i ]
254+ offset = offsets [i ]
204255 index = record * 512 + offset
205256 pscfwr = iarray [record * 512 + 3 ] #should be 0 for last record in timeseries
206257 current_date = _splitdate (iarray [index ])
207- lGroupEndDate = current_date + _deltatime ( 1 , tgroup )
258+ group_enddate = increment_date ( current_date , tgroup , 1 )
208259 offset += 1
209260 index += 1
210261
211- while current_date < lGroupEndDate :
262+ while current_date < group_enddate :
212263 nval , ltstep , ltcode , comp , qual = _splitcontrol (iarray [index ])
213- deltat = _deltatime (ltstep , ltcode )
214264 #compressed - only has single value which applies to full range
215265 if comp == 1 :
216266 for i in range (0 , nval , 1 ):
217- current_date = current_date + deltat
267+ current_date = increment_date ( current_date , ltcode , ltstep )
218268 date_array .append (current_date )
219269 value_array .append (farray [index + 1 ])
220270 index += 2
221271 offset += 2
222272 else :
223273 for i in range (0 , nval , 1 ):
224- current_date = current_date + deltat
274+ current_date = increment_date ( current_date , ltcode , ltstep )
225275 date_array .append (current_date )
226276 value_array .append (farray [index + 1 + i ])
227277 index += 1 + nval
@@ -233,4 +283,7 @@ def _process_groups(iarray, farray, groups, tgroup):
233283 record = pscfwr
234284 pscfwr = iarray [(record - 1 ) * 512 + 3 ] #should be 0 for last record in timeseries
235285
286+ date_array = date_array [1 :]
287+ value_array = value_array [1 :]
288+
236289 return date_array , value_array
0 commit comments