@@ -293,3 +293,83 @@ def open_zarr(
293293 new_data = {k : _TensorStoreAdapter (v ) for k , v in arrays .items ()}
294294
295295 return ds .copy (data = new_data )
296+
297+
def _tensorstore_open_concatenated_zarrs(
    paths: list[str],
    data_vars: list[str],
    concat_axes: list[int],
    context: tensorstore.Context,
) -> dict[str, tensorstore.TensorStore]:
    """Open multiple zarrs with TensorStore and concatenate them per variable.

    Every data variable is opened asynchronously from every path first, so the
    I/O for all stores overlaps; the open futures are only awaited when the
    per-path arrays are concatenated.

    Args:
        paths: List of paths to zarr stores.
        data_vars: List of data variable names to open.
        concat_axes: List of axes along which to concatenate the data variables
            (one axis per entry of ``data_vars``).
        context: TensorStore context.

    Returns:
        Mapping from data variable name to its concatenated TensorStore array.
    """
    # Phase 1: kick off asynchronous opens for every variable in every store.
    per_path_futures = []
    for path in paths:
        zarr_format = _get_zarr_format(path)
        futures = {}
        for name in data_vars:
            spec = _zarr_spec_from_path(os.path.join(path, name), zarr_format)
            futures[name] = tensorstore.open(
                spec, read=True, write=False, context=context
            )
        per_path_futures.append(futures)

    # Phase 2: await the opens and concatenate each variable along its axis.
    # strict=True mirrors the caller's contract that data_vars and concat_axes
    # are parallel lists of equal length.
    return {
        name: tensorstore.concat(
            [futures[name].result() for futures in per_path_futures],
            axis=axis,
        )
        for name, axis in zip(data_vars, concat_axes, strict=True)
    }
330+
331+
def open_concatenated_zarrs(
    paths: list[str],
    concat_dim: str,
    *,
    context: tensorstore.Context | None = None,
    mask_and_scale: bool = True,
) -> xarray.Dataset:
    """Open an xarray.Dataset whilst concatenating multiple Zarr using TensorStore.

    Notes:
        This function depends on the Dask package.

    Args:
        paths: List of paths to zarr stores.
        concat_dim: Dimension along which to concatenate the data variables.
        context: TensorStore context.
        mask_and_scale: Whether to mask and scale the data.

    Returns:
        Concatenated Dataset with all data variables opened via TensorStore.
    """
    if context is None:
        context = tensorstore.Context()

    # Let xarray assemble the combined dataset (coords, attrs, dim metadata);
    # the data variables themselves are replaced with TensorStore-backed
    # arrays further down.
    ds = xarray.open_mfdataset(
        paths,
        concat_dim=concat_dim,
        combine="nested",
        mask_and_scale=mask_and_scale,
        engine="zarr",
    )

    if mask_and_scale:
        # Data variables get replaced below with _TensorStoreAdapter arrays,
        # which don't get masked or scaled. Raising an error avoids surprising
        # users with incorrect data values.
        _raise_if_mask_and_scale_used_for_data_vars(ds)

    data_vars = list(ds.data_vars)
    # Translate the named concat dimension into a positional axis per variable.
    concat_axes = []
    for var in data_vars:
        concat_axes.append(ds[var].dims.index(concat_dim))

    arrays = _tensorstore_open_concatenated_zarrs(
        paths, data_vars, concat_axes, context
    )
    adapters = {name: _TensorStoreAdapter(array) for name, array in arrays.items()}

    return ds.copy(data=adapters)
0 commit comments