Skip to content

Commit 05af8e6

Browse files
committed
updated alignment module
1 parent 4356a5a commit 05af8e6

3 files changed

Lines changed: 140 additions & 69 deletions

File tree

src/xenium_analysis_tools/alignment/confocal_alignment.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,15 @@ def get_confocal_image_sizes(img_name, cf_raw_path, overlap=0.1):
3939
'physical_pixel_size_z': 1.0 # Placeholder if not in YAML
4040
}
4141

42-
def generate_confocal_sdata(zarr_path, raw_confocal_path=None):
42+
def generate_confocal_sdata(zarr_path, raw_confocal_path=None, select_scales=None):
4343
cf_name = zarr_path.stem
44-
cf_dt = create_datatree_from_zarr(zarr_path, chan_name=cf_name)
44+
cf_dt = create_datatree_from_zarr(zarr_path, chan_name=cf_name, select_scales=select_scales)
4545

4646
cf_sdata = sd.SpatialData(
4747
images={cf_name: cf_dt}
4848
)
4949

5050
if raw_confocal_path:
51-
# cf_sizes = get_confocal_image_sizes(cf_name, raw_confocal_path)
5251
cf_sizes = get_confocal_image_sizes(cf_name, raw_confocal_path)
5352
cf_sdata[cf_name].attrs.update(cf_sizes)
5453
if not cf_sizes['physical_pixel_size_x']==cf_sizes['physical_pixel_size_y']:
@@ -58,11 +57,14 @@ def generate_confocal_sdata(zarr_path, raw_confocal_path=None):
5857

5958
return cf_sdata
6059

61-
def create_datatree_from_zarr(zarr_path, chan_name='chan'):
60+
def create_datatree_from_zarr(zarr_path, chan_name='chan', select_scales=None):
6261
root = zarr.open_group(zarr_path, mode='r')
6362
data_tree_obj = xr.DataTree()
6463

6564
for scale_level in sorted(list(root.keys())):
65+
if scale_level != '0': #Have to have scale0 to determine sizes, but can drop later
66+
if select_scales is not None and scale_level not in select_scales:
67+
continue
6668
# Load the image data at this scale level
6769
level_array = da.from_zarr(str(zarr_path / scale_level))
6870
level_array = np.expand_dims(level_array, axis=0) # Add c dimension
@@ -87,9 +89,16 @@ def create_datatree_from_zarr(zarr_path, chan_name='chan'):
8789
set_transformation(data_tree_obj[scale_key].image, sequence, to_coordinate_system="global")
8890
else:
8991
set_transformation(data_tree_obj[scale_key].image, Identity(), to_coordinate_system="global")
90-
return data_tree_obj
9192

93+
if select_scales is not None and '0' not in select_scales:
94+
del data_tree_obj['scale0']
95+
if select_scales is not None: # rename scales
96+
for n, scale_level in enumerate(list(data_tree_obj.keys())):
97+
data_tree_obj[f'scale{n}'] = data_tree_obj[scale_level]
98+
data_tree_obj[f'scale{n}'].image.attrs[f'original_scale_level'] = scale_level
99+
del data_tree_obj[scale_level]
92100

101+
return data_tree_obj
93102

94103
# Copied from capsule 4 to keep track of overlap blending code
95104
def generate_fused_confocal_images(data_asset, overlap=0.1, img_layers=6):
@@ -176,4 +185,25 @@ def generate_fused_confocal_images(data_asset, overlap=0.1, img_layers=6):
176185
# Define a scaler for creating the image pyramid
177186
scaler = Scaler(method='nearest', max_layer=img_layers) # Create 4 levels in the pyramid
178187
# Write the image data with pyramid
179-
write_image(image, root, scaler=scaler, axes = 'zyx')
188+
write_image(image, root, scaler=scaler, axes = 'zyx')
189+
190+
def get_confocal_sdata(confocal_zarr_path, raw_confocal_path, select_scales=None):
191+
sdatas = []
192+
if 'deep' in [zarrs.stem for zarrs in list(confocal_zarr_path.iterdir()) if zarrs.suffix == '.zarr']:
193+
print("Generating sdata for deep confocal...")
194+
deep_sdata = generate_confocal_sdata(
195+
zarr_path = confocal_zarr_path / 'deep.zarr',
196+
raw_confocal_path = raw_confocal_path,
197+
select_scales=select_scales
198+
)
199+
sdatas.append(deep_sdata)
200+
if 'surface' in [zarrs.stem for zarrs in list(confocal_zarr_path.iterdir()) if zarrs.suffix == '.zarr']:
201+
print("Generating sdata for surface confocal...")
202+
surface_sdata = generate_confocal_sdata(
203+
zarr_path = confocal_zarr_path / 'surface.zarr',
204+
raw_confocal_path = raw_confocal_path,
205+
select_scales=select_scales
206+
)
207+
sdatas.append(surface_sdata)
208+
confocal_sdata = sd.concatenate(sdatas, merge_coord_systems=True)
209+
return confocal_sdata

src/xenium_analysis_tools/alignment/zstack_alignment.py

Lines changed: 99 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -26,77 +26,118 @@ def create_zstack_da(tif_path, name, add_chan=True, dims=("z", "y", "x"), fov_um
2626
da.attrs |= {f"scale_{d}": s for d, s in zip(dims, pixel_sizes)}
2727
return da
2828

29-
def get_zstack_sdata(zstack_path, zstack_masks_path, target_size, channel_names=['gcamp', 'dextran']):
30-
def get_matching_metas(root):
31-
matches = []
32-
for d in Path(root).iterdir():
33-
if not d.is_dir(): continue
34-
meta = _parse_stack_metadata(d) # Extracts size and tif paths
35-
if meta['size'] == target_size:
36-
matches.append(meta)
37-
return sorted(matches, key=lambda x: x['name']) # Sort ensures Channel 0 -> gcamp
38-
39-
img_metas = get_matching_metas(zstack_path)
40-
mask_metas = get_matching_metas(zstack_masks_path)
29+
def parse_stack_metadata(folder, lookup_chans=['gcamp', 'dextran']):
30+
"""Extracts size and specific channel name from naming convention."""
31+
# Pattern: matches dimensions like '400x400x450' (width x height x depth)
32+
pattern = r'(\d+)x(\d+)x(\d+)'
33+
match = re.search(pattern, folder.name)
34+
35+
if match:
36+
width, height, depth = match.groups()
37+
size = {"width": int(width), "height": int(height), "depth": int(depth)}
38+
# Detect which known channel names (lookup_chans) appear in the folder name
39+
detected_channels = [ch for ch in lookup_chans if ch in folder.name.lower()]
40+
else:
41+
size = {"width": None, "height": None, "depth": None}
42+
detected_channels = folder.name # Fallback
4143

42-
sz = img_metas[0]['size']
44+
tifs = {d.name.lower(): list(d.glob("*.tif"))[0]
45+
for d in folder.iterdir() if d.is_dir() and "channel" in d.name.lower()}
46+
jsons = {re.sub(r'.*_(registration|roi_groups|scanimage).*', r'\1', f.stem): f
47+
for f in folder.glob("*.json")}
48+
49+
return {"size": size, "detected_channels": detected_channels, "tifs": tifs, "jsons": jsons, "name": folder.name}
50+
51+
def get_zstack_elements(stack_folder, masks_folder, return_tables=False, chan_mapping=None, add_size_suffix=True):
52+
if chan_mapping is None:
53+
chan_mapping = {
54+
'channel_0_ref_0': 'gcamp',
55+
'channel_1_ref_1': 'dextran'
56+
}
57+
58+
# Get metadata for stacks
59+
stack_meta = parse_stack_metadata(stack_folder, lookup_chans=list(chan_mapping.values()))
60+
sz = stack_meta['size']
4361
fov = (sz['depth'], sz['height'], sz['width'])
44-
images, labels, tables = {}, {}, {}
4562

46-
for i, img_meta in enumerate(img_metas):
47-
chan_name = channel_names[i] if i < len(channel_names) else f"channel_{i}"
48-
49-
# Process Image
50-
img_tif = next(iter(img_meta['tifs'].values()))
51-
img_da = create_zstack_da(img_tif, chan_name, add_chan=True, fov_um=fov)
52-
images[chan_name] = Image3DModel.parse(img_da, chunks='auto')
63+
# Get stack channel images
64+
images = {}
65+
for chan, tif_path in stack_meta['tifs'].items():
66+
chan_name = chan_mapping.get(chan, chan)
67+
img_da = create_zstack_da(tif_path, chan_name, add_chan=True, fov_um=fov)
68+
img_da.attrs.update()
69+
images[chan_name] = Image3DModel.parse(img_da,
70+
c_coords=[chan_name],
71+
chunks='auto')
72+
73+
# If specified to keep names unique, add size suffix
74+
if add_size_suffix:
75+
size_suffix = f"{fov[0]}x{fov[1]}x{fov[2]}"
76+
images = {f"{name}_{size_suffix}": img for name, img in images.items()}
77+
78+
# Use name of stack to get corresponding masks
79+
img_name = stack_meta['name'].split('_registered')[0]
80+
all_masks = list(masks_folder.iterdir())
81+
matched_masks_path = [m for m in all_masks if img_name in m.name]
82+
if matched_masks_path:
83+
matched_masks_path = matched_masks_path[0] if len(matched_masks_path) == 1 else None
84+
if matched_masks_path is None:
85+
print(f"No matching mask found for {img_name} in {masks_folder}")
5386

54-
# Process Labels & Tables
55-
if i < len(mask_metas):
56-
mask_meta = mask_metas[i]
57-
mask_tif = next(iter(mask_meta['tifs'].values()))
58-
label_key = f"{chan_name}_labels"
59-
60-
mask_da = create_zstack_da(mask_tif, label_key, add_chan=False, fov_um=fov)
61-
labels[label_key] = Labels3DModel.parse(mask_da, chunks='auto')
62-
63-
# Table logic
87+
# Get mask metadata
88+
masks_meta = parse_stack_metadata(matched_masks_path, lookup_chans=list(chan_mapping.values()))
89+
90+
# Get labels and tables for each mask channel
91+
labels = {}
92+
tables = {}
93+
for chan, tif_path in masks_meta['tifs'].items():
94+
chan_name = chan_mapping.get(chan, chan)
95+
labels_name = f"{chan_name}_labels"
96+
if add_size_suffix:
97+
labels_name = f"{labels_name}_{size_suffix}"
98+
mask_da = create_zstack_da(tif_path, labels_name, add_chan=False, fov_um=fov)
99+
labels[labels_name] = Labels3DModel.parse(mask_da, chunks='auto')
100+
if return_tables:
101+
# Corresponding table
64102
unique_ids = np.unique(mask_da.values)
65103
unique_ids = unique_ids[unique_ids > 0]
66104
obs = pd.DataFrame(unique_ids, columns=[f"{chan_name}_id"], index=unique_ids.astype(str))
67-
obs['region'] = label_key
105+
obs['region'] = labels_name
68106
ann = ad.AnnData(obs=obs)
69-
tables[f"{chan_name}_cells"] = TableModel.parse(ann, region=label_key, region_key='region', instance_key=f"{chan_name}_id")
107+
table_name = f"{chan_name}_table"
108+
if add_size_suffix:
109+
table_name = f"{table_name}_{size_suffix}"
110+
tables[table_name] = TableModel.parse(ann, region=labels_name, region_key='region', instance_key=f"{chan_name}_id")
111+
112+
return images, labels, tables
70113

71-
sdata = sd.SpatialData(images=images, labels=labels, tables=tables)
114+
def get_zstacks_sdata(stacks_folder, masks_folder, return_tables=False, chan_mapping=None):
115+
if chan_mapping is None:
116+
chan_mapping = {
117+
'channel_0_ref_0': 'gcamp',
118+
'channel_1_ref_1': 'dextran'
119+
}
120+
all_stacks = list(stacks_folder.iterdir())
121+
combined_images = {}
122+
combined_labels = {}
123+
combined_tables = {}
124+
if len(all_stacks) > 1:
125+
add_size_suffix = True
126+
else:
127+
add_size_suffix = False
128+
for zstack_folder in all_stacks:
129+
images, labels, tables = get_zstack_elements(zstack_folder, masks_folder, return_tables=return_tables, chan_mapping=chan_mapping, add_size_suffix=add_size_suffix)
130+
combined_images.update(images)
131+
combined_labels.update(labels)
132+
if tables:
133+
combined_tables.update(tables)
72134

135+
# Combine into SpatialData
136+
sdata = sd.SpatialData(images=combined_images, labels=combined_labels, tables=combined_tables)
73137
# Apply Transformations
74138
for el_type in ['images', 'labels']:
75139
for name, el in getattr(sdata, el_type).items():
76140
set_transformation(el, Identity(), "global")
77141
scale = Scale([el.attrs[f"scale_{d}"] for d in ['z', 'y', 'x']], axes=('z', 'y', 'x'))
78142
set_transformation(el, scale, "microns")
79-
80-
return sdata
81-
82-
def _parse_stack_metadata(folder):
83-
"""Extracts size and specific channel name from the Allen Institute folder naming convention."""
84-
# Pattern: Matches '400x400x450' and captures the following word (e.g., GCaMP)
85-
pattern = r'(\d+)x(\d+)x(\d+)-([^_]+)'
86-
match = re.search(pattern, folder.name)
87-
88-
if match:
89-
width, height, depth, channel = match.groups()
90-
size = {"width": int(width), "height": int(height), "depth": int(depth)}
91-
# Normalize channel name (e.g., GCaMP -> gcamp)
92-
detected_channel = channel.lower()
93-
else:
94-
size = {"width": None, "height": None, "depth": None}
95-
detected_channel = folder.name # Fallback
96-
97-
tifs = {d.name.lower(): list(d.glob("*.tif"))[0]
98-
for d in folder.iterdir() if d.is_dir() and "channel" in d.name.lower()}
99-
jsons = {re.sub(r'.*_(registration|roi_groups|scanimage).*', r'\1', f.stem): f
100-
for f in folder.glob("*.json")}
101-
102-
return {"size": size, "detected_channel": detected_channel, "tifs": tifs, "jsons": jsons, "name": folder.name}
143+
return sdata

src/xenium_analysis_tools/utils/sd_utils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ def get_dataset_paths(dataset_id,
4040
"scratch_root": scratch_root,
4141
"results_root": results_root,
4242
"xenium_dataset_name": dataset_config.get("xenium_name", None),
43-
"sdata_path": data_root / f'{dataset_config.get("xenium_name", None)}_processed',
44-
"confocal_path": data_root / dataset_config.get("confocal_name", None),
45-
"raw_confocal_path": data_root / dataset_config.get("raw_confocal_name", None),
46-
"zstack_path": data_root / dataset_config.get("zstack_name", None),
47-
"zstack_masks": data_root / dataset_config.get("zstack_masks_name", None),
43+
"sdata_path": data_root / f'{dataset_config["xenium_name"]}_processed' if dataset_config.get("xenium_name") else None,
44+
"confocal_path": data_root / dataset_config["confocal_name"] if dataset_config.get("confocal_name") else None,
45+
"raw_confocal_path": data_root / dataset_config["raw_confocal_name"] if dataset_config.get("raw_confocal_name") else None,
46+
"zstack_path": data_root / dataset_config["zstack_name"] if dataset_config.get("zstack_name") else None,
47+
"zstack_masks": data_root / dataset_config["zstack_masks_name"] if dataset_config.get("zstack_masks_name") else None,
4848
}
4949

5050
return paths

0 commit comments

Comments
 (0)