Skip to content

Commit f9f67aa

Browse files
committed
updated generate_slides to find partial datasets
1 parent 9402e9f commit f9f67aa

2 files changed

Lines changed: 50 additions & 38 deletions

File tree

src/xenium_analysis_tools/process_xenium/generate_dataset_slides.py

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
import gc
55
import pandas as pd
66
import numpy as np
7-
from shutil import copytree, rmtree
87

98
from xenium_analysis_tools.utils.io_utils import (
109
atomic_write_sdata,
1110
is_complete,
1211
is_complete_store,
1312
load_config,
1413
setup_logging,
15-
get_sections_df
14+
get_sections_df,
15+
get_partial_dataset
1616
)
1717
from xenium_analysis_tools.process_xenium.process_spatialdata import read_xenium_slide
1818

@@ -37,39 +37,6 @@ def find_xenium_bundle(bundle_name, data_folder='/root/capsule/data'):
3737
path_to_bundle = found_dirs[0]
3838
break
3939
return path_to_bundle
40-
41-
def get_data_folder_slides(dataset_name, source_path, dest_path):
    """Mirror ``slide_*.zarr`` stores from ``source_path`` into ``dest_path``.

    A store is copied only when ``is_complete_store`` accepts the source and
    does not already accept the destination; an existing destination that
    fails that check is deleted before the fresh copy is made.

    Args:
        dataset_name: Accepted for the caller's convenience; the body does
            not read it.
        source_path: Directory (path object) searched for ``slide_*.zarr``.
        dest_path: Directory (path object) receiving the copies; created,
            with parents, only when at least one slide was found.

    Returns:
        None. Progress is reported with ``print``.
    """
    slide_stores = list(source_path.glob('slide_*.zarr'))
    if len(slide_stores) == 0:
        print(f"No slides found in {source_path}")
        return

    # Only materialise the destination once there is something to consider.
    dest_path.mkdir(parents=True, exist_ok=True)

    for src_store in slide_stores:
        store_name = src_store.name
        print(f"Checking {store_name}...")
        target = dest_path / store_name

        if target.exists() and is_complete_store(target):
            # Nothing to do: the destination copy is already usable.
            print(f"{store_name} already complete")
        elif not is_complete_store(src_store):
            # Never propagate a source that fails the completeness check.
            print(f"{store_name} source incomplete, skipping")
        else:
            # copytree needs a clean target, so drop any leftover first.
            if target.exists():
                rmtree(target)
            copytree(src_store, target)
            print(f"Copied {store_name}")
7340

7441
def generate_slides(dataset_name: str, config_path: str=None, select_sections: list[int]|None = None):
7542
"""
@@ -99,8 +66,8 @@ def generate_slides(dataset_name: str, config_path: str=None, select_sections: l
9966
if processing_config['check_data_folder_slides']:
10067
logger.info("Checking and copying slides from data folder if exist...")
10168
data_folder_slides_path = Path(paths['data_root']) / f'{dataset_name}{processing_config["save_initial_dataset_suffix"]}'
102-
get_data_folder_slides(dataset_name, data_folder_slides_path, save_sections_path)
103-
69+
get_partial_dataset(data_folder_slides_path, save_sections_path, pattern='slide_*', subset_ids=select_sections)
70+
10471
# Get the slides information
10572
sections_df = get_sections_df(raw_data_folder)
10673

src/xenium_analysis_tools/utils/io_utils.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import logging
66
import sys
77
import pandas as pd
8+
from shutil import copytree, rmtree
89

910
def load_config(config_path=None):
1011
if config_path is not None:
@@ -140,4 +141,48 @@ def safe_copy_tree(src: Path, dst: Path):
140141
return
141142
shutil.rmtree(dst)
142143

143-
shutil.copytree(src, dst)
144+
shutil.copytree(src, dst)
145+
146+
def get_partial_dataset(source_path, dest_path, pattern='section_*', subset_ids=None):
    """Copy entries matching ``pattern`` from ``source_path`` to ``dest_path``.

    An entry is copied only when ``is_complete_store`` accepts the source and
    does not already accept the destination; an existing destination that
    fails that check is removed before the fresh copy is made.

    Args:
        source_path: Directory to search (``str`` or path object).
        dest_path: Directory receiving the copies (``str`` or path object);
            created, with parents, only when at least one entry is selected.
        pattern: Glob pattern applied directly inside ``source_path``.
        subset_ids: Optional container of integer IDs. When given, an entry
            is kept only if one of the integer tokens following the first
            ``_``-separated token of its stem is in ``subset_ids``. Tokens
            that are not integers are ignored, so a stray match such as
            ``slide_2.zarr.tmp`` or ``slide_notes`` is skipped instead of
            aborting the run with ``ValueError``.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Local import keeps the block self-contained regardless of file imports.
    from pathlib import Path

    # Accept plain strings as well as path objects.
    source_path = Path(source_path)
    dest_path = Path(dest_path)

    all_matches = list(source_path.glob(pattern))

    if subset_ids is not None:
        matches = [
            m for m in all_matches
            if any(sid in subset_ids for sid in _ids_from_stem(m.stem))
        ]
    else:
        matches = all_matches

    if not matches:
        print(f"No matches found in {source_path}")
        return

    # Only create the destination once there is something to consider.
    dest_path.mkdir(parents=True, exist_ok=True)

    for ma in matches:
        print(f"Checking {ma.name}...")
        dest_slide = dest_path / ma.name

        # Skip if the destination copy is already complete.
        if dest_slide.exists() and is_complete_store(dest_slide):
            print(f"{ma.name} already complete")
            continue

        # Never propagate a source that fails the completeness check.
        if not is_complete_store(ma):
            print(f"{ma.name} source incomplete, skipping")
            continue

        # copytree needs a clean target, so drop any incomplete leftover.
        if dest_slide.exists():
            rmtree(dest_slide)

        copytree(ma, dest_slide)
        print(f"Copied {ma.name}")


def _ids_from_stem(stem):
    """Return the integer tokens that follow the first ``_``-separated token of ``stem``."""
    ids = []
    for token in stem.split('_')[1:]:
        try:
            ids.append(int(token))
        except ValueError:
            # Not an ID (e.g. "1.zarr" or "notes"); it cannot match a subset ID.
            continue
    return ids

0 commit comments

Comments
 (0)