44import gc
55import pandas as pd
66import numpy as np
7- from shutil import copytree , rmtree
87
98from xenium_analysis_tools .utils .io_utils import (
109 atomic_write_sdata ,
1110 is_complete ,
1211 is_complete_store ,
1312 load_config ,
1413 setup_logging ,
15- get_sections_df
14+ get_sections_df ,
15+ get_partial_dataset
1616)
1717from xenium_analysis_tools .process_xenium .process_spatialdata import read_xenium_slide
1818
@@ -37,39 +37,6 @@ def find_xenium_bundle(bundle_name, data_folder='/root/capsule/data'):
3737 path_to_bundle = found_dirs [0 ]
3838 break
3939 return path_to_bundle
40-
def get_data_folder_slides(dataset_name, source_path, dest_path):
    """Bring ``slide_*.zarr`` stores from ``source_path`` into ``dest_path``.

    Each store matched by ``source_path.glob('slide_*.zarr')`` is handled
    independently:

    * if a same-named store already exists at the destination and
      ``is_complete_store`` accepts it, nothing is copied;
    * otherwise, if ``is_complete_store`` rejects the source store, it is
      skipped;
    * otherwise any existing destination copy is removed and the source
      store is copied over with ``copytree``.

    Progress is reported with ``print``. The function returns ``None``.

    Args:
        dataset_name: Not used by this function.
        source_path: Path-like object supporting ``.glob`` to search for
            slides.
        dest_path: Path-like object supporting ``.mkdir`` and ``/`` that
            receives the copies; created (with parents) when slides exist.
    """
    found = list(source_path.glob('slide_*.zarr'))
    if len(found) == 0:
        print(f"No slides found in {source_path} ")
        return

    # The destination is only created once there is something to copy into it.
    dest_path.mkdir(parents=True, exist_ok=True)

    for slide in found:
        print(f"Checking {slide.name} ...")
        target = dest_path / slide.name

        if target.exists() and is_complete_store(target):
            # A finished copy is already in place.
            print(f"{slide.name} already complete")
        elif not is_complete_store(slide):
            # Never propagate a source store that fails the completeness check.
            print(f"{slide.name} source incomplete, skipping")
        else:
            # Clear out a leftover partial copy so copytree starts clean.
            if target.exists():
                rmtree(target)
            copytree(slide, target)
            print(f"Copied {slide.name} ")
7340
7441def generate_slides (dataset_name : str , config_path : str = None , select_sections : list [int ]| None = None ):
7542 """
@@ -99,8 +66,8 @@ def generate_slides(dataset_name: str, config_path: str=None, select_sections: l
9966 if processing_config ['check_data_folder_slides' ]:
10067 logger .info ("Checking and copying slides from data folder if exist..." )
10168 data_folder_slides_path = Path (paths ['data_root' ]) / f'{ dataset_name } { processing_config ["save_initial_dataset_suffix" ]} '
102- get_data_folder_slides ( dataset_name , data_folder_slides_path , save_sections_path )
103-
69+ get_partial_dataset ( data_folder_slides_path , save_sections_path , pattern = 'slide_*' , subset_ids = select_sections )
70+
10471 # Get the slides information
10572 sections_df = get_sections_df (raw_data_folder )
10673
0 commit comments