|
6 | 6 | import json |
7 | 7 | from pathlib import Path |
8 | 8 |
|
def get_channel_name(image, chan, print_chan_names_only=False):
    """Translate a raw channel name into its canonical stain label.

    Matching is a case-insensitive substring test: the first canonical label
    (in the order listed below) that has an alias contained in ``chan`` wins.

    Args:
        image: Image passed to ``sd.models.get_channel_names``; only read
            when ``print_chan_names_only`` is true.
        chan: Raw channel name. Values that are not strings (for example an
            integer channel index) cannot match an alias and are returned
            unchanged.
        print_chan_names_only: When true, print the channel names reported
            for ``image`` and return ``None`` without doing any mapping.

    Returns:
        The canonical label when an alias matches, ``chan`` itself when
        nothing matches, or ``None`` in print-only mode.
    """
    if print_chan_names_only:
        chan_names = sd.models.get_channel_names(image)
        print('Available channel names:')
        for name in chan_names:
            print(f' - {name}')
        return None

    # Aliases are stored lowercase because the comparison lowercases `chan`.
    channel_aliases = {
        'DAPI': ('dapi', 'nuclear'),
        'ATP1A1/CD45/E-Cadherin': ('boundary',),
        '18S': ('rna',),
        'AlphaSMA/Vimentin': ('protein',),
    }

    # A non-string channel identifier has no text to match against; treat it
    # like any other unmatched name instead of failing on `.lower()`.
    if not isinstance(chan, str):
        return chan

    chan_lower = chan.lower()
    for chan_label, aliases in channel_aliases.items():
        if any(alias in chan_lower for alias in aliases):
            return chan_label
    return chan
| 26 | + |
9 | 27 | def get_dataset_paths(dataset_id, |
10 | 28 | data_root=Path('/root/capsule/data'), |
11 | 29 | scratch_root=Path('/root/capsule/scratch'), |
@@ -99,46 +117,44 @@ def add_micron_coord_sys(sdata, pixel_size=None, z_step=None): |
99 | 117 | ) |
100 | 118 | return sdata |
101 | 119 |
|
def _merge_new_columns(target_df, source_df):
    """Left-join onto target_df the columns that only source_df has; None if there are none."""
    new_cols = np.setdiff1d(source_df.columns, target_df.columns)
    if len(new_cols) == 0:
        print("No new columns to add from mapped data")
        return None
    print(f"Adding {len(new_cols)} columns from mapped data: {new_cols}")
    # A left join keeps exactly the rows of `target_df`, in their original
    # order. An outer join could append rows that exist only in `source_df`
    # (or re-sort the index), and the result would no longer line up with
    # the object the annotations are assigned back to.
    return target_df.merge(
        source_df[new_cols],
        left_index=True,
        right_index=True,
        how='left',
    )


def add_mapped_cells_cols(adata, mapped_adata):
    """Copy annotation columns that exist only in ``mapped_adata`` into ``adata``.

    Columns of ``mapped_adata.obs`` / ``mapped_adata.var`` that are absent
    from ``adata.obs`` / ``adata.var`` are joined in by index. Entries of
    ``adata`` with no counterpart in ``mapped_adata`` receive missing values
    in the new columns; entries present only in ``mapped_adata`` are ignored.

    Args:
        adata: Object exposing ``obs`` and ``var`` DataFrames; updated in
            place.
        mapped_adata: Object exposing ``obs`` and ``var`` DataFrames that
            carry the extra columns.

    Returns:
        The same ``adata`` object that was passed in.
    """
    merged_obs = _merge_new_columns(adata.obs, mapped_adata.obs)
    if merged_obs is not None:
        adata.obs = merged_obs

    merged_var = _merge_new_columns(adata.var, mapped_adata.var)
    if merged_var is not None:
        adata.var = merged_var

    return adata
128 | 145 |
|
def add_type_id_columns(adata, col_name):
    """Add an integer id column parsed from the first token of a name column.

    The new column's name is ``col_name`` with every ``'name'`` replaced by
    ``'id'``. Its values are the text before the first space of each entry in
    ``adata.obs[col_name]``, converted to ``int``. If ``col_name`` is not a
    column of ``adata.obs``, a message is printed and nothing is changed.

    Args:
        adata: Object exposing an ``obs`` DataFrame; updated in place.
        col_name: Name of the ``obs`` column holding the source strings.

    Returns:
        The same ``adata`` object that was passed in.
    """
    if col_name not in adata.obs.columns:
        print(f"{col_name} column not found in adata.obs")
        return adata

    id_col = col_name.replace('name', 'id')
    tokens = adata.obs[col_name].str.split(' ')
    leading_token = tokens.str[0]
    adata.obs[id_col] = leading_token.astype('int')
    print(f"Added {id_col} column")
    return adata
137 | 154 |
|
138 | | -def add_grouped_types_columns(sdata, |
| 155 | +def add_grouped_types_columns(adata, |
139 | 156 | new_col, |
140 | 157 | type_mappings=None, |
141 | | - table_name='table', |
142 | 158 | null_value='other'): |
143 | 159 | default_mappings = { |
144 | 160 | 'broad_class': { |
@@ -178,14 +194,14 @@ def add_grouped_types_columns(sdata, |
178 | 194 | norm_mappings[crit_col] = norm |
179 | 195 |
|
180 | 196 | # Initialize column |
181 | | - print(f"Adding '{new_col}' to {table_name}.obs") |
182 | | - sdata[table_name].obs[new_col] = null_value |
| 197 | + print(f"Adding '{new_col}' to adata.obs") |
| 198 | + adata.obs[new_col] = null_value |
183 | 199 |
|
184 | 200 | for crit_col, rules in norm_mappings.items(): |
185 | | - if crit_col not in sdata[table_name].obs.columns: |
| 201 | + if crit_col not in adata.obs.columns: |
186 | 202 | # skip missing criteria columns |
187 | 203 | continue |
188 | | - series = sdata[table_name].obs[crit_col] |
| 204 | + series = adata.obs[crit_col] |
189 | 205 |
|
190 | 206 | for rule in rules: |
191 | 207 | op = rule['op'] |
@@ -226,9 +242,9 @@ def add_grouped_types_columns(sdata, |
226 | 242 | # on any evaluation error, skip this rule |
227 | 243 | continue |
228 | 244 |
|
229 | | - sdata[table_name].obs.loc[mask, new_col] = assign |
| 245 | + adata.obs.loc[mask, new_col] = assign |
230 | 246 |
|
231 | | - return sdata |
| 247 | + return adata |
232 | 248 |
|
233 | 249 | def get_transcripts_bboxes(transcripts, id_col='cell_labels'): |
234 | 250 | transcripts = transcripts.compute() if hasattr(transcripts, 'compute') else transcripts |
|
0 commit comments