Skip to content

Commit ced9224

Browse files
add hashing to roh function
1 parent 57a9b42 commit ced9224

4 files changed

Lines changed: 193 additions & 287 deletions

File tree

malariagen_data/adir1.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,9 @@ def __init__(
102102
aim_palettes=None,
103103
site_filters_analysis=site_filters_analysis,
104104
discordant_read_calls_analysis=discordant_read_calls_analysis,
105-
default_site_mask="funestus",
106-
default_phasing_analysis="funestus",
107-
default_coverage_calls_analysis="funestus",
105+
default_site_mask="dirus",
106+
default_phasing_analysis="dirus",
107+
default_coverage_calls_analysis="dirus",
108108
bokeh_output_notebook=bokeh_output_notebook,
109109
results_cache=results_cache,
110110
log=log,

malariagen_data/anopheles.py

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -572,31 +572,68 @@ def roh_hmm(
572572
debug = self._log.debug
573573

574574
resolved_region: Region = parse_single_region(self, region)
575-
del region
576575

577-
debug("compute windowed heterozygosity")
578-
sample_id, sample_set, windows, counts = self._sample_count_het(
576+
name = "roh"
577+
578+
params = dict(
579579
sample=sample,
580-
region=resolved_region,
581-
site_mask=site_mask,
580+
region=region,
582581
window_size=window_size,
582+
site_mask=site_mask,
583583
sample_set=sample_set,
584-
chunks=chunks,
585-
inline_array=inline_array,
586-
)
587-
588-
debug("compute runs of homozygosity")
589-
df_roh = self._roh_hmm_predict(
590-
windows=windows,
591-
counts=counts,
592584
phet_roh=phet_roh,
593585
phet_nonroh=phet_nonroh,
594586
transition=transition,
595-
window_size=window_size,
596-
sample_id=sample_id,
597-
contig=resolved_region.contig,
587+
chunks=chunks,
588+
inline_array=inline_array,
598589
)
599590

591+
del region
592+
593+
try:
594+
# Load cached numeric data, adding str / obj data again.
595+
results = self.results_cache_get(name=name, params=params)
596+
df_roh = pd.DataFrame(results)
597+
df_roh["sample_id"] = sample
598+
df_roh["contig"] = resolved_region.contig
599+
600+
except CacheMiss:
601+
debug("compute windowed heterozygosity")
602+
sample_id, sample_set, windows, counts = self._sample_count_het(
603+
sample=sample,
604+
region=resolved_region,
605+
site_mask=site_mask,
606+
window_size=window_size,
607+
sample_set=sample_set,
608+
chunks=chunks,
609+
inline_array=inline_array,
610+
)
611+
612+
debug("compute runs of homozygosity")
613+
df_roh = self._roh_hmm_predict(
614+
windows=windows,
615+
counts=counts,
616+
phet_roh=phet_roh,
617+
phet_nonroh=phet_nonroh,
618+
transition=transition,
619+
window_size=window_size,
620+
sample_id=sample_id,
621+
contig=resolved_region.contig,
622+
)
623+
624+
# Specify numeric columns to save (saving obj - sample ID and contig - breaks the save).
625+
columns_to_save = [
626+
"roh_start",
627+
"roh_stop",
628+
"roh_length",
629+
"roh_is_marginal",
630+
]
631+
self.results_cache_set(
632+
name=name,
633+
params=params,
634+
results={col: df_roh[col].to_numpy() for col in columns_to_save},
635+
)
636+
600637
return df_roh
601638

602639
@check_types
@@ -1304,7 +1341,7 @@ def ihs_gwss(
13041341
) -> Tuple[np.ndarray, np.ndarray]:
13051342
# change this name if you ever change the behaviour of this function, to
13061343
# invalidate any previously cached data
1307-
name = self._ihs_gwss_cache_name
1344+
name = "roh"
13081345

13091346
params = dict(
13101347
contig=contig,

0 commit comments

Comments
 (0)