Skip to content

Commit 1b19634

Browse files
add hashing to roh function
1 parent ce7b80a commit 1b19634

4 files changed

Lines changed: 2031 additions & 1736 deletions

File tree

malariagen_data/adir1.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,9 @@ def __init__(
102102
aim_palettes=None,
103103
site_filters_analysis=site_filters_analysis,
104104
discordant_read_calls_analysis=discordant_read_calls_analysis,
105-
default_site_mask="funestus",
106-
default_phasing_analysis="funestus",
107-
default_coverage_calls_analysis="funestus",
105+
default_site_mask="dirus",
106+
default_phasing_analysis="dirus",
107+
default_coverage_calls_analysis="dirus",
108108
bokeh_output_notebook=bokeh_output_notebook,
109109
results_cache=results_cache,
110110
log=log,

malariagen_data/anopheles.py

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -574,31 +574,68 @@ def roh_hmm(
574574
debug = self._log.debug
575575

576576
resolved_region: Region = parse_single_region(self, region)
577-
del region
578577

579-
debug("compute windowed heterozygosity")
580-
sample_id, sample_set, windows, counts = self._sample_count_het(
578+
name = "roh"
579+
580+
params = dict(
581581
sample=sample,
582-
region=resolved_region,
583-
site_mask=site_mask,
582+
region=region,
584583
window_size=window_size,
584+
site_mask=site_mask,
585585
sample_set=sample_set,
586-
chunks=chunks,
587-
inline_array=inline_array,
588-
)
589-
590-
debug("compute runs of homozygosity")
591-
df_roh = self._roh_hmm_predict(
592-
windows=windows,
593-
counts=counts,
594586
phet_roh=phet_roh,
595587
phet_nonroh=phet_nonroh,
596588
transition=transition,
597-
window_size=window_size,
598-
sample_id=sample_id,
599-
contig=resolved_region.contig,
589+
chunks=chunks,
590+
inline_array=inline_array,
600591
)
601592

593+
del region
594+
595+
try:
596+
# Load cached numeric data, then re-add the str / obj columns (sample ID and contig).
597+
results = self.results_cache_get(name=name, params=params)
598+
df_roh = pd.DataFrame(results)
599+
df_roh["sample_id"] = sample
600+
df_roh["contig"] = resolved_region.contig
601+
602+
except CacheMiss:
603+
debug("compute windowed heterozygosity")
604+
sample_id, sample_set, windows, counts = self._sample_count_het(
605+
sample=sample,
606+
region=resolved_region,
607+
site_mask=site_mask,
608+
window_size=window_size,
609+
sample_set=sample_set,
610+
chunks=chunks,
611+
inline_array=inline_array,
612+
)
613+
614+
debug("compute runs of homozygosity")
615+
df_roh = self._roh_hmm_predict(
616+
windows=windows,
617+
counts=counts,
618+
phet_roh=phet_roh,
619+
phet_nonroh=phet_nonroh,
620+
transition=transition,
621+
window_size=window_size,
622+
sample_id=sample_id,
623+
contig=resolved_region.contig,
624+
)
625+
626+
# Specify numeric columns to save (saving obj - sample ID and contig - breaks the save).
627+
columns_to_save = [
628+
"roh_start",
629+
"roh_stop",
630+
"roh_length",
631+
"roh_is_marginal",
632+
]
633+
self.results_cache_set(
634+
name=name,
635+
params=params,
636+
results={col: df_roh[col].to_numpy() for col in columns_to_save},
637+
)
638+
602639
return df_roh
603640

604641
@check_types
@@ -1306,7 +1343,7 @@ def ihs_gwss(
13061343
) -> Tuple[np.ndarray, np.ndarray]:
13071344
# change this name if you ever change the behaviour of this function, to
13081345
# invalidate any previously cached data
1309-
name = self._ihs_gwss_cache_name
1346+
name = "roh"
13101347

13111348
params = dict(
13121349
contig=contig,

0 commit comments

Comments
 (0)