Skip to content

Commit 6320047

Browse files
Reintroduce cohorts, AIM, and sequence QC merges into sample metadata
1 parent d1bad60 commit 6320047

4 files changed

Lines changed: 31 additions & 18 deletions

File tree

malariagen_data/adir1.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ class Adir1(AnophelesDataResource):
7979
def __init__(
8080
self,
8181
url=None,
82+
public_url=GCS_DEFAULT_PUBLIC_URL,
8283
bokeh_output_notebook=True,
8384
results_cache=None,
8485
log=sys.stdout,
@@ -94,6 +95,7 @@ def __init__(
9495
):
9596
super().__init__(
9697
url=url,
98+
public_url=public_url,
9799
config_path=CONFIG_PATH,
98100
cohorts_analysis=cohorts_analysis,
99101
aim_analysis=None,

malariagen_data/anoph/sample_metadata.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -621,28 +621,28 @@ def sample_metadata(
621621
# NOTE: the merges below were previously commented out because they broke some things; they are re-enabled here - confirm the breakage is resolved.
622622

623623
# Merge with the sequence QC metadata.
624-
# df_sequence_qc = self.sequence_qc_metadata(
625-
# sample_sets=prepped_sample_sets
626-
# )
624+
df_sequence_qc = self.sequence_qc_metadata(
625+
sample_sets=prepped_sample_sets
626+
)
627627

628628
# Note: merging can change column dtypes
629-
# df_samples = df_samples.merge(
630-
# df_sequence_qc, on="sample_id", sort=False, how="left"
631-
# )
629+
df_samples = df_samples.merge(
630+
df_sequence_qc, on="sample_id", sort=False, how="left"
631+
)
632632

633633
# If available, merge with the AIM metadata.
634-
# if self._aim_analysis:
635-
# df_aim = self.aim_metadata(sample_sets=prepped_sample_sets)
636-
# df_samples = df_samples.merge(
637-
# df_aim, on="sample_id", sort=False, how="left"
638-
# )
634+
if self._aim_analysis:
635+
df_aim = self.aim_metadata(sample_sets=prepped_sample_sets)
636+
df_samples = df_samples.merge(
637+
df_aim, on="sample_id", sort=False, how="left"
638+
)
639639

640640
# If available, merge with the cohorts metadata.
641-
# if self._cohorts_analysis:
642-
# df_cohorts = self.cohorts_metadata(sample_sets=prepped_sample_sets)
643-
# df_samples = df_samples.merge(
644-
# df_cohorts, on="sample_id", sort=False, how="left"
645-
# )
641+
if self._cohorts_analysis:
642+
df_cohorts = self.cohorts_metadata(sample_sets=prepped_sample_sets)
643+
df_samples = df_samples.merge(
644+
df_cohorts, on="sample_id", sort=False, how="left"
645+
)
646646

647647
# Store sample metadata in the cache.
648648
self._cache_sample_metadata[cache_key] = df_samples

malariagen_data/anopheles.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,17 @@ def roh_hmm(
595595
try:
596596
# Load cached numeric data, adding str / obj data again.
597597
results = self.results_cache_get(name=name, params=params)
598-
df_roh = pd.DataFrame(results)
598+
599+
# Reconstruct dataframe
600+
df_roh = pd.DataFrame(
601+
{
602+
"roh_start": results["roh_start"],
603+
"roh_stop": results["roh_stop"],
604+
"roh_length": results["roh_length"],
605+
"roh_is_marginal": results["roh_is_marginal"],
606+
}
607+
)
608+
599609
df_roh["sample_id"] = sample
600610
df_roh["contig"] = resolved_region.contig
601611

@@ -630,6 +640,7 @@ def roh_hmm(
630640
"roh_length",
631641
"roh_is_marginal",
632642
]
643+
633644
self.results_cache_set(
634645
name=name,
635646
params=params,

poetry.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)