Skip to content

Commit d0dc5ed

Browse files
committed
addresses the issues in #251
mpnst and mpnstpdx code now build.
1 parent fa1b225 commit d0dc5ed

7 files changed

Lines changed: 36 additions & 21 deletions

File tree

build/docker/Dockerfile.mpnst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM r-base:4.3.2
1+
FROM r-base:4.3.3
22

33
# Set environment to noninteractive
44
ENV DEBIAN_FRONTEND=noninteractive

build/mpnst/00_sample_gen.R

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,25 @@ library(dplyr)
77

88
##adding a command line argument
99
args = commandArgs(trailingOnly=TRUE)
10-
if(length(args)!=2){
11-
stop("Need a sample file and synapse token as argument. Rscript 00_sample_gen.R [samplefile] [synapse token]")
10+
if(length(args) > 1 ){
11+
stop("Up to one argument is allowed. This is the filepath to the previously run samples file.")
12+
}
13+
1214

15+
if (length(args) == 0 || is.na(args[1]) || args[1] == "" || !file.exists(args[1])) {
16+
orig_samples <- ""
17+
} else {
18+
orig_samples <- fread(args[1])
1319
}
1420

15-
orig_samples<-fread(args[1])
1621

17-
synapser::synLogin(authToken=args[2])
22+
# Check if Synapse token is available from the environment
23+
synapse_token <- Sys.getenv("SYNAPSE_AUTH_TOKEN")
24+
if (synapse_token == "") {
25+
stop("Error: SYNAPSE_AUTH_TOKEN environment variable is not set.")
26+
}
27+
28+
synapser::synLogin(authToken=synapse_token)
1829
manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|>
1930
as.data.frame()
2031

@@ -32,23 +43,18 @@ manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|>
3243
##first create samples for the original tumors
3344
tumorTable<-manifest|>
3445
dplyr::select(common_name='Sample')|>
35-
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='tumor')|>
46+
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Homo sapiens (Human)',model_type='tumor')|>
3647
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)
3748

3849
##then create samples for the PDX
3950
sampTable<-manifest|>
4051
dplyr::select(common_name='Sample',MicroTissueDrugFolder)|>
41-
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='patient derived xenograft')|>
52+
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Homo sapiens (Human)',model_type='patient derived xenograft')|>
4253
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)
4354

4455

45-
pdxmt<-manifest|>
46-
dplyr::select(common_name='Sample',MicroTissueDrugFolder)|>
47-
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='organoid')|>
48-
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)
49-
5056
##third, generate a sample for the MTs if they were generated
51-
#pdxmt<-subset(sampTable,!is.na(MicroTissueDrugFolder))
57+
pdxmt<-subset(sampTable,!is.na(MicroTissueDrugFolder))
5258
pdxmt$model_type=rep('organoid',nrow(pdxmt))
5359
print(pdxmt)
5460

@@ -58,7 +64,15 @@ main<-rbind(sampTable,pdxmt)|>
5864

5965
#main <- fread("mpnst/NF_MPNST_samples.csv")
6066
#previous_aml <- fread(args[1])#"beatAML/beataml_samples.csv")
61-
max_id <- max(orig_samples$improve_sample_id)
67+
68+
# If there is no previous samples file, max_id is set to 1 (so new IDs start at 2); otherwise, continue where the previous file left off.
69+
if (identical(orig_samples, "")) {
70+
max_id <- 1
71+
} else {
72+
max_id <- max(orig_samples$improve_sample_id, na.rm = TRUE)
73+
}
74+
75+
6276
main$improve_sample_id <- seq(from = max_id + 1, length.out = nrow(main))
6377

6478
#synapse_main <- fread("mpnst/synapse_NF-MPNST_samples.csv")

build/mpnst/03_get_drug_response_data.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ org_samps<-subset(samples_df,model_type=='organoid')
3232

3333
##now get the manifest from synapse
3434
manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|>
35-
as.data.frame()|>
35+
as.data.table()|>
3636
dplyr::rename(common_name='Sample')
3737

3838

build/mpnst/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ directory. Currently using the test files as input.
1212
`mpnst_samples.csv` file. This pulls from the latest synapse
1313
project metadata table.
1414
```
15-
docker run -v $PWD:/tmp -e SYNAPSE_AUTH_TOKEN=$SYNAPSE_AUTH_TOKEN mpnst sh build_samples.sh /tmp/build/build_test/test_samples.csv
15+
docker run -v $PWD:/tmp -e SYNAPSE_AUTH_TOKEN=$SYNAPSE_AUTH_TOKEN mpnst sh build_samples.sh /tmp/build/build_test/test_samples.csv
1616
```
1717

1818
3. Pull the data and map it to the samples. This uses the metadata

build/mpnst/build_drugs.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
2-
set -euo pipefail
2+
#set -euo pipefail
33

4-
trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
4+
#trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
55

66
echo "Running 02_get_drug_data.R with /tmp/mpnst_drugs.tsv and $1."
77
Rscript 02_get_drug_data.R /tmp/mpnst_drugs.tsv $1

build/mpnst/requirements.r

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ install.packages('remotes')
44
remotes::install_version('rjson', version = '0.2.21', repos = 'https://cloud.r-project.org')
55
install.packages('synapser', repos = c('http://ran.synapse.org', 'https://cloud.r-project.org'))
66
install.packages("dplyr")
7-
install.packages("data.table")
87
install.packages("synapser", repos = c("http://ran.synapse.org", "https://cloud.r-project.org"))
8+
install.packages("data.table")
99
install.packages("R.utils")
1010
install.packages("stringr")
11-
install.packages("tidyr")
11+
install.packages("tidyr")

build/utils/build_drug_desc.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,9 @@ def main():
8282
cansmiles = [a for a in set(tab.canSMILES) if str(a)!='nan']
8383
# isosmiles = list(set(tab.isoSMILES))
8484
morgs = smiles_to_fingerprint(cansmiles)
85-
85+
# print(morgs)
8686
ids = pd.DataFrame(tab[['improve_drug_id','canSMILES']]).drop_duplicates()
87+
# print(ids)
8788
id_morg = ids.rename({"canSMILES":'smile'},axis=1).merge(morgs)[['improve_drug_id','structural_descriptor','descriptor_value']]
8889

8990
mords = smiles_to_mordred(cansmiles,nproc=ncors)

0 commit comments

Comments
 (0)