Skip to content

Commit 509b170

Browse files
committed
Optimize Dockerfiles (install requirements before copying scripts so dependency layers stay cached). Attempt to fix broad_sanger downloads. Hide warnings in the drug descriptor file.
1 parent 4d74714 commit 509b170

17 files changed

Lines changed: 106 additions & 94 deletions

build/broad_sanger/02-broadSangerOmics.R

Lines changed: 41 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -10,29 +10,60 @@ library(httr2)
1010
Sys.setenv(VROOM_CONNECTION_SIZE=100000000)
1111

1212

13-
# Robust download with retry and optional content-length validation
14-
robust_download_httr2 <- function(url, dest, max_tries = 5, timeout_secs = 120) {
13+
14+
robust_download_httr2 <- function(url, dest,
15+
max_tries = 5,
16+
timeout_secs = 1500) {
17+
# browser-style User-Agent
18+
message("Downloading: ", url)
19+
ua <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
20+
1521
req <- request(url) |>
22+
req_headers(
23+
`User-Agent` = ua,
24+
Accept = "application/octet-stream"
25+
) |>
1626
req_timeout(timeout_secs) |>
17-
req_retry(max_tries = max_tries, retry_on_failure = TRUE)
18-
19-
resp <- req |> req_perform(path = dest) # streams to dest; errors on 4xx/5xx automatically
27+
req_retry(max_tries = max_tries, retry_on_failure = TRUE) |>
28+
req_verbose() # ← turn on full curl logging
29+
30+
resp <- tryCatch(
31+
{
32+
req |> req_perform(path = dest)
33+
},
34+
error = function(e) {
35+
message("🚨 Download failed for: ", url)
36+
message(" • curl error: ", e$message)
37+
if (file.exists(dest)) {
38+
message(" • partial file size: ",
39+
file.info(dest)$size, " bytes")
40+
}
41+
stop(e) # re-throw so your script still aborts
42+
}
43+
)
2044

21-
# Validate content length if provided
45+
# if we get here, the download succeeded; sanity-check length
2246
hdrs <- resp |> resp_headers()
2347
if (!is.null(hdrs$`content-length`)) {
2448
expected <- as.numeric(hdrs$`content-length`)
25-
actual <- file.info(dest)$size
26-
if (is.na(actual) || actual != expected) {
27-
stop(sprintf("Incomplete download for %s: expected %d bytes but got %d", url, expected, actual))
49+
actual <- file.info(dest)$size
50+
if (actual != expected) {
51+
stop(sprintf(
52+
"Incomplete download for %s: expected %d bytes but got %d",
53+
url, expected, actual
54+
))
2855
}
2956
}
3057

3158
invisible(dest)
3259
}
3360

61+
62+
63+
64+
3465
# Helper to download a ZIP and extract it safely
35-
download_and_extract_zip_httr2 <- function(url, dest_zip, extract_dir, max_tries = 5, timeout_secs = 120) {
66+
download_and_extract_zip_httr2 <- function(url, dest_zip, extract_dir, max_tries = 5, timeout_secs = 1500) {
3667
robust_download_httr2(url, dest_zip, max_tries = max_tries, timeout_secs = timeout_secs)
3768
if (!file.exists(dest_zip)) stop(sprintf("Download failed, %s missing", dest_zip))
3869
tryCatch({

build/broad_sanger/exp_requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,4 +7,4 @@ scikit-learn
77
scipy
88
requests
99
openpyxl
10-
polars
10+
polars-lts-cpu

build/broad_sanger/requirements.txt

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,8 @@ scikit-learn
77
scipy
88
requests
99
openpyxl
10-
polars==0.19.17
10+
polars-lts-cpu
1111
mordredcommunity
1212
rdkit
1313
coderdata==0.1.40
14-
psutil
15-
polars
16-
urllib3
14+
psutil

build/build_all.py

Lines changed: 0 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -79,30 +79,6 @@ def run_docker_cmd(cmd_arr,filename):
7979
print(filename+' retrieved')
8080

8181

82-
# def process_docker():
83-
# '''
84-
# Build all docker images using docker compose
85-
# All output and errors are logged at local/docker.log
86-
# '''
87-
# compose_file = 'build/docker/docker-compose.yml'
88-
# compose_command = ['docker', 'compose', '-f', compose_file, 'build', '--parallel']
89-
# log_file_path = 'local/docker.log'
90-
# env = os.environ.copy()
91-
# print(f"Docker-compose is building all images. View output in {log_file_path}.")
92-
# with open(log_file_path, 'w') as log_file:
93-
# # Execute the docker-compose command
94-
# res = subprocess.run(compose_command, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
95-
# # Log both stdout and stderr to the log file
96-
# log_file.write(res.stdout)
97-
# if res.returncode != 0:
98-
# log_file.write("Docker compose build failed.\n")
99-
# print(f"Docker compose build failed. See {log_file_path} for details.")
100-
# exit(1)
101-
# else:
102-
# log_file.write("Docker images built successfully.\n")
103-
# print(f"Docker images built successfully. Details logged in {log_file_path}")
104-
105-
10682
def process_docker(datasets):
10783
'''
10884
Build specific docker images using docker-compose based on the dataset argument.

build/docker/Dockerfile.beataml

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,21 @@ FROM python:3.9
22

33
WORKDIR /usr/src/app
44

5+
# Set MPLCONFIGDIR to a writable directory
6+
ENV MPLCONFIGDIR=/app/tmp/matplotlib
7+
RUN mkdir -p /app/tmp/matplotlib
8+
9+
COPY build/beatAML/requirements.txt .
10+
RUN pip install --no-cache-dir -r requirements.txt
11+
12+
# CMD python GetBeatAML.py --token ${SYNAPSE_TOKEN}
513

614
COPY build/beatAML/GetBeatAML.py .
715
COPY build/utils/fit_curve.py .
816
COPY build/utils/build_drug_desc.py .
917
COPY build/utils/pubchem_retrieval.py .
1018
COPY build/utils/tpmFromCounts.py .
1119
COPY build/beatAML/*sh ./
12-
COPY build/beatAML/requirements.txt .
1320

14-
# Set MPLCONFIGDIR to a writable directory
15-
ENV MPLCONFIGDIR=/app/tmp/matplotlib
16-
RUN mkdir -p /app/tmp/matplotlib
1721

18-
RUN pip install --no-cache-dir -r requirements.txt
19-
VOLUME ['/tmp']
20-
# CMD python GetBeatAML.py --token ${SYNAPSE_TOKEN}
22+
VOLUME ['/tmp']

build/docker/Dockerfile.broad_sanger_exp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,13 +30,12 @@ RUN mkdir -p /app/tmp/matplotlib
3030
ENV PYTHONPATH "${PYTHONPATH}:/app"
3131
WORKDIR /app
3232

33-
# Add Requirements files
34-
ADD build/broad_sanger/requirements.txt .
35-
ADD build/broad_sanger/exp_requirements.r .
3633

3734
# installing r libraries
35+
ADD build/broad_sanger/exp_requirements.r .
3836
RUN Rscript exp_requirements.r
3937
# installing python libraries
38+
ADD build/broad_sanger/requirements.txt .
4039
RUN /opt/venv/bin/pip3 install -r requirements.txt
4140

4241
# Add these later so caching is already done for the R and Python libraries.

build/docker/Dockerfile.broad_sanger_omics

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,14 +30,13 @@ RUN mkdir -p /app/tmp/matplotlib
3030
ENV PYTHONPATH "${PYTHONPATH}:/app"
3131
WORKDIR /app
3232

33-
# Add requirements files
34-
ADD build/broad_sanger/requirements.txt .
35-
ADD build/broad_sanger/omics_requirements.r .
3633

3734
# Install R libraries.
35+
ADD build/broad_sanger/omics_requirements.r .
3836
RUN Rscript omics_requirements.r
3937

4038
# Install Python libraries.
39+
ADD build/broad_sanger/requirements.txt .
4140
RUN /opt/venv/bin/pip install -r requirements.txt
4241

4342
# Add these later so caching is already done for the R and Python libraries.

build/docker/Dockerfile.cptac

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,16 +11,16 @@ ENV PYTHONPATH "${PYTHONPATH}:/tmp"
1111
ENV PYTHONPATH "${PYTHONPATH}:/app"
1212
WORKDIR /app
1313

14+
# installing python libraries
1415
COPY build/cptac/requirements.txt .
16+
RUN /opt/venv/bin/pip3 install -r requirements.txt
17+
1518
COPY build/cptac/*.py ./
1619
COPY build/cptac/*sh ./
1720

1821
# Set MPLCONFIGDIR to a writable directory
1922
ENV MPLCONFIGDIR=/app/tmp/matplotlib
2023
RUN mkdir -p /app/tmp/matplotlib
2124

22-
# installing python libraries
23-
RUN /opt/venv/bin/pip3 install -r requirements.txt
24-
2525
VOLUME ["/tmp"]
2626
#ENTRYPOINT ["python3","getCptacData.py"]

build/docker/Dockerfile.crcpdo

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,11 @@ WORKDIR /app
3737
ENV MPLCONFIGDIR=/app/tmp/matplotlib
3838
RUN mkdir -p /app/tmp/matplotlib
3939

40-
41-
ADD build/crcpdo/requirements.R .
4240
# installing r libraries
41+
ADD build/crcpdo/requirements.R .
4342
RUN Rscript requirements.R
4443

4544

46-
4745
# installing python libraries
4846
ADD build/crcpdo/requirements.txt .
4947
#RUN /opt/venv/bin/pip3 install -r requirements.txt

build/docker/Dockerfile.hcmi

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,17 +2,16 @@ FROM python:3.9
22

33
WORKDIR /usr/src/app
44

5-
COPY build/hcmi/01-createHCMISamplesFile.py .
6-
COPY build/hcmi/02-getHCMIData.py .
7-
COPY build/hcmi/full_manifest.txt .
8-
COPY build/hcmi/requirements.txt .
9-
COPY build/hcmi/*sh ./
10-
COPY build/hcmi/hcmi_cancer_types.csv ./
11-
12-
135
# Set MPLCONFIGDIR to a writable directory
146
ENV MPLCONFIGDIR=/app/tmp/matplotlib
157
RUN mkdir -p /app/tmp/matplotlib
168

9+
COPY build/hcmi/requirements.txt .
1710
RUN pip install --no-cache-dir -r requirements.txt
1811

12+
13+
COPY build/hcmi/01-createHCMISamplesFile.py .
14+
COPY build/hcmi/02-getHCMIData.py .
15+
COPY build/hcmi/full_manifest.txt .
16+
COPY build/hcmi/*sh ./
17+
COPY build/hcmi/hcmi_cancer_types.csv ./

0 commit comments

Comments
 (0)