1111import gzip
1212from glob import glob
1313import sys
14+ import requests
1415
1516def main ():
1617 parser = argparse .ArgumentParser (
@@ -39,7 +40,7 @@ def main():
3940 parser .add_argument ('--figshare' , action = 'store_true' , help = "Upload all local data to Figshare. FIGSHARE_TOKEN must be set in local environment." )
4041 parser .add_argument ('--all' ,dest = 'all' ,default = False ,action = 'store_true' , help = "Run all data build commands. This includes docker, samples, omics, drugs, exp arguments. This does not run the validate or figshare commands" )
4142 parser .add_argument ('--high_mem' ,dest = 'high_mem' ,default = False ,action = 'store_true' ,help = "If you have 32 or more CPUs, this option is recommended. It will run many code portions in parallel. If you don't have enough memory, this will cause a run failure." )
42- parser .add_argument ('--dataset' ,dest = 'datasets' ,default = 'broad_sanger,hcmi,beataml,cptac,mpnst,mpnstpdx' ,help = 'Datasets to process. Defaults to all available.' )
43+ parser .add_argument ('--dataset' ,dest = 'datasets' ,default = 'broad_sanger,hcmi,beataml,cptac,mpnst,mpnstpdx,pancpdo ' ,help = 'Datasets to process. Defaults to all available.' )
4344 parser .add_argument ('--version' , type = str , required = False , help = 'Version number for the Figshare upload title (e.g., "0.1.29"). This is required for Figshare upload. This must be a higher version than previously published versions.' )
4445 parser .add_argument ('--github-username' , type = str , required = False , help = 'GitHub username for the repository.' )
4546 parser .add_argument ('--github-email' , type = str , required = False , help = 'GitHub email for the repository.' )
@@ -119,6 +120,7 @@ def process_docker(datasets):
119120 'beataml' : ['beataml' ],
120121 'mpnst' : ['mpnst' ],
121122 'mpnstpdx' : ['mpnstpdx' ],
123+ 'pancpdo' : ['pancpdo' ],
122124 'cptac' : ['cptac' ],
123125 'genes' : ['genes' ],
124126 'upload' : ['upload' ]
@@ -130,7 +132,7 @@ def process_docker(datasets):
130132 datasets_to_build .extend (dataset_map .get (dataset , []))
131133
132134 # Build the docker-compose command, adding specific datasets
133- compose_command = ['docker- compose' , '-f' , compose_file , 'build' , '--parallel' ] + datasets_to_build
135+ compose_command = ['docker' , ' compose' , '-f' , compose_file , 'build' , '--parallel' ] + datasets_to_build
134136
135137 log_file_path = 'local/docker.log'
136138 env = os .environ .copy ()
@@ -265,9 +267,11 @@ def run_docker_upload_cmd(cmd_arr, all_files_dir, name, version):
265267 docker_run = ['docker' , 'run' , '--rm' , '-v' , f"{ env ['PWD' ]} /local/{ all_files_dir } :/tmp" , '-e' , f"VERSION={ version } " ]
266268
267269 # Add Appropriate Environment Variables
270+ if name == "validate" :
271+ docker_run .extend (['upload' ])
268272 if 'FIGSHARE_TOKEN' in env and name == 'Figshare' :
269273 docker_run .extend (['-e' , f"FIGSHARE_TOKEN={ env ['FIGSHARE_TOKEN' ]} " , 'upload' ])
270- if name == "validate " :
274+ if name == "Map_Drugs" or name == "Map_Samples " :
271275 docker_run .extend (['upload' ])
272276 if 'GITHUB_TOKEN' in env and name == "GitHub" :
273277 docker_run .extend (['-e' , f"GITHUB_TOKEN={ env ['GITHUB_TOKEN' ]} " , 'upload' ])
@@ -299,6 +303,18 @@ def compress_file(file_path):
299303 with gzip .open (compressed_file_path , 'wb' ) as f_out :
300304 shutil .copyfileobj (f_in , f_out )
301305 os .remove (file_path )
306+
def get_latest_commit_hash(owner, repo, branch='main', timeout=30):
    """
    Return the SHA of the latest commit on a branch of a GitHub repository.

    Args:
        owner: GitHub account or organization that owns the repository.
        repo: Repository name.
        branch: Branch (or any ref accepted by the commits endpoint);
            defaults to 'main'.
        timeout: Seconds to wait for the GitHub API before giving up.
            New, defaulted parameter -- existing callers are unaffected.
            The original call had no timeout and could hang indefinitely.

    Returns:
        str: The full 40-character commit SHA.

    Raises:
        requests.HTTPError: If the API responds with an error status
            (e.g. repo/branch not found, or anonymous rate limit hit).
        requests.Timeout: If the API does not respond within `timeout`.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/commits/{branch}"
    # NOTE(review): unauthenticated request -- subject to GitHub's low
    # anonymous rate limit (60/hour per IP); confirm that is acceptable
    # for the build pipeline's call frequency.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # The commit payload is JSON; the 'sha' field is the full commit hash.
    commit_data = response.json()
    return commit_data['sha']
302318
303319 ######
304320 ### Pre-Build Environment Token Check
@@ -387,17 +403,17 @@ def compress_file(file_path):
387403 ######
388404 ### Begin Upload and/or validation
389405 #####
390-
391- if args .figshare or args .validate :
406+ if args . figshare or args . validate or github_token :
407+ # if args.figshare or args.validate:
392408 # FigShare File Prefixes:
409+
393410 prefixes = ['beataml' , 'hcmi' , 'cptac' , 'mpnst' , 'genes' , 'drugs' ]
394411 broad_sanger_datasets = ["ccle" ,"ctrpv2" ,"fimm" ,"gdscv1" ,"gdscv2" ,"gcsi" ,"prism" ,"nci60" ]
395412 if "broad_sanger" in datasets :
396413 prefixes .extend (broad_sanger_datasets )
397414 datasets .extend (broad_sanger_datasets )
398415 datasets .remove ("broad_sanger" )
399416
400-
401417 figshare_token = os .getenv ('FIGSHARE_TOKEN' )
402418
403419 all_files_dir = 'local/all_files_dir'
@@ -421,6 +437,13 @@ def compress_file(file_path):
421437 for file in glob (os .path .join (all_files_dir , '*.gz' )):
422438 decompress_file (file )
423439
440+ ### These should be done before schema checking.
441+ sample_mapping_command = ['python3' , 'scripts/map_improve_sample_ids.py' , '--local_dir' , "/tmp" , '--version' , args .version ]
442+ run_docker_upload_cmd (sample_mapping_command , 'all_files_dir' , 'Map_Samples' , args .version )
443+
444+ drug_mapping_command = ['python3' , 'scripts/map_improve_drug_ids.py' , '--local_dir' , "/tmp" , '--version' , args .version ]
445+ run_docker_upload_cmd (drug_mapping_command , 'all_files_dir' , 'Map_Drugs' , args .version )
446+
424447 # Run schema checker - This will always run if uploading data.
425448 schema_check_command = ['python3' , 'scripts/check_schema.py' , '--datasets' ] + datasets
426449 run_docker_upload_cmd (schema_check_command , 'all_files_dir' , 'validate' , args .version )
@@ -437,28 +460,47 @@ def compress_file(file_path):
437460
438461 print ("File compression and decompression adjustments are complete." )
439462
440- # Upload to Figshare using Docker
463+ ## # Upload to Figshare using Docker
441464 if args .figshare and args .version and figshare_token :
442- figshare_command = ['python3' , 'scripts/push_to_figshare.py' , '--directory' , "/tmp" , '--title' , f"CODERData{ args .version } " , '--token' , os .getenv ('FIGSHARE_TOKEN' ), '--project_id' , '189342' , '--publish' ]
465+ figshare_command = ['python3' , 'scripts/push_to_figshare.py' , '--directory' , "/tmp" , '--title' , f"CODERData{ args .version } " , '--token' , os .getenv ('FIGSHARE_TOKEN' ), '--project_id' , '189342' , '--version' , args . version , '-- publish' ]
443466 run_docker_upload_cmd (figshare_command , 'all_files_dir' , 'Figshare' , args .version )
444467
468+ ### Push changes to GitHub using Docker
469+ # if args.version and args.figshare and figshare_token and github_token and args.github_username and args.github_email:
445470
446- # Push changes to GitHub using Docker
447- if args .version and args .figshare and figshare_token and github_token and args .github_username and args .github_email :
448- git_command = [
449- 'bash' , '-c' , (
450- f'git config --global user.name "{ args .github_username } " '
451- f'&& git config --global user.email "{ args .github_email } " '
452- f'&& cp /tmp/figshare_latest.yml /usr/src/app/coderdata/docs/_data/figshare_latest.yml '
453- f'&& git add docs/_data/figshare_latest.yml '
454- f'&& git commit -m "Data Built and Uploaded. New Tag: { args .version } " '
455- f'&& git tag { args .version } '
456- f'&& git push https://{ args .github_username } :{ github_token } @github.com/PNNL-CompBio/coderdata.git main '
457- f'&& git push https://{ args .github_username } :{ github_token } @github.com/PNNL-CompBio/coderdata.git --tags'
458- )
459- ]
460- run_docker_upload_cmd (git_command , 'all_files_dir' , 'GitHub' , args .version )
471+ # You can only upload to Github after Figshare upload is completed - otherwise figshare_latest.yml and dataset.yml won't be available.
472+ if args .version and github_token and args .github_username and args .github_email :
473+
474+ git_command = [
475+ 'bash' , '-c' , (
476+ f'git config --global user.name "{ args .github_username } " '
477+ f'&& git config --global user.email "{ args .github_email } " '
478+
479+ # Checkout a new branch
480+ f'&& git checkout -b testing-auto-build-pr-{ args .version } '
481+
482+ # Copy and add the necessary files
483+ f'&& cp /tmp/improve_sample_mapping.json.gz /usr/src/app/coderdata/build/improve_sample_mapping.json.gz '
484+ f'&& cp /tmp/improve_drug_mapping.json.gz /usr/src/app/coderdata/build/improve_drug_mapping.json.gz '
485+ f'&& gunzip /usr/src/app/coderdata/build/*.gz '
486+ f'&& git add -f build/improve_sample_mapping.json build/improve_drug_mapping.json '
487+ f'&& cp /tmp/figshare_latest.yml /usr/src/app/coderdata/docs/_data/figshare_latest.yml '
488+ f'&& cp /tmp/dataset.yml /usr/src/app/coderdata/coderdata/dataset.yml '
489+ f'&& git add -f docs/_data/figshare_latest.yml coderdata/dataset.yml'
490+
491+ # Tag and push
492+ f'&& git commit -m "Data Built and Uploaded. New Tag: { args .version } " '
493+ f'&& git tag { args .version } '
494+ f'&& git push https://{ args .github_username } :{ github_token } @github.com/PNNL-CompBio/coderdata.git testing-auto-build-pr-{ args .version } '
495+
496+ # Create a PR using GitHub CLI
497+ f'&& gh pr create --title "Testing Auto PR instead of auto Merge { args .version } " '
498+ f'--body "This PR was automatically generated by the build process." '
499+ f'--base main --head testing-auto-build-pr-{ args .version } '
500+ )
501+ ]
502+
503+ run_docker_upload_cmd (git_command , 'all_files_dir' , 'GitHub' , args .version )
461504
462-
463505if __name__ == '__main__' :
464506 main ()
0 commit comments