|
17 | 17 | import polars as pl |
18 | 18 | import gc |
19 | 19 | import hashlib |
| 20 | +from pathlib import Path |
20 | 21 |
|
def _zip_signatures():
    """Map every ``.zip`` file in the working directory to ``(size, mtime_ns)``."""
    signatures = {}
    for entry in os.listdir():
        if os.path.isfile(entry) and entry.endswith(".zip"):
            info = os.stat(entry)
            signatures[entry] = (info.st_size, info.st_mtime_ns)
    return signatures


def download_tool(url):
    """
    Download, extract, and prepare the GDC client tool.

    The archive is downloaded into the current working directory, unpacked
    there and deleted. Any ``.zip`` file that the unpacking step created or
    rewrote is treated as a nested archive and is unpacked and deleted as
    well. Archives that were already present and are left unchanged are
    never touched.

    Parameters
    ----------
    url : str
        The URL to download the tool from.

    Returns
    -------
    str
        The path to the `gdc-client` executable.

    Raises
    ------
    FileNotFoundError
        If no `gdc-client` executable is found below the current working
        directory after extraction.
    """
    # Snapshot the archives that exist before we start so that unrelated
    # .zip files in the working directory are not unpacked or deleted.
    zips_before = _zip_signatures()

    # Download the file
    print("Downloading tool...")
    filename = wget.download(url)

    # First extraction
    print(f"\nExtracting {filename}...")
    shutil.unpack_archive(filename)
    os.remove(filename)

    # The download may itself contain another archive; only archives that are
    # new (or were overwritten) since the snapshot are extracted again.
    for zip_file, signature in _zip_signatures().items():
        if zips_before.get(zip_file) == signature:
            continue
        print(f"Extracting nested archive: {zip_file}...")
        shutil.unpack_archive(zip_file)
        os.remove(zip_file)

    # Windows builds are expected to carry an ``.exe`` suffix; the plain name
    # is preferred when both are present in the same directory.
    executable_names = ("gdc-client", "gdc-client.exe")
    gdc_client_path = None
    for root, _dirs, files in os.walk("."):
        match = next((name for name in executable_names if name in files), None)
        if match is not None:
            gdc_client_path = os.path.join(root, match)
            break

    if not gdc_client_path:
        raise FileNotFoundError("`gdc-client` executable not found after extraction.")

    # Ensure `gdc-client` is executable
    print(f"Making {gdc_client_path} executable...")
    st = os.stat(gdc_client_path)
    os.chmod(gdc_client_path, st.st_mode | stat.S_IEXEC)

    return gdc_client_path
55 | 67 |
|
def is_tool(name):
    """
    Report whether a tool can be found.

    The tool counts as found when `shutil.which` resolves it, or when an
    entry with exactly this name is listed in the current working directory.

    Parameters
    ----------
    name : str
        The name of the tool.

    Returns
    -------
    bool
        True if the tool is found, otherwise False.
    """
    if shutil.which(name) is not None:
        return True
    return name in os.listdir()
72 | 83 |
|
def ensure_gdc_client():
    """
    Make sure the GDC client tool is present, fetching it when missing.

    When `is_tool` reports `gdc-client` as available, nothing is downloaded.
    Otherwise the release archive matching ``platform.system()`` is handed to
    `download_tool`.

    Raises
    ------
    ValueError
        If there is no download URL for the current operating system.
    """
    client_name = "gdc-client"
    if is_tool(client_name):
        print("`gdc-client` is already installed.")
        return

    print("GDC client not found. Downloading...")
    # Release archives keyed by the value returned from platform.system().
    download_urls = {
        "Darwin": "https://gdc.cancer.gov/system/files/public/file/gdc-client_2.3_OSX_x64-py3.8-macos-14.zip",
        "Windows": "https://gdc.cancer.gov/system/files/public/file/gdc-client_2.3_Windows_x64-py3.8-windows-2019.zip",
        "Linux": "https://gdc.cancer.gov/system/files/public/file/gdc-client_2.3_Ubuntu_x64-py3.8-ubuntu-20.04.zip",
    }
    system_name = platform.system()
    download_url = download_urls.get(system_name)
    if not download_url:
        raise ValueError(f"Unsupported OS: {system_name}")

    installed_path = download_tool(download_url)
    print(f"`gdc-client` downloaded and available at {installed_path}")
| 106 | + |
93 | 107 |
|
94 | 108 | def extract_uuids_from_manifest(manifest_data): |
95 | 109 | """ |
|
0 commit comments