66import requests
77from SPARQLWrapper import JSON , SPARQLWrapper
88from tqdm import tqdm
9+ import logging
10+
11+ logger = logging .getLogger ("databusclient" )
912
1013from databusclient .api .utils import (
1114 fetch_databus_jsonld ,
@@ -69,12 +72,12 @@ def _download_file(
6972 headers = {}
7073
7174 # --- 1a. public databus ---
72- if verbose :
75+ if verbose or logger . isEnabledFor ( logging . DEBUG ) :
7376 from databusclient .api .utils import log_http
7477
7578 log_http ("HEAD" , url , req_headers = headers )
7679 response = requests .head (url , timeout = 30 , allow_redirects = False )
77- if verbose :
80+ if verbose or logger . isEnabledFor ( logging . DEBUG ) :
7881 from databusclient .api .utils import log_http
7982
8083 log_http ("HEAD" , url , req_headers = headers , status = response .status_code , resp_headers = response .headers )
@@ -118,14 +121,14 @@ def _download_file(
118121 headers ["Accept-Encoding" ] = (
119122 "identity" # disable gzip to get correct content-length
120123 )
121- if verbose :
124+ if verbose or logger . isEnabledFor ( logging . DEBUG ) :
122125 from databusclient .api .utils import log_http
123126
124127 log_http ("GET" , url , req_headers = headers )
125128 response = requests .get (
126129 url , headers = headers , stream = True , allow_redirects = True , timeout = 30
127130 )
128- if verbose :
131+ if verbose or logger . isEnabledFor ( logging . DEBUG ) :
129132 from databusclient .api .utils import log_http
130133
131134 log_http ("GET" , url , req_headers = headers , status = response .status_code , resp_headers = response .headers )
@@ -159,12 +162,12 @@ def _download_file(
159162 headers .pop ("Accept-Encoding" , None )
160163
161164 # Retry with token
162- if verbose :
165+ if verbose or logger . isEnabledFor ( logging . DEBUG ) :
163166 from databusclient .api .utils import log_http
164167
165168 log_http ("GET" , url , req_headers = headers )
166169 response = requests .get (url , headers = headers , stream = True , timeout = 30 )
167- if verbose :
170+ if verbose or logger . isEnabledFor ( logging . DEBUG ) :
168171 from databusclient .api .utils import log_http
169172
170173 log_http ("GET" , url , req_headers = headers , status = response .status_code , resp_headers = response .headers )
@@ -243,50 +246,73 @@ def _download_files(
243246 )
244247
245248
def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None, verbose: bool = False) -> str:
    """
    Get SPARQL query of collection members from databus collection URI.

    Parameters:
    - uri: The full databus collection URI
    - databus_key: Optional Databus API key for authentication on protected resources
    - verbose: when True, report HTTP request/response details through log_http
      (also reported when the module logger is enabled for DEBUG)

    Returns:
    SPARQL query string to get download URLs of all files in the collection.

    Raises:
    requests.HTTPError (via raise_for_status) when the server answers with an error status.
    """
    headers = {"Accept": "text/sparql"}
    if databus_key is not None:
        headers["X-API-KEY"] = databus_key

    # Same gate as the other HTTP call sites in this module: an explicit
    # verbose flag or a DEBUG-enabled logger both turn request logging on.
    log_enabled = verbose or logger.isEnabledFor(logging.DEBUG)
    if log_enabled:
        from databusclient.api.utils import log_http

        # NOTE(review): headers may carry the API key; log_http is presumed
        # to redact it -- confirm against its implementation.
        log_http("GET", uri, req_headers=headers)

    response = requests.get(uri, headers=headers, timeout=30)
    if log_enabled:
        # Log before raise_for_status so failed responses are visible too.
        log_http(
            "GET",
            uri,
            req_headers=headers,
            status=response.status_code,
            resp_headers=response.headers,
        )

    response.raise_for_status()
    return response.text
264277
265278
def _query_sparql_endpoint(endpoint_url, query, databus_key=None, verbose: bool = False) -> dict:
    """
    Query a SPARQL endpoint and return results in JSON format.

    Parameters:
    - endpoint_url: the URL of the SPARQL endpoint
    - query: the SPARQL query string
    - databus_key: Optional API key for authentication
    - verbose: when True, report HTTP request/response details through log_http
      (also reported when the module logger is enabled for DEBUG)

    Returns:
    - Dictionary containing the query results
    """
    # Built once and reused for both the request and the log entries.
    headers = {"X-API-KEY": databus_key} if databus_key is not None else None

    # Same gate as the other HTTP call sites in this module: an explicit
    # verbose flag or a DEBUG-enabled logger both turn request logging on.
    log_enabled = verbose or logger.isEnabledFor(logging.DEBUG)
    if log_enabled:
        from databusclient.api.utils import log_http

        # NOTE(review): headers may carry the API key; log_http is presumed
        # to redact it -- confirm against its implementation.
        log_http("POST", endpoint_url, req_headers=headers)

    sparql = SPARQLWrapper(endpoint_url)
    sparql.method = "POST"
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    if headers is not None:
        # Hand SPARQLWrapper its own copy so the logged dict stays untouched.
        sparql.setCustomHttpHeaders(dict(headers))

    query_result = sparql.query()
    results = query_result.convert()

    if log_enabled:
        # Report the real status of the underlying HTTP response when it is
        # exposed; fall back to 200 (the query did not raise) otherwise.
        raw_response = getattr(query_result, "response", None)
        status = getattr(raw_response, "status", None)
        if not isinstance(status, int):
            status = 200
        log_http("POST", endpoint_url, req_headers=headers, status=status)

    return results
286312
287313
288314def _get_file_download_urls_from_sparql_query (
289- endpoint_url , query , databus_key = None
315+ endpoint_url , query , databus_key = None , verbose : bool = False
290316) -> List [str ]:
291317 """
292318 Execute a SPARQL query to get databus file download URLs.
@@ -295,11 +321,12 @@ def _get_file_download_urls_from_sparql_query(
295321 - endpoint_url: the URL of the SPARQL endpoint
296322 - query: the SPARQL query string
297323 - databus_key: Optional API key for authentication
324+ - verbose: when True, print redacted HTTP request/response details
298325
299326 Returns:
300327 - List of file download URLs
301328 """
302- result_dict = _query_sparql_endpoint (endpoint_url , query , databus_key = databus_key )
329+ result_dict = _query_sparql_endpoint (endpoint_url , query , databus_key = databus_key , verbose = verbose )
303330
304331 bindings = result_dict .get ("results" , {}).get ("bindings" )
305332 if not isinstance (bindings , list ):
@@ -336,7 +363,8 @@ def __get_vault_access__(
336363 with open (token_file , "r" ) as f :
337364 refresh_token = f .read ().strip ()
338365 if len (refresh_token ) < 80 :
339- print (f"Warning: token from { token_file } is short (<80 chars)" )
366+ logger .warning ("Token from %s is short (<80 chars)" , token_file )
367+
340368
341369 # 2. Refresh token -> access token
342370 resp = requests .post (
@@ -349,7 +377,7 @@ def __get_vault_access__(
349377 timeout = 30 ,
350378 )
351379 resp .raise_for_status ()
352- if verbose :
380+ if verbose or logger . isEnabledFor ( logging . DEBUG ) :
353381 from databusclient .api .utils import log_http
354382
355383 log_http ("POST" , auth_url , req_headers = {"client_id" : client_id }, status = resp .status_code , resp_headers = resp .headers )
@@ -377,13 +405,13 @@ def __get_vault_access__(
377405 timeout = 30 ,
378406 )
379407 resp .raise_for_status ()
380- if verbose :
408+ if verbose or logger . isEnabledFor ( logging . DEBUG ) :
381409 from databusclient .api .utils import log_http
382410
383411 log_http ("POST" , auth_url , req_headers = {"client_id" : client_id , "audience" : audience }, status = resp .status_code , resp_headers = resp .headers )
384412 vault_token = resp .json ()["access_token" ]
385413
386- print ( f "Using Vault access token for { download_url } " )
414+ logger . debug ( "Using Vault access token for %s" , download_url )
387415 return vault_token
388416
389417
@@ -395,6 +423,7 @@ def _download_collection(
395423 databus_key : str = None ,
396424 auth_url : str = None ,
397425 client_id : str = None ,
426+ verbose : bool = False ,
398427) -> None :
399428 """
400429 Download all files in a databus collection.
@@ -407,10 +436,11 @@ def _download_collection(
407436 - databus_key: Databus API key for protected downloads
408437 - auth_url: Keycloak token endpoint URL
409438 - client_id: Client ID for token exchange
439+ - verbose: when True, print redacted HTTP request/response details
410440 """
411- query = _get_sparql_query_of_collection (uri , databus_key = databus_key )
441+ query = _get_sparql_query_of_collection (uri , databus_key = databus_key , verbose = verbose )
412442 file_urls = _get_file_download_urls_from_sparql_query (
413- endpoint , query , databus_key = databus_key
443+ endpoint , query , databus_key = databus_key , verbose = verbose
414444 )
415445 _download_files (
416446 list (file_urls ),
@@ -419,6 +449,7 @@ def _download_collection(
419449 databus_key = databus_key ,
420450 auth_url = auth_url ,
421451 client_id = client_id ,
452+ verbose = verbose ,
422453 )
423454
424455
@@ -429,6 +460,7 @@ def _download_version(
429460 databus_key : str = None ,
430461 auth_url : str = None ,
431462 client_id : str = None ,
463+ verbose : bool = False ,
432464) -> None :
433465 """
434466 Download all files in a databus artifact version.
@@ -440,8 +472,9 @@ def _download_version(
440472 - databus_key: Databus API key for protected downloads
441473 - auth_url: Keycloak token endpoint URL
442474 - client_id: Client ID for token exchange
475+ - verbose: when True, print redacted HTTP request/response details
443476 """
444- json_str = fetch_databus_jsonld (uri , databus_key = databus_key )
477+ json_str = fetch_databus_jsonld (uri , databus_key = databus_key , verbose = verbose )
445478 file_urls = _get_file_download_urls_from_artifact_jsonld (json_str )
446479 _download_files (
447480 file_urls ,
@@ -450,6 +483,7 @@ def _download_version(
450483 databus_key = databus_key ,
451484 auth_url = auth_url ,
452485 client_id = client_id ,
486+ verbose = verbose ,
453487 )
454488
455489
@@ -461,6 +495,7 @@ def _download_artifact(
461495 databus_key : str = None ,
462496 auth_url : str = None ,
463497 client_id : str = None ,
498+ verbose : bool = False ,
464499) -> None :
465500 """
466501 Download files in a databus artifact.
@@ -473,14 +508,15 @@ def _download_artifact(
473508 - databus_key: Databus API key for protected downloads
474509 - auth_url: Keycloak token endpoint URL
475510 - client_id: Client ID for token exchange
511+ - verbose: when True, print redacted HTTP request/response details
476512 """
477- json_str = fetch_databus_jsonld (uri , databus_key = databus_key )
513+ json_str = fetch_databus_jsonld (uri , databus_key = databus_key , verbose = verbose )
478514 versions = _get_databus_versions_of_artifact (json_str , all_versions = all_versions )
479515 if isinstance (versions , str ):
480516 versions = [versions ]
481517 for version_uri in versions :
482518 print (f"Downloading version: { version_uri } " )
483- json_str = fetch_databus_jsonld (version_uri , databus_key = databus_key )
519+ json_str = fetch_databus_jsonld (version_uri , databus_key = databus_key , verbose = verbose )
484520 file_urls = _get_file_download_urls_from_artifact_jsonld (json_str )
485521 _download_files (
486522 file_urls ,
@@ -489,6 +525,7 @@ def _download_artifact(
489525 databus_key = databus_key ,
490526 auth_url = auth_url ,
491527 client_id = client_id ,
528+ verbose = verbose ,
492529 )
493530
494531
@@ -564,6 +601,7 @@ def _download_group(
564601 databus_key : str = None ,
565602 auth_url : str = None ,
566603 client_id : str = None ,
604+ verbose : bool = False ,
567605) -> None :
568606 """
569607 Download files in a databus group.
@@ -576,8 +614,9 @@ def _download_group(
576614 - databus_key: Databus API key for protected downloads
577615 - auth_url: Keycloak token endpoint URL
578616 - client_id: Client ID for token exchange
617+ - verbose: when True, print redacted HTTP request/response details
579618 """
580- json_str = fetch_databus_jsonld (uri , databus_key = databus_key )
619+ json_str = fetch_databus_jsonld (uri , databus_key = databus_key , verbose = verbose )
581620 artifacts = _get_databus_artifacts_of_group (json_str )
582621 for artifact_uri in artifacts :
583622 print (f"Download artifact: { artifact_uri } " )
@@ -589,6 +628,7 @@ def _download_group(
589628 databus_key = databus_key ,
590629 auth_url = auth_url ,
591630 client_id = client_id ,
631+ verbose = verbose ,
592632 )
593633
594634
@@ -677,6 +717,7 @@ def download(
677717 databus_key ,
678718 auth_url ,
679719 client_id ,
720+ verbose = verbose ,
680721 )
681722 elif file is not None :
682723 print (f"Downloading file: { databusURI } " )
@@ -686,7 +727,9 @@ def download(
686727 vault_token_file = token ,
687728 databus_key = databus_key ,
688729 auth_url = auth_url ,
689- client_id = client_id , verbose = verbose , )
730+ client_id = client_id ,
731+ verbose = verbose ,
732+ )
690733 elif version is not None :
691734 print (f"Downloading version: { databusURI } " )
692735 _download_version (
@@ -696,6 +739,7 @@ def download(
696739 databus_key = databus_key ,
697740 auth_url = auth_url ,
698741 client_id = client_id ,
742+ verbose = verbose ,
699743 )
700744 elif artifact is not None :
701745 print (
@@ -709,6 +753,7 @@ def download(
709753 databus_key = databus_key ,
710754 auth_url = auth_url ,
711755 client_id = client_id ,
756+ verbose = verbose ,
712757 )
713758 elif group is not None and group != "collections" :
714759 print (
@@ -722,6 +767,7 @@ def download(
722767 databus_key = databus_key ,
723768 auth_url = auth_url ,
724769 client_id = client_id ,
770+ verbose = verbose ,
725771 )
726772 elif account is not None :
727773 print ("accountId not supported yet" ) # TODO
@@ -738,7 +784,7 @@ def download(
738784 if uri_endpoint is None : # endpoint is required for queries (--databus)
739785 raise ValueError ("No endpoint given for query" )
740786 res = _get_file_download_urls_from_sparql_query (
741- uri_endpoint , databusURI , databus_key = databus_key
787+ uri_endpoint , databusURI , databus_key = databus_key , verbose = verbose
742788 )
743789 _download_files (
744790 res ,
0 commit comments