Skip to content

Commit 5b4badd

Browse files
Add verbose (-v) flag using logging; redact Authorization and X-API-KEY headers; propagate verbose through download flows; add tests and docs
1 parent 0437e1a commit 5b4badd

10 files changed

Lines changed: 245 additions & 34 deletions

File tree

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Changelog
2+
3+
## 0.14.1 - 2026-01-01
4+
5+
- Add `-v/--verbose` global CLI option to enable redacted HTTP request/response logging for debugging. (CLI: `databusclient -v ...`)
6+
- Ensure `Authorization` and `X-API-KEY` headers are redacted in verbose output.
7+
- Add unit tests and README documentation for verbose mode.

PR_BODY.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Title: Add verbose CLI flag (-v) with redacted HTTP logging
2+
3+
Short description:
4+
- Add a global `-v/--verbose` CLI flag to enable redacted HTTP request/response logging to help debug interactions with the Databus and Vault.
5+
6+
What changed:
7+
- Add global `-v/--verbose` option to `databusclient` CLI and propagate it to API calls.
8+
- Implement redacted HTTP logging helper (redacts `Authorization` and `X-API-KEY` headers).
9+
- Instrument the `download` flows (including the collection and SPARQL query requests) and the Vault token exchange to log redacted HTTP request/response details when `-v` is enabled.
10+
- Add unit tests ensuring verbose logs are printed and sensitive tokens are redacted.
11+
- Update `README.md` and add a `CHANGELOG.md` entry.
12+
13+
Why:
14+
- Provides safe, actionable debugging output for issues involving HTTP communication and auth problems without exposing secrets.
15+
16+
Security note:
17+
- `Authorization` and `X-API-KEY` headers are redacted in verbose output. Avoid enabling verbose mode in CI jobs whose logs are publicly visible.
18+
19+
Closes #27

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ docker run --rm -v $(pwd):/data dbpedia/databus-python-client download $DOWNLOAD
166166
- If the dataset/files to be downloaded require vault authentication, you need to provide a vault token with `--vault-token /path/to/vault-token.dat`. See [Registration (Access Token)](#registration-access-token) for details on how to get a vault token.
167167

168168
Note: Vault tokens are only required for certain protected Databus hosts (for example: `data.dbpedia.io`, `data.dev.dbpedia.link`). The client now detects those hosts and will fail early with a clear message if a token is required but not provided. Do not pass `--vault-token` for public downloads.
169+
- `-v, --verbose`
170+
- Enable verbose HTTP request/response output for debugging. This is a global option (`databusclient -v ...`). Headers that may contain secrets (for example `Authorization` and `X-API-KEY`) are redacted in the output. Use with caution and avoid enabling it in CI jobs whose logs are publicly visible.
169171
- `--databus-key`
170172
- If the databus is protected and needs API key authentication, you can provide the API key with `--databus-key YOUR_API_KEY`.
171173

databusclient/api/download.py

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
import requests
77
from SPARQLWrapper import JSON, SPARQLWrapper
88
from tqdm import tqdm
9+
import logging
10+
11+
logger = logging.getLogger("databusclient")
912

1013
from databusclient.api.utils import (
1114
fetch_databus_jsonld,
@@ -69,12 +72,12 @@ def _download_file(
6972
headers = {}
7073

7174
# --- 1a. public databus ---
72-
if verbose:
75+
if verbose or logger.isEnabledFor(logging.DEBUG):
7376
from databusclient.api.utils import log_http
7477

7578
log_http("HEAD", url, req_headers=headers)
7679
response = requests.head(url, timeout=30, allow_redirects=False)
77-
if verbose:
80+
if verbose or logger.isEnabledFor(logging.DEBUG):
7881
from databusclient.api.utils import log_http
7982

8083
log_http("HEAD", url, req_headers=headers, status=response.status_code, resp_headers=response.headers)
@@ -118,14 +121,14 @@ def _download_file(
118121
headers["Accept-Encoding"] = (
119122
"identity" # disable gzip to get correct content-length
120123
)
121-
if verbose:
124+
if verbose or logger.isEnabledFor(logging.DEBUG):
122125
from databusclient.api.utils import log_http
123126

124127
log_http("GET", url, req_headers=headers)
125128
response = requests.get(
126129
url, headers=headers, stream=True, allow_redirects=True, timeout=30
127130
)
128-
if verbose:
131+
if verbose or logger.isEnabledFor(logging.DEBUG):
129132
from databusclient.api.utils import log_http
130133

131134
log_http("GET", url, req_headers=headers, status=response.status_code, resp_headers=response.headers)
@@ -159,12 +162,12 @@ def _download_file(
159162
headers.pop("Accept-Encoding", None)
160163

161164
# Retry with token
162-
if verbose:
165+
if verbose or logger.isEnabledFor(logging.DEBUG):
163166
from databusclient.api.utils import log_http
164167

165168
log_http("GET", url, req_headers=headers)
166169
response = requests.get(url, headers=headers, stream=True, timeout=30)
167-
if verbose:
170+
if verbose or logger.isEnabledFor(logging.DEBUG):
168171
from databusclient.api.utils import log_http
169172

170173
log_http("GET", url, req_headers=headers, status=response.status_code, resp_headers=response.headers)
@@ -243,50 +246,73 @@ def _download_files(
243246
)
244247

245248

246-
def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> str:
249+
def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None, verbose: bool = False) -> str:
247250
"""
248251
Get SPARQL query of collection members from databus collection URI.
249252
250253
Parameters:
251254
- uri: The full databus collection URI
252255
- databus_key: Optional Databus API key for authentication on protected resources
256+
- verbose: when True, print redacted HTTP request/response details
253257
254258
Returns:
255259
SPARQL query string to get download URLs of all files in the collection.
256260
"""
257261
headers = {"Accept": "text/sparql"}
258262
if databus_key is not None:
259263
headers["X-API-KEY"] = databus_key
264+
if verbose:
265+
from databusclient.api.utils import log_http
266+
267+
log_http("GET", uri, req_headers=headers)
260268

261269
response = requests.get(uri, headers=headers, timeout=30)
270+
if verbose:
271+
from databusclient.api.utils import log_http
272+
273+
log_http("GET", uri, req_headers=headers, status=response.status_code, resp_headers=response.headers)
274+
262275
response.raise_for_status()
263276
return response.text
264277

265278

266-
def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict:
279+
def _query_sparql_endpoint(endpoint_url, query, databus_key=None, verbose: bool = False) -> dict:
267280
"""
268281
Query a SPARQL endpoint and return results in JSON format.
269282
270283
Parameters:
271284
- endpoint_url: the URL of the SPARQL endpoint
272285
- query: the SPARQL query string
273286
- databus_key: Optional API key for authentication
287+
- verbose: when True, print redacted HTTP request/response details
274288
275289
Returns:
276290
- Dictionary containing the query results
277291
"""
292+
if verbose:
293+
from databusclient.api.utils import log_http
294+
295+
headers = {"X-API-KEY": databus_key} if databus_key is not None else None
296+
log_http("POST", endpoint_url, req_headers=headers)
297+
278298
sparql = SPARQLWrapper(endpoint_url)
279299
sparql.method = "POST"
280300
sparql.setQuery(query)
281301
sparql.setReturnFormat(JSON)
282302
if databus_key is not None:
283303
sparql.setCustomHttpHeaders({"X-API-KEY": databus_key})
284304
results = sparql.query().convert()
305+
306+
if verbose:
307+
from databusclient.api.utils import log_http
308+
309+
log_http("POST", endpoint_url, req_headers={"X-API-KEY": databus_key} if databus_key is not None else None, status=200)
310+
285311
return results
286312

287313

288314
def _get_file_download_urls_from_sparql_query(
289-
endpoint_url, query, databus_key=None
315+
endpoint_url, query, databus_key=None, verbose: bool = False
290316
) -> List[str]:
291317
"""
292318
Execute a SPARQL query to get databus file download URLs.
@@ -295,11 +321,12 @@ def _get_file_download_urls_from_sparql_query(
295321
- endpoint_url: the URL of the SPARQL endpoint
296322
- query: the SPARQL query string
297323
- databus_key: Optional API key for authentication
324+
- verbose: when True, print redacted HTTP request/response details
298325
299326
Returns:
300327
- List of file download URLs
301328
"""
302-
result_dict = _query_sparql_endpoint(endpoint_url, query, databus_key=databus_key)
329+
result_dict = _query_sparql_endpoint(endpoint_url, query, databus_key=databus_key, verbose=verbose)
303330

304331
bindings = result_dict.get("results", {}).get("bindings")
305332
if not isinstance(bindings, list):
@@ -336,7 +363,8 @@ def __get_vault_access__(
336363
with open(token_file, "r") as f:
337364
refresh_token = f.read().strip()
338365
if len(refresh_token) < 80:
339-
print(f"Warning: token from {token_file} is short (<80 chars)")
366+
logger.warning("Token from %s is short (<80 chars)", token_file)
367+
340368

341369
# 2. Refresh token -> access token
342370
resp = requests.post(
@@ -349,7 +377,7 @@ def __get_vault_access__(
349377
timeout=30,
350378
)
351379
resp.raise_for_status()
352-
if verbose:
380+
if verbose or logger.isEnabledFor(logging.DEBUG):
353381
from databusclient.api.utils import log_http
354382

355383
log_http("POST", auth_url, req_headers={"client_id": client_id}, status=resp.status_code, resp_headers=resp.headers)
@@ -377,13 +405,13 @@ def __get_vault_access__(
377405
timeout=30,
378406
)
379407
resp.raise_for_status()
380-
if verbose:
408+
if verbose or logger.isEnabledFor(logging.DEBUG):
381409
from databusclient.api.utils import log_http
382410

383411
log_http("POST", auth_url, req_headers={"client_id": client_id, "audience": audience}, status=resp.status_code, resp_headers=resp.headers)
384412
vault_token = resp.json()["access_token"]
385413

386-
print(f"Using Vault access token for {download_url}")
414+
logger.debug("Using Vault access token for %s", download_url)
387415
return vault_token
388416

389417

@@ -395,6 +423,7 @@ def _download_collection(
395423
databus_key: str = None,
396424
auth_url: str = None,
397425
client_id: str = None,
426+
verbose: bool = False,
398427
) -> None:
399428
"""
400429
Download all files in a databus collection.
@@ -407,10 +436,11 @@ def _download_collection(
407436
- databus_key: Databus API key for protected downloads
408437
- auth_url: Keycloak token endpoint URL
409438
- client_id: Client ID for token exchange
439+
- verbose: when True, print redacted HTTP request/response details
410440
"""
411-
query = _get_sparql_query_of_collection(uri, databus_key=databus_key)
441+
query = _get_sparql_query_of_collection(uri, databus_key=databus_key, verbose=verbose)
412442
file_urls = _get_file_download_urls_from_sparql_query(
413-
endpoint, query, databus_key=databus_key
443+
endpoint, query, databus_key=databus_key, verbose=verbose
414444
)
415445
_download_files(
416446
list(file_urls),
@@ -419,6 +449,7 @@ def _download_collection(
419449
databus_key=databus_key,
420450
auth_url=auth_url,
421451
client_id=client_id,
452+
verbose=verbose,
422453
)
423454

424455

@@ -429,6 +460,7 @@ def _download_version(
429460
databus_key: str = None,
430461
auth_url: str = None,
431462
client_id: str = None,
463+
verbose: bool = False,
432464
) -> None:
433465
"""
434466
Download all files in a databus artifact version.
@@ -440,8 +472,9 @@ def _download_version(
440472
- databus_key: Databus API key for protected downloads
441473
- auth_url: Keycloak token endpoint URL
442474
- client_id: Client ID for token exchange
475+
- verbose: when True, print redacted HTTP request/response details
443476
"""
444-
json_str = fetch_databus_jsonld(uri, databus_key=databus_key)
477+
json_str = fetch_databus_jsonld(uri, databus_key=databus_key, verbose=verbose)
445478
file_urls = _get_file_download_urls_from_artifact_jsonld(json_str)
446479
_download_files(
447480
file_urls,
@@ -450,6 +483,7 @@ def _download_version(
450483
databus_key=databus_key,
451484
auth_url=auth_url,
452485
client_id=client_id,
486+
verbose=verbose,
453487
)
454488

455489

@@ -461,6 +495,7 @@ def _download_artifact(
461495
databus_key: str = None,
462496
auth_url: str = None,
463497
client_id: str = None,
498+
verbose: bool = False,
464499
) -> None:
465500
"""
466501
Download files in a databus artifact.
@@ -473,14 +508,15 @@ def _download_artifact(
473508
- databus_key: Databus API key for protected downloads
474509
- auth_url: Keycloak token endpoint URL
475510
- client_id: Client ID for token exchange
511+
- verbose: when True, print redacted HTTP request/response details
476512
"""
477-
json_str = fetch_databus_jsonld(uri, databus_key=databus_key)
513+
json_str = fetch_databus_jsonld(uri, databus_key=databus_key, verbose=verbose)
478514
versions = _get_databus_versions_of_artifact(json_str, all_versions=all_versions)
479515
if isinstance(versions, str):
480516
versions = [versions]
481517
for version_uri in versions:
482518
print(f"Downloading version: {version_uri}")
483-
json_str = fetch_databus_jsonld(version_uri, databus_key=databus_key)
519+
json_str = fetch_databus_jsonld(version_uri, databus_key=databus_key, verbose=verbose)
484520
file_urls = _get_file_download_urls_from_artifact_jsonld(json_str)
485521
_download_files(
486522
file_urls,
@@ -489,6 +525,7 @@ def _download_artifact(
489525
databus_key=databus_key,
490526
auth_url=auth_url,
491527
client_id=client_id,
528+
verbose=verbose,
492529
)
493530

494531

@@ -564,6 +601,7 @@ def _download_group(
564601
databus_key: str = None,
565602
auth_url: str = None,
566603
client_id: str = None,
604+
verbose: bool = False,
567605
) -> None:
568606
"""
569607
Download files in a databus group.
@@ -576,8 +614,9 @@ def _download_group(
576614
- databus_key: Databus API key for protected downloads
577615
- auth_url: Keycloak token endpoint URL
578616
- client_id: Client ID for token exchange
617+
- verbose: when True, print redacted HTTP request/response details
579618
"""
580-
json_str = fetch_databus_jsonld(uri, databus_key=databus_key)
619+
json_str = fetch_databus_jsonld(uri, databus_key=databus_key, verbose=verbose)
581620
artifacts = _get_databus_artifacts_of_group(json_str)
582621
for artifact_uri in artifacts:
583622
print(f"Download artifact: {artifact_uri}")
@@ -589,6 +628,7 @@ def _download_group(
589628
databus_key=databus_key,
590629
auth_url=auth_url,
591630
client_id=client_id,
631+
verbose=verbose,
592632
)
593633

594634

@@ -677,6 +717,7 @@ def download(
677717
databus_key,
678718
auth_url,
679719
client_id,
720+
verbose=verbose,
680721
)
681722
elif file is not None:
682723
print(f"Downloading file: {databusURI}")
@@ -686,7 +727,9 @@ def download(
686727
vault_token_file=token,
687728
databus_key=databus_key,
688729
auth_url=auth_url,
689-
client_id=client_id, verbose=verbose, )
730+
client_id=client_id,
731+
verbose=verbose,
732+
)
690733
elif version is not None:
691734
print(f"Downloading version: {databusURI}")
692735
_download_version(
@@ -696,6 +739,7 @@ def download(
696739
databus_key=databus_key,
697740
auth_url=auth_url,
698741
client_id=client_id,
742+
verbose=verbose,
699743
)
700744
elif artifact is not None:
701745
print(
@@ -709,6 +753,7 @@ def download(
709753
databus_key=databus_key,
710754
auth_url=auth_url,
711755
client_id=client_id,
756+
verbose=verbose,
712757
)
713758
elif group is not None and group != "collections":
714759
print(
@@ -722,6 +767,7 @@ def download(
722767
databus_key=databus_key,
723768
auth_url=auth_url,
724769
client_id=client_id,
770+
verbose=verbose,
725771
)
726772
elif account is not None:
727773
print("accountId not supported yet") # TODO
@@ -738,7 +784,7 @@ def download(
738784
if uri_endpoint is None: # endpoint is required for queries (--databus)
739785
raise ValueError("No endpoint given for query")
740786
res = _get_file_download_urls_from_sparql_query(
741-
uri_endpoint, databusURI, databus_key=databus_key
787+
uri_endpoint, databusURI, databus_key=databus_key, verbose=verbose
742788
)
743789
_download_files(
744790
res,

0 commit comments

Comments
 (0)