Skip to content

Commit 0437e1a

Browse files
Add verbose CLI flag with redacted HTTP logging
1 parent f8aa663 commit 0437e1a

4 files changed

Lines changed: 123 additions & 6 deletions

File tree

databusclient/api/download.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def _download_file(
3232
databus_key=None,
3333
auth_url=None,
3434
client_id=None,
35+
verbose=False,
3536
) -> None:
3637
"""
3738
Download a file from the internet with a progress bar using tqdm.
@@ -43,6 +44,7 @@ def _download_file(
4344
- databus_key: Databus API key for protected downloads
4445
- auth_url: Keycloak token endpoint URL
4546
- client_id: Client ID for token exchange
47+
- verbose: when True, print redacted HTTP request/response details
4648
"""
4749
if localDir is None:
4850
_host, account, group, artifact, version, file = (
@@ -67,7 +69,15 @@ def _download_file(
6769
headers = {}
6870

6971
# --- 1a. public databus ---
72+
if verbose:
73+
from databusclient.api.utils import log_http
74+
75+
log_http("HEAD", url, req_headers=headers)
7076
response = requests.head(url, timeout=30, allow_redirects=False)
77+
if verbose:
78+
from databusclient.api.utils import log_http
79+
80+
log_http("HEAD", url, req_headers=headers, status=response.status_code, resp_headers=response.headers)
7181

7282
# Check for redirect and update URL if necessary
7383
if response.headers.get("Location") and response.status_code in [
@@ -108,9 +118,17 @@ def _download_file(
108118
headers["Accept-Encoding"] = (
109119
"identity" # disable gzip to get correct content-length
110120
)
121+
if verbose:
122+
from databusclient.api.utils import log_http
123+
124+
log_http("GET", url, req_headers=headers)
111125
response = requests.get(
112126
url, headers=headers, stream=True, allow_redirects=True, timeout=30
113127
)
128+
if verbose:
129+
from databusclient.api.utils import log_http
130+
131+
log_http("GET", url, req_headers=headers, status=response.status_code, resp_headers=response.headers)
114132
www = response.headers.get("WWW-Authenticate", "") # Check if authentication is required
115133

116134
# --- 3. Handle authentication responses ---
@@ -136,12 +154,20 @@ def _download_file(
136154
# for known hosts. __get_vault_access__ handles reading the refresh
137155
# token and exchanging it; errors are translated to DownloadAuthError
138156
# for user-friendly CLI output.
139-
vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id)
157+
vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id, verbose=verbose)
140158
headers["Authorization"] = f"Bearer {vault_token}"
141159
headers.pop("Accept-Encoding", None)
142160

143161
# Retry with token
162+
if verbose:
163+
from databusclient.api.utils import log_http
164+
165+
log_http("GET", url, req_headers=headers)
144166
response = requests.get(url, headers=headers, stream=True, timeout=30)
167+
if verbose:
168+
from databusclient.api.utils import log_http
169+
170+
log_http("GET", url, req_headers=headers, status=response.status_code, resp_headers=response.headers)
145171

146172
# Map common auth failures to friendly messages
147173
if response.status_code == 401:
@@ -191,6 +217,7 @@ def _download_files(
191217
databus_key: str = None,
192218
auth_url: str = None,
193219
client_id: str = None,
220+
verbose: bool = False,
194221
) -> None:
195222
"""
196223
Download multiple files from the databus.
@@ -202,6 +229,7 @@ def _download_files(
202229
- databus_key: Databus API key for protected downloads
203230
- auth_url: Keycloak token endpoint URL
204231
- client_id: Client ID for token exchange
232+
- verbose: when True, print redacted HTTP request/response details
205233
"""
206234
for url in urls:
207235
_download_file(
@@ -211,6 +239,7 @@ def _download_files(
211239
databus_key=databus_key,
212240
auth_url=auth_url,
213241
client_id=client_id,
242+
verbose=verbose,
214243
)
215244

216245

@@ -294,7 +323,7 @@ def _get_file_download_urls_from_sparql_query(
294323

295324

296325
def __get_vault_access__(
297-
download_url: str, token_file: str, auth_url: str, client_id: str
326+
download_url: str, token_file: str, auth_url: str, client_id: str, verbose: bool = False
298327
) -> str:
299328
"""
300329
Get Vault access token for a protected databus download.
@@ -320,6 +349,10 @@ def __get_vault_access__(
320349
timeout=30,
321350
)
322351
resp.raise_for_status()
352+
if verbose:
353+
from databusclient.api.utils import log_http
354+
355+
log_http("POST", auth_url, req_headers={"client_id": client_id}, status=resp.status_code, resp_headers=resp.headers)
323356
access_token = resp.json()["access_token"]
324357

325358
# 3. Extract host as audience
@@ -344,6 +377,10 @@ def __get_vault_access__(
344377
timeout=30,
345378
)
346379
resp.raise_for_status()
380+
if verbose:
381+
from databusclient.api.utils import log_http
382+
383+
log_http("POST", auth_url, req_headers={"client_id": client_id, "audience": audience}, status=resp.status_code, resp_headers=resp.headers)
347384
vault_token = resp.json()["access_token"]
348385

349386
print(f"Using Vault access token for {download_url}")
@@ -598,6 +635,7 @@ def download(
598635
all_versions=None,
599636
auth_url="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token",
600637
client_id="vault-token-exchange",
638+
verbose: bool = False,
601639
) -> None:
602640
"""
603641
Download datasets from databus.
@@ -612,6 +650,7 @@ def download(
612650
- databus_key: Databus API key for protected downloads
613651
- auth_url: Keycloak token endpoint URL. Default is "https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token".
614652
- client_id: Client ID for token exchange. Default is "vault-token-exchange".
653+
- verbose: when True, print redacted HTTP request/response details
615654
"""
616655
for databusURI in databusURIs:
617656
host, account, group, artifact, version, file = (
@@ -647,8 +686,7 @@ def download(
647686
vault_token_file=token,
648687
databus_key=databus_key,
649688
auth_url=auth_url,
650-
client_id=client_id,
651-
)
689+
client_id=client_id, verbose=verbose, )
652690
elif version is not None:
653691
print(f"Downloading version: {databusURI}")
654692
_download_version(
@@ -709,4 +747,5 @@ def download(
709747
databus_key=databus_key,
710748
auth_url=auth_url,
711749
client_id=client_id,
750+
verbose=verbose,
712751
)

databusclient/api/utils.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,45 @@ def fetch_databus_jsonld(uri: str, databus_key: str | None = None) -> str:
4848
response.raise_for_status()
4949

5050
return response.text
51+
52+
53+
def _redact_headers(headers):
54+
if not headers:
55+
return headers
56+
redacted = {}
57+
for k, v in headers.items():
58+
key = k.lower()
59+
if key == "authorization" or key.startswith("x-api-key"):
60+
redacted[k] = "REDACTED"
61+
else:
62+
redacted[k] = v
63+
return redacted
64+
65+
66+
def log_http(
    method,
    url,
    req_headers=None,
    status=None,
    resp_headers=None,
    body_snippet=None,
):
    """Print a summary of one HTTP request/response to stdout.

    The first line is always ``[HTTP] <method> <url>``. Each further line is
    printed only when the matching argument carries information:

    - req_headers: request headers, passed through ``_redact_headers``
    - status: response status (printed whenever it is not ``None``)
    - resp_headers: response headers, converted to a dict on a best-effort
      basis and passed through ``_redact_headers``
    - body_snippet: response body; at most the first 500 items are printed
    """

    def _headers_as_dict(raw):
        # Best-effort conversion: callers may hand in a real mapping, an
        # object that only offers ``.items()`` / ``.headers``, or a test
        # double for which none of these yield something dict-like.
        try:
            return dict(raw)
        except Exception:
            pass  # fall through to the attribute-based attempts below

        via_items = hasattr(raw, "items")
        via_headers = (not via_items) and hasattr(raw, "headers")
        if via_items or via_headers:
            try:
                if via_items:
                    return dict(raw.items())
                return dict(getattr(raw, "headers") or {})
            except Exception:
                pass  # last resort below

        return {"headers": str(raw)}

    print(f"[HTTP] {method} {url}")

    if req_headers:
        print(" Req headers:", _redact_headers(req_headers))

    if status is not None:
        print(" Status:", status)

    if resp_headers:
        print(" Resp headers:", _redact_headers(_headers_as_dict(resp_headers)))

    if body_snippet:
        print(" Body preview:", body_snippet[:500])

databusclient/cli.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@
1313

1414

1515
@click.group()
16-
def app():
16+
@click.option("-v", "--verbose", is_flag=True, help="Enable verbose HTTP request/response output")
17+
@click.pass_context
18+
def app(ctx, verbose):
1719
"""Databus Client CLI"""
18-
pass
20+
ctx.ensure_object(dict)
21+
ctx.obj["verbose"] = verbose
1922

2023

2124
@app.command()
@@ -159,7 +162,9 @@ def deploy(
159162
show_default=True,
160163
help="Client ID for token exchange",
161164
)
165+
@click.pass_context
162166
def download(
167+
ctx,
163168
databusuris: List[str],
164169
localdir,
165170
databus,
@@ -182,6 +187,7 @@ def download(
182187
all_versions=all_versions,
183188
auth_url=authurl,
184189
client_id=clientid,
190+
verbose=ctx.obj.get("verbose", False),
185191
)
186192
except DownloadAuthError as e:
187193
raise click.ClickException(str(e))

tests/test_download_auth.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,33 @@ def test_403_reports_insufficient_permissions():
102102
dl._download_file(url, localDir='.', vault_token_file="/some/token/file")
103103

104104
assert "permission" in str(exc.value) or "forbidden" in str(exc.value)
105+
106+
107+
def test_verbose_redacts_authorization(monkeypatch, capsys):
    """Verbose download output must show HTTP traces without leaking tokens.

    Simulated exchange: HEAD -> 200, first GET -> 401 with a
    ``WWW-Authenticate`` challenge, two token POSTs yielding "ACCESS" and
    "VAULT", then the retried GET -> 200 with an empty body.
    """
    protected_host = next(iter(VAULT_REQUIRED_HOSTS))
    url = f"https://{protected_host}/protected/file.ttl"

    # Canned HTTP responses, in the order the download code consumes them.
    head_ok = make_response(status=200, headers={})
    get_unauthorized = make_response(
        status=401, headers={"WWW-Authenticate": 'Bearer realm="auth"'}
    )
    get_ok = make_response(status=200, headers={"content-length": "0"}, content=b"")

    # Token endpoint replies for the two POST calls.
    first_token_reply = Mock()
    first_token_reply.json.return_value = {"access_token": "ACCESS"}
    second_token_reply = Mock()
    second_token_reply.json.return_value = {"access_token": "VAULT"}

    monkeypatch.setenv("REFRESH_TOKEN", "x" * 90)

    head_patch = patch("requests.head", return_value=head_ok)
    get_patch = patch("requests.get", side_effect=[get_unauthorized, get_ok])
    post_patch = patch(
        "requests.post", side_effect=[first_token_reply, second_token_reply]
    )

    with head_patch, get_patch, post_patch:
        # run download with verbose enabled
        dl._download_file(
            url,
            localDir=".",
            vault_token_file="/does/not/matter",
            verbose=True,
        )

    printed = capsys.readouterr().out
    assert "[HTTP] HEAD" in printed or "[HTTP] GET" in printed
    assert "REDACTED" in printed
    # Ensure token values are not directly printed
    assert "ACCESS" not in printed
    assert "VAULT" not in printed

0 commit comments

Comments
 (0)