Skip to content

Commit b2b3ca2

Browse files
fix: sanitize endpoint path params
1 parent 746fed3 commit b2b3ca2

8 files changed

Lines changed: 254 additions & 28 deletions

File tree

src/hyperspell/_utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from ._path import path_template as path_template
12
from ._sync import asyncify as asyncify
23
from ._proxy import LazyProxy as LazyProxy
34
from ._utils import (

src/hyperspell/_utils/_path.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
from __future__ import annotations
2+
3+
import re
4+
from typing import (
5+
Any,
6+
Mapping,
7+
Callable,
8+
)
9+
from urllib.parse import quote
10+
11+
# Matches '.' or '..' where each dot is either literal or percent-encoded (%2e / %2E).
12+
_DOT_SEGMENT_RE = re.compile(r"^(?:\.|%2[eE]){1,2}$")
13+
14+
# Matches a `{name}` placeholder where `name` is one or more word characters.
# The name is a capturing group, so `re.split` on this pattern yields the
# placeholder names interleaved with the surrounding literal text.
_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}")
15+
16+
17+
def _quote_path_segment_part(value: str) -> str:
    """Return `value` percent-encoded so it can sit inside one URI path segment.

    Anything outside the `pchar` set of RFC 3986 §3.3 is treated as unsafe.
    https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
    """
    # Unreserved characters (letters, digits, and -._~) are never encoded by
    # quote(), so the explicit safe set only has to list the sub-delims plus
    # ':' and '@'. '/' is deliberately absent (quote() would keep it by
    # default): a slash inside a value must not start a new path segment.
    pchar_extras = "!$&'()*+,;=:@"
    encoded = quote(value, safe=pchar_extras)
    return encoded
27+
28+
29+
def _quote_query_part(value: str) -> str:
    """Return `value` percent-encoded so it can sit inside a URI query string.

    Anything outside the `query` set of RFC 3986 §3.4 is treated as unsafe, and
    so are & and =, which would otherwise be read as pair delimiters.
    https://datatracker.ietf.org/doc/html/rfc3986#section-3.4
    """
    query_extras = "!$'()*+,;:@/?"
    encoded = quote(value, safe=query_extras)
    return encoded
36+
37+
38+
def _quote_fragment_part(value: str) -> str:
    """Return `value` percent-encoded so it can sit inside a URI fragment.

    Anything outside the `fragment` set of RFC 3986 §3.5 is treated as unsafe.
    https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
    """
    fragment_extras = "!$&'()*+,;=:@/?"
    encoded = quote(value, safe=fragment_extras)
    return encoded
45+
46+
47+
def _interpolate(
    template: str,
    values: Mapping[str, Any],
    quoter: Callable[[str], str],
) -> str:
    """Replace {name} placeholders in `template`, quoting each value with `quoter`.

    Placeholder names are looked up in `values`. Each value is rendered as
    follows:

    * `None` becomes the literal text ``null``.
    * `True` / `False` become ``true`` / ``false``.
    * Anything else is converted with `str()` and passed through `quoter`.

    The `None` and bool literals are inserted as-is, without calling `quoter`.

    Args:
        template: Text containing zero or more {name} placeholders.
        values: Mapping from placeholder name to the value to substitute.
        quoter: Function applied to the string form of each substituted value.

    Returns:
        `template` with every placeholder replaced.

    Raises:
        KeyError: If a placeholder is not found in `values`.
    """
    # re.split with a capturing group returns alternating
    # [text, name, text, name, ..., text] elements, so the placeholder names
    # sit at the odd indices.
    parts = _PLACEHOLDER_RE.split(template)

    for i in range(1, len(parts), 2):
        name = parts[i]
        if name not in values:
            raise KeyError(f"a value for placeholder {{{name}}} was not provided")
        val = values[name]
        if val is None:
            parts[i] = "null"
        elif isinstance(val, bool):
            # Checked explicitly so the lowercase literal is emitted rather
            # than str(True) == "True".
            parts[i] = "true" if val else "false"
        else:
            # Quote the value that was just inspected instead of indexing
            # `values` a second time.
            parts[i] = quoter(str(val))

    return "".join(parts)
76+
77+
78+
def path_template(template: str, /, **kwargs: Any) -> str:
    """Interpolate {name} placeholders in `template` from keyword arguments.

    The template is split at the first '#' into everything-before and fragment,
    and the part before the '#' is split at its first '?' into path and query.
    Each portion is interpolated separately, because the characters that are
    safe to leave unencoded depend on the URI component:

    * path: the `pchar` set from RFC 3986 §3.3 is considered safe, so a '/'
      inside a value is percent-encoded and cannot add a path segment.
    * query: the `query` set from RFC 3986 §3.4 is considered safe, except
      for the = and & characters, which are percent-encoded.
    * fragment: the `fragment` set from RFC 3986 §3.5 is considered safe.

    Args:
        template: The template string containing {name} placeholders.
        **kwargs: Keyword arguments to interpolate into the template.

    Returns:
        The template with placeholders interpolated and percent-encoded.

    Raises:
        KeyError: If a placeholder is not found in `kwargs`.
        ValueError: If any '/'-separated segment of the resulting path is a
            dot-segment ('.' or '..'), including percent-encoded forms such
            as '%2e' or '.%2E'.
    """
    # str.partition reports whether the separator was present, which keeps the
    # distinction between "no query" and "empty query" (e.g. a trailing '?').
    before_fragment, hash_sep, fragment_part = template.partition("#")
    path_part, query_sep, query_part = before_fragment.partition("?")

    # Interpolate the path first; the dot-segment check below must pass before
    # the query and fragment portions are processed.
    path_result = _interpolate(path_part, kwargs, _quote_path_segment_part)

    # Reject dot-segments (. and ..) in the final assembled path. The check
    # runs after interpolation so that adjacent placeholders or a mix of static
    # text and placeholders that together form a dot-segment are caught.
    # Also reject percent-encoded dot-segments to protect against incorrectly
    # implemented normalization in servers/proxies.
    for segment in path_result.split("/"):
        if _DOT_SEGMENT_RE.match(segment):
            raise ValueError(f"Constructed path {path_result!r} contains dot-segment {segment!r} which is not allowed")

    result = path_result
    if query_sep:
        result += "?" + _interpolate(query_part, kwargs, _quote_query_part)
    if hash_sep:
        result += "#" + _interpolate(fragment_part, kwargs, _quote_fragment_part)

    return result

src/hyperspell/resources/connections.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import httpx
66

77
from .._types import Body, Query, Headers, NotGiven, not_given
8+
from .._utils import path_template
89
from .._compat import cached_property
910
from .._resource import SyncAPIResource, AsyncAPIResource
1011
from .._response import (
@@ -86,7 +87,7 @@ def revoke(
8687
if not connection_id:
8788
raise ValueError(f"Expected a non-empty value for `connection_id` but received {connection_id!r}")
8889
return self._delete(
89-
f"/connections/{connection_id}/revoke",
90+
path_template("/connections/{connection_id}/revoke", connection_id=connection_id),
9091
options=make_request_options(
9192
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
9293
),
@@ -160,7 +161,7 @@ async def revoke(
160161
if not connection_id:
161162
raise ValueError(f"Expected a non-empty value for `connection_id` but received {connection_id!r}")
162163
return await self._delete(
163-
f"/connections/{connection_id}/revoke",
164+
path_template("/connections/{connection_id}/revoke", connection_id=connection_id),
164165
options=make_request_options(
165166
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
166167
),

src/hyperspell/resources/evaluate.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from ..types import evaluate_score_query_params, evaluate_score_highlight_params
1010
from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
11-
from .._utils import maybe_transform, async_maybe_transform
11+
from .._utils import path_template, maybe_transform, async_maybe_transform
1212
from .._compat import cached_property
1313
from .._resource import SyncAPIResource, AsyncAPIResource
1414
from .._response import (
@@ -71,7 +71,7 @@ def get_query(
7171
if not query_id:
7272
raise ValueError(f"Expected a non-empty value for `query_id` but received {query_id!r}")
7373
return self._get(
74-
f"/evaluate/query/{query_id}",
74+
path_template("/evaluate/query/{query_id}", query_id=query_id),
7575
options=make_request_options(
7676
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
7777
),
@@ -110,7 +110,7 @@ def score_highlight(
110110
if not highlight_id:
111111
raise ValueError(f"Expected a non-empty value for `highlight_id` but received {highlight_id!r}")
112112
return self._post(
113-
f"/evaluate/highlight/{highlight_id}",
113+
path_template("/evaluate/highlight/{highlight_id}", highlight_id=highlight_id),
114114
body=maybe_transform(
115115
{
116116
"comment": comment,
@@ -153,7 +153,7 @@ def score_query(
153153
if not query_id:
154154
raise ValueError(f"Expected a non-empty value for `query_id` but received {query_id!r}")
155155
return self._post(
156-
f"/evaluate/query/{query_id}",
156+
path_template("/evaluate/query/{query_id}", query_id=query_id),
157157
body=maybe_transform({"score": score}, evaluate_score_query_params.EvaluateScoreQueryParams),
158158
options=make_request_options(
159159
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -208,7 +208,7 @@ async def get_query(
208208
if not query_id:
209209
raise ValueError(f"Expected a non-empty value for `query_id` but received {query_id!r}")
210210
return await self._get(
211-
f"/evaluate/query/{query_id}",
211+
path_template("/evaluate/query/{query_id}", query_id=query_id),
212212
options=make_request_options(
213213
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
214214
),
@@ -247,7 +247,7 @@ async def score_highlight(
247247
if not highlight_id:
248248
raise ValueError(f"Expected a non-empty value for `highlight_id` but received {highlight_id!r}")
249249
return await self._post(
250-
f"/evaluate/highlight/{highlight_id}",
250+
path_template("/evaluate/highlight/{highlight_id}", highlight_id=highlight_id),
251251
body=await async_maybe_transform(
252252
{
253253
"comment": comment,
@@ -290,7 +290,7 @@ async def score_query(
290290
if not query_id:
291291
raise ValueError(f"Expected a non-empty value for `query_id` but received {query_id!r}")
292292
return await self._post(
293-
f"/evaluate/query/{query_id}",
293+
path_template("/evaluate/query/{query_id}", query_id=query_id),
294294
body=await async_maybe_transform({"score": score}, evaluate_score_query_params.EvaluateScoreQueryParams),
295295
options=make_request_options(
296296
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout

src/hyperspell/resources/folders.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from ..types import folder_list_params, folder_set_policies_params
1111
from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
12-
from .._utils import maybe_transform, async_maybe_transform
12+
from .._utils import path_template, maybe_transform, async_maybe_transform
1313
from .._compat import cached_property
1414
from .._resource import SyncAPIResource, AsyncAPIResource
1515
from .._response import (
@@ -79,7 +79,7 @@ def list(
7979
if not connection_id:
8080
raise ValueError(f"Expected a non-empty value for `connection_id` but received {connection_id!r}")
8181
return self._get(
82-
f"/connections/{connection_id}/folders",
82+
path_template("/connections/{connection_id}/folders", connection_id=connection_id),
8383
options=make_request_options(
8484
extra_headers=extra_headers,
8585
extra_query=extra_query,
@@ -119,7 +119,11 @@ def delete_policy(
119119
if not policy_id:
120120
raise ValueError(f"Expected a non-empty value for `policy_id` but received {policy_id!r}")
121121
return self._delete(
122-
f"/connections/{connection_id}/folder-policies/{policy_id}",
122+
path_template(
123+
"/connections/{connection_id}/folder-policies/{policy_id}",
124+
connection_id=connection_id,
125+
policy_id=policy_id,
126+
),
123127
options=make_request_options(
124128
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
125129
),
@@ -152,7 +156,7 @@ def list_policies(
152156
if not connection_id:
153157
raise ValueError(f"Expected a non-empty value for `connection_id` but received {connection_id!r}")
154158
return self._get(
155-
f"/connections/{connection_id}/folder-policies",
159+
path_template("/connections/{connection_id}/folder-policies", connection_id=connection_id),
156160
options=make_request_options(
157161
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
158162
),
@@ -200,7 +204,7 @@ def set_policies(
200204
if not connection_id:
201205
raise ValueError(f"Expected a non-empty value for `connection_id` but received {connection_id!r}")
202206
return self._post(
203-
f"/connections/{connection_id}/folder-policies",
207+
path_template("/connections/{connection_id}/folder-policies", connection_id=connection_id),
204208
body=maybe_transform(
205209
{
206210
"provider_folder_id": provider_folder_id,
@@ -270,7 +274,7 @@ async def list(
270274
if not connection_id:
271275
raise ValueError(f"Expected a non-empty value for `connection_id` but received {connection_id!r}")
272276
return await self._get(
273-
f"/connections/{connection_id}/folders",
277+
path_template("/connections/{connection_id}/folders", connection_id=connection_id),
274278
options=make_request_options(
275279
extra_headers=extra_headers,
276280
extra_query=extra_query,
@@ -310,7 +314,11 @@ async def delete_policy(
310314
if not policy_id:
311315
raise ValueError(f"Expected a non-empty value for `policy_id` but received {policy_id!r}")
312316
return await self._delete(
313-
f"/connections/{connection_id}/folder-policies/{policy_id}",
317+
path_template(
318+
"/connections/{connection_id}/folder-policies/{policy_id}",
319+
connection_id=connection_id,
320+
policy_id=policy_id,
321+
),
314322
options=make_request_options(
315323
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
316324
),
@@ -343,7 +351,7 @@ async def list_policies(
343351
if not connection_id:
344352
raise ValueError(f"Expected a non-empty value for `connection_id` but received {connection_id!r}")
345353
return await self._get(
346-
f"/connections/{connection_id}/folder-policies",
354+
path_template("/connections/{connection_id}/folder-policies", connection_id=connection_id),
347355
options=make_request_options(
348356
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
349357
),
@@ -391,7 +399,7 @@ async def set_policies(
391399
if not connection_id:
392400
raise ValueError(f"Expected a non-empty value for `connection_id` but received {connection_id!r}")
393401
return await self._post(
394-
f"/connections/{connection_id}/folder-policies",
402+
path_template("/connections/{connection_id}/folder-policies", connection_id=connection_id),
395403
body=await async_maybe_transform(
396404
{
397405
"provider_folder_id": provider_folder_id,

src/hyperspell/resources/integrations/integrations.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
)
1717
from ...types import integration_connect_params
1818
from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
19-
from ..._utils import maybe_transform, async_maybe_transform
19+
from ..._utils import path_template, maybe_transform, async_maybe_transform
2020
from ..._compat import cached_property
2121
from ..._resource import SyncAPIResource, AsyncAPIResource
2222
from ..._response import (
@@ -126,7 +126,7 @@ def connect(
126126
if not integration_id:
127127
raise ValueError(f"Expected a non-empty value for `integration_id` but received {integration_id!r}")
128128
return self._get(
129-
f"/integrations/{integration_id}/connect",
129+
path_template("/integrations/{integration_id}/connect", integration_id=integration_id),
130130
options=make_request_options(
131131
extra_headers=extra_headers,
132132
extra_query=extra_query,
@@ -218,7 +218,7 @@ async def connect(
218218
if not integration_id:
219219
raise ValueError(f"Expected a non-empty value for `integration_id` but received {integration_id!r}")
220220
return await self._get(
221-
f"/integrations/{integration_id}/connect",
221+
path_template("/integrations/{integration_id}/connect", integration_id=integration_id),
222222
options=make_request_options(
223223
extra_headers=extra_headers,
224224
extra_query=extra_query,

0 commit comments

Comments
 (0)