Skip to content

Commit f4c476b

Browse files
committed
Address feedback (#1)
1 parent 10fb739 commit f4c476b

65 files changed

Lines changed: 1180 additions & 2061 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

docs/02_concepts/code/05_retries_async.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@ async def main() -> None:
99
apify_client = ApifyClientAsync(
1010
token=TOKEN,
1111
max_retries=8,
12-
min_delay_between_retries=timedelta(milliseconds=500), # 0.5s
13-
timeout=timedelta(seconds=360), # 6 mins
12+
min_delay_between_retries=timedelta(milliseconds=500),
13+
timeout=timedelta(seconds=360),
1414
)

docs/02_concepts/code/05_retries_sync.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@ async def main() -> None:
99
apify_client = ApifyClient(
1010
token=TOKEN,
1111
max_retries=8,
12-
min_delay_between_retries=timedelta(milliseconds=500), # 0.5s
13-
timeout=timedelta(seconds=360), # 6 mins
12+
min_delay_between_retries=timedelta(milliseconds=500),
13+
timeout=timedelta(seconds=360),
1414
)

scripts/check_async_docstrings.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@
1616

1717
# Go through every Python file in that directory
1818
for client_source_path in clients_path.glob('**/*.py'):
19-
# Skip _http_clients package - sync and async are in separate files there
20-
if '_http_clients' in str(client_source_path):
21-
continue
22-
2319
with open(client_source_path, encoding='utf-8') as source_file:
2420
# Read the source file and parse the code using Red Baron
2521
red = RedBaron(source_code=source_file.read())
@@ -31,8 +27,6 @@
3127

3228
# Find the corresponding sync classes (same name, but without -Async)
3329
sync_class = red.find('ClassNode', name=async_class.name.replace('ClientAsync', 'Client'))
34-
if not sync_class:
35-
continue
3630

3731
# Go through all methods in the async class
3832
for async_method in async_class.find_all('DefNode'):

scripts/fix_async_docstrings.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,15 @@
1111

1212
# Go through every Python file in that directory
1313
for client_source_path in clients_path.glob('**/*.py'):
14-
# Skip _http_clients package - sync and async are in separate files there
15-
if '_http_clients' in str(client_source_path):
16-
continue
17-
1814
with open(client_source_path, 'r+', encoding='utf-8') as source_file:
1915
# Read the source file and parse the code using Red Baron
2016
red = RedBaron(source_code=source_file.read())
2117

2218
# Find all classes which end with "ClientAsync" (there should be at most 1 per file)
2319
async_class = red.find('ClassNode', name=re.compile('.*ClientAsync$'))
24-
if not async_class:
25-
continue
2620

2721
# Find the corresponding sync classes (same name, but without -Async)
2822
sync_class = red.find('ClassNode', name=async_class.name.replace('ClientAsync', 'Client'))
29-
if not sync_class:
30-
continue
3123

3224
# Go through all methods in the async class
3325
for async_method in async_class.find_all('DefNode'):

scripts/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ def sync_to_async_docstring(docstring: str) -> str:
4949
(r'Client', r'ClientAsync'),
5050
(r'\bsynchronously\b', r'asynchronously'),
5151
(r'\bSynchronously\b', r'Asynchronously'),
52+
(r'\bsynchronous\b', r'asynchronous'),
53+
(r'\bSynchronous\b', r'Asynchronous'),
54+
(r'Retry a function', r'Retry an async function'),
55+
(r'Function to retry', r'Async function to retry'),
5256
]
5357
res = docstring
5458
for pattern, replacement in substitutions:

src/apify_client/_apify_client.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from __future__ import annotations
22

3+
import warnings
34
from functools import cached_property
4-
from typing import TYPE_CHECKING
5+
from typing import TYPE_CHECKING, ClassVar
56

67
from apify_client._client_registry import ClientRegistry, ClientRegistryAsync
78
from apify_client._consts import (
@@ -11,7 +12,7 @@
1112
DEFAULT_MIN_DELAY_BETWEEN_RETRIES,
1213
DEFAULT_TIMEOUT,
1314
)
14-
from apify_client._http_clients import AsyncHttpClient, SyncHttpClient
15+
from apify_client._http_clients import HttpClient, HttpClientAsync
1516
from apify_client._resource_clients import (
1617
ActorClient,
1718
ActorClientAsync,
@@ -81,6 +82,8 @@
8182
class ApifyClient:
8283
"""The Apify API client."""
8384

85+
_OVERRIDABLE_DEFAULT_HEADERS: ClassVar[set[str]] = {'Accept', 'Authorization', 'Accept-Encoding', 'User-Agent'}
86+
8487
def __init__(
8588
self,
8689
token: str | None = None,
@@ -90,6 +93,7 @@ def __init__(
9093
max_retries: int = DEFAULT_MAX_RETRIES,
9194
min_delay_between_retries: timedelta = DEFAULT_MIN_DELAY_BETWEEN_RETRIES,
9295
timeout: timedelta = DEFAULT_TIMEOUT,
96+
headers: dict[str, str] | None = None,
9397
) -> None:
9498
"""Initialize a new instance.
9599
@@ -103,11 +107,15 @@ def __init__(
103107
min_delay_between_retries: How long will the client wait between retrying requests
104108
(increases exponentially from this value).
105109
timeout: The socket timeout of the HTTP requests sent to the Apify API.
110+
headers: Additional HTTP headers to include in all API requests.
106111
"""
107112
# We need to do this because of mocking in tests and default mutable arguments.
108113
api_url = DEFAULT_API_URL if api_url is None else api_url
109114
api_public_url = DEFAULT_API_URL if api_public_url is None else api_public_url
110115

116+
if headers:
117+
self._check_custom_headers(headers)
118+
111119
self._token = token
112120
"""Apify API token for authentication."""
113121

@@ -120,12 +128,13 @@ def __init__(
120128
self._statistics = ClientStatistics()
121129
"""Collector for client request statistics."""
122130

123-
self._http_client = SyncHttpClient(
131+
self._http_client = HttpClient(
124132
token=token,
125133
timeout=timeout or DEFAULT_TIMEOUT,
126134
max_retries=max_retries or DEFAULT_MAX_RETRIES,
127135
min_delay_between_retries=min_delay_between_retries or DEFAULT_MIN_DELAY_BETWEEN_RETRIES,
128136
statistics=self._statistics,
137+
headers=headers,
129138
)
130139
"""HTTP client used to communicate with the Apify API."""
131140

@@ -172,6 +181,18 @@ def _base_kwargs(self) -> dict:
172181
'client_registry': self._client_registry,
173182
}
174183

184+
def _check_custom_headers(self, headers: dict[str, str]) -> None:
    """Warn if custom headers override important default headers.

    Emits a single `UserWarning` listing every supplied header whose name matches one of
    `_OVERRIDABLE_DEFAULT_HEADERS`. Nothing is raised and `headers` is not modified.

    Args:
        headers: Custom HTTP headers supplied by the caller. Each key is normalised with
            `str.title()` before the lookup, so `user-agent` and `USER-AGENT` both match
            `User-Agent`.
    """
    overwrite_headers = [key for key in headers if key.title() in self._OVERRIDABLE_DEFAULT_HEADERS]
    if not overwrite_headers:
        return

    # `_OVERRIDABLE_DEFAULT_HEADERS` is a set, whose iteration order for strings varies between
    # interpreter runs. Sort the names so the warning text is deterministic.
    default_header_names = ', '.join(sorted(self._OVERRIDABLE_DEFAULT_HEADERS))

    warnings.warn(
        f'{", ".join(overwrite_headers)} headers of {self.__class__.__name__} was overridden with an '
        'explicit value. A wrong header value can lead to API errors, it is recommended to use the default '
        f'value for following headers: {default_header_names}.',
        category=UserWarning,
        # Skip this helper and the method that called it, so the warning is attributed to that
        # method's caller.
        stacklevel=3,
    )
195+
175196
@property
176197
def token(self) -> str | None:
177198
"""The Apify API token used by the client."""
@@ -322,6 +343,8 @@ def store(self) -> StoreCollectionClient:
322343
class ApifyClientAsync:
323344
"""The asynchronous version of the Apify API client."""
324345

346+
_OVERRIDABLE_DEFAULT_HEADERS: ClassVar[set[str]] = {'Accept', 'Authorization', 'Accept-Encoding', 'User-Agent'}
347+
325348
def __init__(
326349
self,
327350
token: str | None = None,
@@ -331,6 +354,7 @@ def __init__(
331354
max_retries: int = DEFAULT_MAX_RETRIES,
332355
min_delay_between_retries: timedelta = DEFAULT_MIN_DELAY_BETWEEN_RETRIES,
333356
timeout: timedelta = DEFAULT_TIMEOUT,
357+
headers: dict[str, str] | None = None,
334358
) -> None:
335359
"""Initialize a new instance.
336360
@@ -344,11 +368,15 @@ def __init__(
344368
min_delay_between_retries: How long will the client wait between retrying requests
345369
(increases exponentially from this value).
346370
timeout: The socket timeout of the HTTP requests sent to the Apify API.
371+
headers: Additional HTTP headers to include in all API requests.
347372
"""
348373
# We need to do this because of mocking in tests and default mutable arguments.
349374
api_url = DEFAULT_API_URL if api_url is None else api_url
350375
api_public_url = DEFAULT_API_URL if api_public_url is None else api_public_url
351376

377+
if headers:
378+
self._check_custom_headers(headers)
379+
352380
self._token = token
353381
"""Apify API token for authentication."""
354382

@@ -361,12 +389,13 @@ def __init__(
361389
self._statistics = ClientStatistics()
362390
"""Collector for client request statistics."""
363391

364-
self._http_client = AsyncHttpClient(
392+
self._http_client = HttpClientAsync(
365393
token=token,
366394
timeout=timeout or DEFAULT_TIMEOUT,
367395
max_retries=max_retries or DEFAULT_MAX_RETRIES,
368396
min_delay_between_retries=min_delay_between_retries or DEFAULT_MIN_DELAY_BETWEEN_RETRIES,
369397
statistics=self._statistics,
398+
headers=headers,
370399
)
371400
"""HTTP client used to communicate with the Apify API."""
372401

@@ -413,6 +442,18 @@ def _base_kwargs(self) -> dict:
413442
'client_registry': self._client_registry,
414443
}
415444

445+
def _check_custom_headers(self, headers: dict[str, str]) -> None:
    """Warn if custom headers override important default headers.

    Emits a single `UserWarning` listing every supplied header whose name matches one of
    `_OVERRIDABLE_DEFAULT_HEADERS`. Nothing is raised and `headers` is not modified.

    Args:
        headers: Custom HTTP headers supplied by the caller. Each key is normalised with
            `str.title()` before the lookup, so `user-agent` and `USER-AGENT` both match
            `User-Agent`.
    """
    overridden = [name for name in headers if name.title() in self._OVERRIDABLE_DEFAULT_HEADERS]
    if not overridden:
        return

    # `_OVERRIDABLE_DEFAULT_HEADERS` is a set, whose iteration order for strings varies between
    # interpreter runs. Sort the names so the warning text is deterministic.
    recommended_defaults = ', '.join(sorted(self._OVERRIDABLE_DEFAULT_HEADERS))

    warnings.warn(
        f'{", ".join(overridden)} headers of {self.__class__.__name__} was overridden with an '
        'explicit value. A wrong header value can lead to API errors, it is recommended to use the default '
        f'value for following headers: {recommended_defaults}.',
        category=UserWarning,
        # Skip this helper and the method that called it, so the warning is attributed to that
        # method's caller.
        stacklevel=3,
    )
456+
416457
@property
417458
def token(self) -> str | None:
418459
"""The Apify API token used by the client."""

src/apify_client/_http_clients/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from ._async import AsyncHttpClient
2-
from ._sync import SyncHttpClient
1+
from ._http_client import HttpClient, HttpClientAsync
32

43
__all__ = [
54
'HttpClient',

src/apify_client/_http_clients/_base.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def __init__(
3434
max_retries: int = DEFAULT_MAX_RETRIES,
3535
min_delay_between_retries: timedelta = DEFAULT_MIN_DELAY_BETWEEN_RETRIES,
3636
statistics: ClientStatistics | None = None,
37+
headers: dict[str, str] | None = None,
3738
) -> None:
3839
"""Initialize the base HTTP client.
3940
@@ -43,30 +44,31 @@ def __init__(
4344
max_retries: Maximum number of retries for failed requests.
4445
min_delay_between_retries: Minimum delay between retries.
4546
statistics: Statistics tracker for API calls. Created automatically if not provided.
47+
headers: Additional HTTP headers to include in all requests.
4648
"""
4749
self._timeout = timeout
4850
self._max_retries = max_retries
4951
self._min_delay_between_retries = min_delay_between_retries
5052
self._statistics = statistics or ClientStatistics()
5153

5254
# Build headers for subclasses to use when creating their impit clients.
53-
headers: dict[str, str] = {'Accept': 'application/json, */*'}
55+
default_headers: dict[str, str] = {'Accept': 'application/json, */*'}
5456

5557
workflow_key = os.getenv('APIFY_WORKFLOW_KEY')
5658
if workflow_key is not None:
57-
headers['X-Apify-Workflow-Key'] = workflow_key
59+
default_headers['X-Apify-Workflow-Key'] = workflow_key
5860

5961
is_at_home = 'APIFY_IS_AT_HOME' in os.environ
6062
python_version = '.'.join([str(x) for x in sys.version_info[:3]])
6163
client_version = metadata.version('apify-client')
6264

6365
user_agent = f'ApifyClient/{client_version} ({sys.platform}; Python/{python_version}); isAtHome/{is_at_home}'
64-
headers['User-Agent'] = user_agent
66+
default_headers['User-Agent'] = user_agent
6567

6668
if token is not None:
67-
headers['Authorization'] = f'Bearer {token}'
69+
default_headers['Authorization'] = f'Bearer {token}'
6870

69-
self._headers = headers
71+
self._headers = {**default_headers, **(headers or {})}
7072

7173
@staticmethod
7274
def _parse_params(params: dict[str, Any] | None) -> dict[str, Any] | None:
@@ -109,9 +111,9 @@ def _prepare_request_call(
109111
self,
110112
headers: dict[str, str] | None = None,
111113
params: dict[str, Any] | None = None,
112-
data: Any = None,
114+
data: str | bytes | bytearray | None = None,
113115
json: JsonSerializable | None = None,
114-
) -> tuple[dict[str, str], dict[str, Any] | None, Any]:
116+
) -> tuple[dict[str, str], dict[str, Any] | None, bytes | None]:
115117
"""Prepare headers, params, and body for an HTTP request. Serializes JSON and applies gzip compression."""
116118
if json is not None and data is not None:
117119
raise ValueError('Cannot pass both "json" and "data" parameters at the same time!')

0 commit comments

Comments
 (0)