UN-3230 [FEAT] Implement back-off retry mechanism for API deployment client

muhammad-ali-e · claude · muhammad-ali-e · commit 2f91f45b6c3a · 2026-02-11T10:11:02.000+05:30
Add exponential back-off retry with full jitter to APIDeploymentsClient
for improved reliability against transient 5xx errors and 429 rate limits.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/README.md b/README.md
@@ -29,6 +29,8 @@ Then, create an instance of the `APIDeploymentsClient`:
 client = APIDeploymentsClient(api_url="url", api_key="your_api_key")
 ```
 
+> **Note:** Pass the raw API key **without** the `"Bearer "` prefix — the client adds it automatically.
+
 Now, you can use the client to interact with the Unstract API deployments API:
 
 ```python
@@ -61,10 +63,42 @@ except APIDeploymentsClientException as e:
 
 ## Parameter Details
 
+`api_url`: The URL of the Unstract API deployment.
+`api_key`: Your raw API key. **Do not** include the `"Bearer "` prefix — the client adds it automatically.
 `api_timeout`: Set a timeout for API requests, e.g., `api_timeout=10`.
 `logging_level`: Set logging verbosity (e.g., "`DEBUG`").
 `include_metadata`: If set to `True`, the response will include additional metadata (cost, tokens consumed and context) for each call made by the Prompt Studio exported tool.
 
+## Retry Configuration
+
+The client includes built-in exponential backoff retry with the following behavior:
+
+- **Async mode** (`api_timeout=0`): POST requests are retried on transient failures (5xx, 429) and connection errors, since the server returns immediately after queuing.
+- **Sync mode** (`api_timeout > 0`, the default): POST requests are **not** retried, because the server blocks during processing — a failure may mean the request was processed but the response was lost.
+- **Status polling** (`check_execution_status`): GET requests are always retried, as they are idempotent.
+
+Retries are enabled by default and can be customized:
+
+```python
+client = APIDeploymentsClient(
+    api_url="url",
+    api_key="your_api_key",
+    max_retries=4,       # Max retry attempts (default: 4, set to 0 to disable)
+    initial_delay=2.0,   # Initial delay in seconds (default: 2.0)
+    max_delay=60.0,      # Maximum delay cap in seconds (default: 60.0)
+    backoff_factor=2.0,  # Multiplier per retry (default: 2.0)
+)
+```
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `max_retries` | `4` | Maximum number of retry attempts. Set to `0` to disable retries. |
+| `initial_delay` | `2.0` | Initial delay in seconds before the first retry. |
+| `max_delay` | `60.0` | Maximum delay cap in seconds between retries. |
+| `backoff_factor` | `2.0` | Multiplier applied to the delay for each subsequent retry. |
+
+The retry logic uses exponential backoff with full jitter. On 429 responses it respects a numeric (delay-in-seconds) `Retry-After` header; any other header format falls back to the computed backoff delay.
+
 
 ## Questions and Feedback
 
diff --git a/src/unstract/api_deployments/__init__.py b/src/unstract/api_deployments/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "1.1.0"
+__version__ = "1.2.0"
 
 from .client import APIDeploymentsClient
 
diff --git a/src/unstract/api_deployments/client.py b/src/unstract/api_deployments/client.py
@@ -10,10 +10,12 @@
 import logging
 import ntpath
 import os
+import random
+import time
 from urllib.parse import urlparse
 
 import requests
-from requests.exceptions import JSONDecodeError
+from requests.exceptions import ConnectionError, JSONDecodeError, Timeout
 
 from unstract.api_deployments.utils import UnstractUtils
 
@@ -54,14 +56,22 @@ def __init__(
         api_timeout: int = 300,
         logging_level: str = "INFO",
         include_metadata: bool = False,
-        verify: bool = True
+        verify: bool = True,
+        max_retries: int = 4,
+        initial_delay: float = 2.0,
+        max_delay: float = 60.0,
+        backoff_factor: float = 2.0,
     ):
         """Initializes the APIClient class.
 
         Args:
             api_key (str): The API key to authenticate the API request.
             api_timeout (int): The timeout to wait for the API response.
             logging_level (str): The logging level to log messages.
+            max_retries (int): Maximum number of retry attempts for failed requests.
+            initial_delay (float): Initial delay in seconds before the first retry.
+            max_delay (float): Maximum delay in seconds between retries.
+            backoff_factor (float): Multiplier applied to delay for each retry.
         """
         if logging_level == "":
             logging_level = os.getenv("UNSTRACT_API_CLIENT_LOGGING_LEVEL", "INFO")
@@ -88,6 +98,21 @@ def __init__(
         self.__save_base_url(api_url)
         self.include_metadata = include_metadata
         self.verify = verify
+        self.max_retries = max_retries
+        self.initial_delay = initial_delay
+        self.max_delay = max_delay
+        self.backoff_factor = backoff_factor
+
+    def _is_retryable_status(self, status_code: int) -> bool:
+        """Checks whether a status code should trigger a retry.
+
+        Args:
+            status_code (int): The HTTP status code to check.
+
+        Returns:
+            bool: True if the request should be retried.
+        """
+        return status_code >= 500 or status_code == 429
 
     def __save_base_url(self, full_url: str):
         """Extracts the base URL from the full URL and saves it.
@@ -99,6 +124,124 @@ def __save_base_url(self, full_url: str):
         self.base_url = parsed_url.scheme + "://" + parsed_url.netloc
         self.logger.debug("Base URL: " + self.base_url)
 
+    def _calculate_delay(self, attempt: int) -> float:
+        """Calculates the delay before the next retry using exponential backoff
+        with full jitter.
+
+        Args:
+            attempt (int): The current retry attempt number (0-indexed).
+
+        Returns:
+            float: The delay in seconds.
+        """
+        exp_delay = min(
+            self.initial_delay * (self.backoff_factor**attempt), self.max_delay
+        )
+        return random.uniform(0, exp_delay)
+
+    def _get_retry_delay(self, response, attempt: int) -> float:
+        """Returns the delay before the next retry.
+
+        For 429 responses, respects the Retry-After header if present.
+        Otherwise falls back to exponential backoff with jitter.
+        """
+        if response is not None and response.status_code == 429:
+            retry_after = response.headers.get("Retry-After")
+            if retry_after is not None:
+                try:
+                    return float(retry_after)
+                except (ValueError, TypeError):
+                    pass
+        return self._calculate_delay(attempt)
+
+    @staticmethod
+    def _rewind_files(files):
+        """Rewinds file objects so they can be re-sent on retry."""
+        for file_tuple in files:
+            file_obj = file_tuple[1]
+            if hasattr(file_obj, "seek"):
+                file_obj.seek(0)
+            elif isinstance(file_obj, tuple) and len(file_obj) >= 2:
+                if hasattr(file_obj[1], "seek"):
+                    file_obj[1].seek(0)
+
+    def _request_with_retry(self, method: str, url: str, **kwargs) -> requests.Response:
+        """Makes an HTTP request with exponential backoff retry logic.
+
+        Args:
+            method (str): The HTTP method (e.g., "GET", "POST").
+            url (str): The request URL.
+            **kwargs: Additional keyword arguments passed to requests.request().
+
+        Returns:
+            requests.Response: The response from the request.
+
+        Raises:
+            ConnectionError: If a connection error persists after all retries.
+            Timeout: If a timeout persists after all retries.
+        """
+        response = None
+
+        for attempt in range(self.max_retries + 1):
+            # Rewind file objects for retry attempts
+            if attempt > 0:
+                files = kwargs.get("files")
+                if files:
+                    self._rewind_files(files)
+
+            try:
+                response = requests.request(method, url, **kwargs)
+
+                if not self._is_retryable_status(response.status_code):
+                    return response
+
+                if attempt < self.max_retries:
+                    delay = self._get_retry_delay(response, attempt)
+                    self.logger.warning(
+                        "Request to %s returned %d. Retrying in %.1fs "
+                        "(attempt %d/%d).",
+                        url,
+                        response.status_code,
+                        delay,
+                        attempt + 1,
+                        self.max_retries,
+                    )
+                    time.sleep(delay)
+                else:
+                    self.logger.warning(
+                        "Request to %s returned %d. Retries exhausted (%d/%d).",
+                        url,
+                        response.status_code,
+                        self.max_retries,
+                        self.max_retries,
+                    )
+
+            except (ConnectionError, Timeout) as exc:
+                response = None
+                if attempt < self.max_retries:
+                    delay = self._get_retry_delay(None, attempt)
+                    self.logger.warning(
+                        "%s during request to %s. Retrying in %.1fs "
+                        "(attempt %d/%d).",
+                        type(exc).__name__,
+                        url,
+                        delay,
+                        attempt + 1,
+                        self.max_retries,
+                    )
+                    time.sleep(delay)
+                else:
+                    self.logger.warning(
+                        "%s during request to %s. Retries exhausted (%d/%d).",
+                        type(exc).__name__,
+                        url,
+                        self.max_retries,
+                        self.max_retries,
+                    )
+                    raise
+
+        return response
+
     def structure_file(self, file_paths: list[str]) -> dict:
         """Invokes the API deployed on the Unstract platform.
 
@@ -115,7 +258,10 @@ def structure_file(self, file_paths: list[str]) -> dict:
             "Authorization": "Bearer " + self.api_key,
         }
 
-        data = {"timeout": self.api_timeout, "include_metadata": self.include_metadata}
+        form_data = {
+            "timeout": self.api_timeout,
+            "include_metadata": self.include_metadata,
+        }
 
         files = []
 
@@ -133,13 +279,28 @@ def structure_file(self, file_paths: list[str]) -> dict:
         except FileNotFoundError as e:
             raise APIDeploymentsClientException("File not found: " + str(e))
 
-        response = requests.post(
-            self.api_url,
-            headers=headers,
-            data=data,
-            files=files,
-            verify=self.verify,
-        )
+        if self.api_timeout == 0:
+            # Async mode: server returns immediately after queuing.
+            # A 5xx means queuing failed — safe to retry.
+            response = self._request_with_retry(
+                "POST",
+                self.api_url,
+                headers=headers,
+                data=form_data,
+                files=files,
+                verify=self.verify,
+            )
+        else:
+            # Sync mode: server blocks during processing.
+            # A 5xx may mean it processed but response was lost — don't retry
+            # to avoid duplicate executions.
+            response = requests.post(
+                self.api_url,
+                headers=headers,
+                data=form_data,
+                files=files,
+                verify=self.verify,
+            )
         self.logger.debug(response.status_code)
         self.logger.debug(response.text)
         # The returned object is wrapped in a "message" key.
@@ -194,14 +355,16 @@ def structure_file(self, file_paths: list[str]) -> dict:
             "extraction_result": extraction_result,
         }
 
-        # Check if the status is pending or if it's successful but lacks a result
-        if 200 <= response.status_code < 300:
-            if execution_status in self.in_progress_statuses or (
-                execution_status == "SUCCESS" and not extraction_result
-            ):
-                obj_to_return.update(
-                    {"status_check_api_endpoint": status_api_endpoint, "pending": True}
-                )
+        # Check if the status is pending or if it's successful but lacks a result.
+        # Per the Unstract Status API migration guide (Option 1), we determine
+        # pending state from the response body alone, ignoring the HTTP status
+        # code — the server currently returns 422 for PENDING/EXECUTING.
+        if execution_status in self.in_progress_statuses or (
+            execution_status == "SUCCESS" and not extraction_result
+        ):
+            obj_to_return.update(
+                {"status_check_api_endpoint": status_api_endpoint, "pending": True}
+            )
 
         return obj_to_return
 
@@ -221,7 +384,8 @@ def check_execution_status(self, status_check_api_endpoint: str) -> dict:
         }
         status_call_url = self.base_url + status_check_api_endpoint
         self.logger.debug("Checking execution status via endpoint: " + status_call_url)
-        response = requests.get(
+        response = self._request_with_retry(
+            "GET",
             status_call_url,
             headers=headers,
             params={"include_metadata": self.include_metadata},
@@ -265,10 +429,14 @@ def check_execution_status(self, status_check_api_endpoint: str) -> dict:
         # If the execution status is pending, extract the execution ID from the response
         # and return it in the response.
         # Later, users can use the execution ID to check the status of the execution.
-        if (
-            200 <= response.status_code < 500
-            and obj_to_return["execution_status"] in self.in_progress_statuses
-        ):
+        if obj_to_return["execution_status"] in self.in_progress_statuses:
             obj_to_return["pending"] = True
+        elif self._is_retryable_status(response.status_code):
+            obj_to_return["pending"] = True
+            self.logger.warning(
+                "Status check returned %d after retries; "
+                "marking as pending to continue polling.",
+                response.status_code,
+            )
 
         return obj_to_return
diff --git a/tests/README.md b/tests/README.md
@@ -0,0 +1,30 @@
+# Tests
+
+## Unit Tests
+
+Mocked tests that require no external setup:
+
+```bash
+uv run pytest -s -v tests/
+```
+
+## Integration Test (`client_test.py`)
+
+This test runs against a live Unstract API deployment.
+
+### Setup
+
+1. Copy `tests/sample.env` to `.env` in the **project root**:
+   ```bash
+   cp tests/sample.env .env
+   ```
+2. Fill in the values:
+   - `API_URL` — your API deployment URL
+   - `UNSTRACT_API_DEPLOYMENT_KEY` — your raw API key (**without** the `"Bearer "` prefix; the client adds it automatically)
+   - `TEST_FILES` — comma-separated paths to files for structuring (e.g. `/path/to/test1.pdf,/path/to/test2.pdf`)
+
+### Run
+
+```bash
+uv run python tests/client_test.py
+```
diff --git a/tests/client_test.py b/tests/client_test.py
@@ -16,12 +16,12 @@ def main():
         adc = APIDeploymentsClient(
             api_url=os.getenv("API_URL"),
             api_key=os.getenv("UNSTRACT_API_DEPLOYMENT_KEY"),
-            api_timeout=10,
+            api_timeout=0,
             logging_level="DEBUG",
             include_metadata=False,
         )
-        # Replace files with pdfs
-        response = adc.structure_file(["<files>"])
+        file_paths = os.getenv("TEST_FILES", "").split(",")
+        response = adc.structure_file(file_paths)
         print(response)
         if response["pending"]:
             while True:
diff --git a/tests/sample.env b/tests/sample.env
@@ -1,2 +1,3 @@
-API_URL=
-UNSTRACT_API_DEPLOYMENT_KEY=
+API_URL="http://localhost:8000/deployment/api/<org_id>/<api_name>/"
+UNSTRACT_API_DEPLOYMENT_KEY="your-api-key-without-bearer-prefix"
+TEST_FILES="/path/to/test1.pdf,/path/to/test2.pdf"
diff --git a/tests/test_retry.py b/tests/test_retry.py
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = "1.1.0"`
	`1`	`+__version__ = "1.2.0"`
`2`	`2`
`3`	`3`	`from .client import APIDeploymentsClient`
`4`	`4`