feat(observability): log throttle events and AIMD transitions at INFO/DEBUG

jhamon · jhamon · commit 41ea43409ae3 · 2026-06-01T18:26:26.000Z
Add structured log lines to the retry and adaptive-concurrency layers so
operators can diagnose throttle storms from logs alone.

- `pinecone._internal.http_client`: DEBUG per throttled retry attempt
  (status, host, attempt N/total, computed delay, Retry-After value)
- `pinecone._internal.adaptive`: DEBUG on every AIMD limit decrease/increase
  (only on real transitions, not per-success); INFO on first throttle
  per host with a user-actionable message linking to docs
- `tests/unit/_internal/test_retry_logging.py`: 6 new tests covering
  sync and async throttle logging, AIMD decrease/increase logging,
  at-ceiling log suppression, and first-throttle-per-host INFO dedupe
- `docs/guides/retries.md`: new Observability section documenting log
  namespaces, INFO vs DEBUG semantics, and field meanings
diff --git a/docs/guides/retries.md b/docs/guides/retries.md
@@ -312,6 +312,68 @@ this matters.
 
 ---
 
+## Observability
+
+The SDK emits structured log records so you can diagnose retry storms and throttling
+pressure without adding instrumentation yourself.
+
+### Log namespaces
+
+| Logger | Events |
+|--------|--------|
+| `pinecone._internal.http_client` | Throttled HTTP response received; retry delay computed |
+| `pinecone._internal.adaptive` | AIMD concurrency limit transitions |
+
+### INFO messages
+
+An INFO-level record is emitted the **first time** a given host rate-limits a client
+instance:
+
+```
+Rate limited by host=<host>. Adaptive concurrency will reduce in-flight requests.
+See https://docs.pinecone.io/python/retries for details.
+```
+
+The record is a single log line (wrapped above for readability). It fires once per host per
+`Pinecone` / `AsyncPinecone` object, so it surfaces without flooding logs on repeated throttling.
+
+### DEBUG messages
+
+Enable DEBUG on the two namespaces above (with a handler attached, e.g. via `logging.basicConfig()`) to see granular retry events:
+
+```python
+import logging
+logging.getLogger("pinecone._internal.http_client").setLevel(logging.DEBUG)
+logging.getLogger("pinecone._internal.adaptive").setLevel(logging.DEBUG)
+```
+
+**Throttle record** (emitted once per retry attempt that receives a retryable response):
+
+```
+Throttled response: status=429 host=my-index.svc.pinecone.io attempt=1/4 delay=0.531s retry_after=absent
+```
+
+Fields: `status` (HTTP status code), `host`, `attempt` (N of total attempts, i.e. `max_retries + 1`),
+`delay` (computed wait in seconds), `retry_after` (raw `Retry-After` header value, or
+`absent` when the header is missing).
+
+**AIMD limit decrease** (emitted when the adaptive limiter reduces concurrency):
+
+```
+AIMD limiter decreased: before=8 after=4 ceiling=8
+```
+
+**AIMD limit increase** (emitted when the limiter recovers a concurrency slot):
+
+```
+AIMD limiter increased: now=5 ceiling=8
+```
+
+Both records fire only on actual transitions — no decrease is logged at the floor of 1, and
+no increase per success or at the ceiling — so volume tracks limit changes, not throughput.
+
+---
+
 ## See Also
 
 - {doc}`/guides/error-handling` — Exception hierarchy, `RateLimitError.retry_after`, and how to catch specific errors
diff --git a/pinecone/_internal/adaptive.py b/pinecone/_internal/adaptive.py
@@ -11,11 +11,10 @@
 
 from __future__ import annotations
 
+import logging
 import threading
-from typing import TYPE_CHECKING
 
-if TYPE_CHECKING:
-    pass
+logger = logging.getLogger(__name__)
 
 
 class _AdaptiveLimiter:
@@ -48,16 +47,33 @@ def current_limit(self) -> int:
     def report_throttled(self) -> None:
         """Halve the limit (floored at 1) and reset the success streak."""
         with self._lock:
+            before = self._limit
             self._limit = max(1, self._limit // 2)
             self._success_streak = 0
+        if before != self._limit:
+            logger.debug(
+                "AIMD limiter decreased: before=%d after=%d ceiling=%d",
+                before,
+                self._limit,
+                self._ceiling,
+            )
 
     def report_success(self) -> None:
         """Increment the success streak; bump limit by 1 if streak hits current limit."""
         with self._lock:
             self._success_streak += 1
+            increased = False
             if self._success_streak >= self._limit:
-                self._limit = min(self._ceiling, self._limit + 1)
+                if self._limit < self._ceiling:
+                    self._limit = self._limit + 1
+                    increased = True
                 self._success_streak = 0
+        if increased:
+            logger.debug(
+                "AIMD limiter increased: now=%d ceiling=%d",
+                self._limit,
+                self._ceiling,
+            )
 
     def update_ceiling(self, ceiling: int) -> None:
         """Re-anchor the ceiling (e.g., a later bulk call uses a different max_concurrency).
@@ -82,11 +98,12 @@ class _AdaptiveLimiterRegistry:
     each batch dispatch.
     """
 
-    __slots__ = ("_limiters", "_lock")
+    __slots__ = ("_ever_throttled", "_limiters", "_lock")
 
     def __init__(self) -> None:
         self._lock = threading.Lock()
         self._limiters: dict[str, _AdaptiveLimiter] = {}
+        self._ever_throttled: set[str] = set()
 
     def get(self, host: str, ceiling: int) -> _AdaptiveLimiter:
         """Return the limiter for ``host``, creating one with ``ceiling`` if absent.
@@ -113,5 +130,13 @@ def report_throttled(self, host: str) -> None:
         """
         with self._lock:
             limiter = self._limiters.get(host)
+            first_time = host not in self._ever_throttled
+            self._ever_throttled.add(host)
         if limiter is not None:
             limiter.report_throttled()
+        if first_time:
+            logger.info(
+                "Rate limited by host=%s. Adaptive concurrency will reduce in-flight requests. "
+                "See https://docs.pinecone.io/python/retries for details.",
+                host,
+            )
diff --git a/pinecone/_internal/http_client.py b/pinecone/_internal/http_client.py
@@ -194,6 +194,16 @@ def handle_request(self, request: httpx.Request) -> httpx.Response:
                 response.close()
                 delay = _compute_retry_after_delay(self._config, response, attempt, prev_delay)
                 prev_delay = delay
+                logger.debug(
+                    "Throttled response: status=%d host=%s attempt=%d/%d"
+                    " delay=%.3fs retry_after=%s",
+                    response.status_code,
+                    request.url.host,
+                    attempt + 1,
+                    self._config.max_retries + 1,
+                    delay,
+                    response.headers.get("retry-after", "absent"),
+                )
                 time.sleep(delay)
             else:
                 return response
@@ -244,6 +254,16 @@ async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
                 await response.aclose()
                 delay = _compute_retry_after_delay(self._config, response, attempt, prev_delay)
                 prev_delay = delay
+                logger.debug(
+                    "Throttled response: status=%d host=%s attempt=%d/%d"
+                    " delay=%.3fs retry_after=%s",
+                    response.status_code,
+                    request.url.host,
+                    attempt + 1,
+                    self._config.max_retries + 1,
+                    delay,
+                    response.headers.get("retry-after", "absent"),
+                )
                 await asyncio.sleep(delay)
             else:
                 return response
diff --git a/tests/unit/_internal/test_retry_logging.py b/tests/unit/_internal/test_retry_logging.py
@@ -0,0 +1,119 @@
+"""Tests for throttle-event and AIMD-transition log lines."""
+
+from __future__ import annotations
+
+import logging
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+from pinecone._internal.adaptive import _AdaptiveLimiter, _AdaptiveLimiterRegistry
+from pinecone._internal.config import RetryConfig
+from pinecone._internal.http_client import _AsyncRetryTransport, _RetryTransport
+
+
+def _transport(max_retries: int = 1) -> tuple[_RetryTransport, MagicMock]:
+    inner = MagicMock(spec=httpx.BaseTransport)
+    cfg = RetryConfig(max_retries=max_retries, backoff_factor=0.001, max_wait=0.01)
+    return _RetryTransport(transport=inner, retry_config=cfg), inner  # type: ignore[arg-type]
+
+
+def _async_transport(max_retries: int = 1) -> tuple[_AsyncRetryTransport, AsyncMock]:
+    inner = AsyncMock(spec=httpx.AsyncBaseTransport)
+    cfg = RetryConfig(max_retries=max_retries, backoff_factor=0.001, max_wait=0.01)
+    return _AsyncRetryTransport(transport=inner, retry_config=cfg), inner  # type: ignore[arg-type]
+
+
+def _req() -> httpx.Request:
+    return httpx.Request("POST", "https://example.com/test")
+
+
+def test_throttle_response_logs_debug_with_fields(caplog: pytest.LogCaptureFixture) -> None:
+    rt, inner = _transport(max_retries=1)
+    inner.handle_request.side_effect = [
+        httpx.Response(429),
+        httpx.Response(200),
+    ]
+    with (
+        caplog.at_level(logging.DEBUG, logger="pinecone._internal.http_client"),
+        patch("pinecone._internal.http_client.time.sleep"),
+    ):
+        rt.handle_request(_req())
+
+    throttle_records = [r for r in caplog.records if "Throttled response" in r.getMessage()]
+    assert len(throttle_records) == 1
+    msg = throttle_records[0].getMessage()
+    assert throttle_records[0].levelname == "DEBUG"
+    assert "status=429" in msg
+    assert "host=example.com" in msg
+
+
+def test_aimd_decrease_logs_debug(caplog: pytest.LogCaptureFixture) -> None:
+    lim = _AdaptiveLimiter(ceiling=8)
+    with caplog.at_level(logging.DEBUG, logger="pinecone._internal.adaptive"):
+        lim.report_throttled()
+
+    records = [r for r in caplog.records if "AIMD limiter decreased" in r.getMessage()]
+    assert len(records) == 1
+    assert "before=8 after=4" in records[0].getMessage()
+
+
+def test_aimd_increase_logs_debug_only_on_transition(caplog: pytest.LogCaptureFixture) -> None:
+    lim = _AdaptiveLimiter(ceiling=8)
+    lim.report_throttled()  # 8 → 4; limit is now 4, streak is 0
+    # Need exactly 4 successful calls to cross the threshold (streak reaches limit)
+    with caplog.at_level(logging.DEBUG, logger="pinecone._internal.adaptive"):
+        for _ in range(4):
+            lim.report_success()
+
+    increase_records = [r for r in caplog.records if "AIMD limiter increased" in r.getMessage()]
+    assert len(increase_records) == 1
+
+
+def test_aimd_no_log_when_at_ceiling(caplog: pytest.LogCaptureFixture) -> None:
+    lim = _AdaptiveLimiter(ceiling=8)
+    # No throttle; limit starts at ceiling (8)
+    with caplog.at_level(logging.DEBUG, logger="pinecone._internal.adaptive"):
+        for _ in range(100):
+            lim.report_success()
+
+    increase_records = [r for r in caplog.records if "AIMD limiter increased" in r.getMessage()]
+    assert len(increase_records) == 0
+
+
+def test_first_throttle_per_host_logs_info(caplog: pytest.LogCaptureFixture) -> None:
+    reg = _AdaptiveLimiterRegistry()
+    reg.get("api-1.pinecone.io", 8)
+
+    with caplog.at_level(logging.INFO, logger="pinecone._internal.adaptive"):
+        reg.report_throttled("api-1.pinecone.io")
+        reg.report_throttled("api-1.pinecone.io")
+
+    info_records = [
+        r
+        for r in caplog.records
+        if r.levelname == "INFO" and "Rate limited by host=api-1.pinecone.io" in r.getMessage()
+    ]
+    assert len(info_records) == 1
+
+
+@pytest.mark.asyncio
+async def test_async_throttle_response_logs_debug(caplog: pytest.LogCaptureFixture) -> None:
+    rt, inner = _async_transport(max_retries=1)
+    inner.handle_async_request.side_effect = [
+        httpx.Response(429),
+        httpx.Response(200),
+    ]
+    with (
+        caplog.at_level(logging.DEBUG, logger="pinecone._internal.http_client"),
+        patch("pinecone._internal.http_client.asyncio.sleep"),
+    ):
+        await rt.handle_async_request(_req())
+
+    throttle_records = [r for r in caplog.records if "Throttled response" in r.getMessage()]
+    assert len(throttle_records) == 1
+    msg = throttle_records[0].getMessage()
+    assert throttle_records[0].levelname == "DEBUG"
+    assert "status=429" in msg
+    assert "host=example.com" in msg