From 2c691ac32090780769c46e9cc3ba559481ccc9f2 Mon Sep 17 00:00:00 2001 From: "open-swe[bot]" Date: Wed, 27 May 2026 19:10:52 +0800 Subject: [PATCH 1/2] fix(crewai-files): avoid pickle serialization in upload cache --- .../src/crewai_files/cache/upload_cache.py | 57 ++++++++++++++++++- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/lib/crewai-files/src/crewai_files/cache/upload_cache.py b/lib/crewai-files/src/crewai_files/cache/upload_cache.py index 48cebdfa14..cb6c41fc72 100644 --- a/lib/crewai-files/src/crewai_files/cache/upload_cache.py +++ b/lib/crewai-files/src/crewai_files/cache/upload_cache.py @@ -9,11 +9,12 @@ from dataclasses import dataclass from datetime import datetime, timezone import hashlib +import json import logging from typing import TYPE_CHECKING, Any from aiocache import Cache # type: ignore[import-untyped] -from aiocache.serializers import PickleSerializer # type: ignore[import-untyped] +from aiocache.serializers import BaseSerializer # type: ignore[import-untyped] from crewai_files.core.constants import DEFAULT_MAX_CACHE_ENTRIES, DEFAULT_TTL_SECONDS from crewai_files.uploaders.factory import ProviderType @@ -25,6 +26,56 @@ logger = logging.getLogger(__name__) +class _CachedUploadSerializer(BaseSerializer): + """JSON serializer for cached upload metadata. + + The UploadCache supports external backends (e.g. redis). Avoid pickle-based + serialization for cached values so cache poisoning cannot turn into code + execution via unsafe deserialization. + """ + + @staticmethod + def _to_json(obj: CachedUpload) -> dict[str, Any]: + return { + "file_id": obj.file_id, + "provider": obj.provider, + "file_uri": obj.file_uri, + "content_type": obj.content_type, + "uploaded_at": obj.uploaded_at.isoformat(), + "expires_at": obj.expires_at.isoformat() if obj.expires_at is not None else None, + } + + @staticmethod + def _from_json(data: dict[str, Any]) -> CachedUpload: + return CachedUpload( + file_id=data["file_id"], + provider=data["provider"], + file_uri=data.get("file_uri"), + content_type=data["content_type"], + uploaded_at=datetime.fromisoformat(data["uploaded_at"]), + expires_at=( + datetime.fromisoformat(data["expires_at"]) + if data.get("expires_at") is not None + else None + ), + ) + + def dumps(self, value: CachedUpload | None) -> str: # type: ignore[override] + if value is None: + return "null" + return json.dumps(self._to_json(value), sort_keys=True) + + def loads(self, value: str | None) -> CachedUpload | None: # type: ignore[override] + if value is None: + return None + parsed = json.loads(value) + if parsed is None: + return None + if not isinstance(parsed, dict): + raise ValueError("invalid cached upload payload") + return self._from_json(parsed) + + @dataclass class CachedUpload: """Represents a cached file upload. @@ -123,13 +174,13 @@ def __init__( if cache_type == "redis": self._cache = Cache( Cache.REDIS, - serializer=PickleSerializer(), + serializer=_CachedUploadSerializer(), namespace=namespace, **cache_kwargs, ) else: self._cache = Cache( - serializer=PickleSerializer(), + serializer=_CachedUploadSerializer(), namespace=namespace, ) From 3e9caf97bfdd5e52348dc6019780579ae291cff4 Mon Sep 17 00:00:00 2001 From: "open-swe[bot]" Date: Wed, 27 May 2026 22:18:42 +0800 Subject: [PATCH 2/2] fix: treat unreadable upload cache entries as misses --- .../src/crewai_files/cache/upload_cache.py | 18 +++++--- lib/crewai-files/tests/test_upload_cache.py | 43 ++++++++++++++++++- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/lib/crewai-files/src/crewai_files/cache/upload_cache.py b/lib/crewai-files/src/crewai_files/cache/upload_cache.py index cb6c41fc72..da54774d42 100644 --- a/lib/crewai-files/src/crewai_files/cache/upload_cache.py +++ b/lib/crewai-files/src/crewai_files/cache/upload_cache.py @@ -42,7 +42,9 @@ def _to_json(obj: CachedUpload) -> dict[str, Any]: "file_uri": obj.file_uri, "content_type": obj.content_type, "uploaded_at": obj.uploaded_at.isoformat(), - "expires_at": obj.expires_at.isoformat() if obj.expires_at is not None else None, + "expires_at": obj.expires_at.isoformat() + if obj.expires_at is not None + else None, } @staticmethod @@ -68,12 +70,16 @@ def dumps(self, value: CachedUpload | None) -> str: # type: ignore[override] def loads(self, value: str | None) -> CachedUpload | None: # type: ignore[override] if value is None: return None - parsed = json.loads(value) - if parsed is None: + try: + parsed = json.loads(value) + if parsed is None: + return None + if not isinstance(parsed, dict): + return None + return self._from_json(parsed) + except (TypeError, ValueError, KeyError) as exc: + logger.debug("Ignoring unreadable cached upload payload: %s", exc) return None - if not isinstance(parsed, dict): - raise ValueError("invalid cached upload payload") - return self._from_json(parsed) @dataclass diff --git a/lib/crewai-files/tests/test_upload_cache.py b/lib/crewai-files/tests/test_upload_cache.py index 5b2bb6a47e..3fada20c32 100644 --- a/lib/crewai-files/tests/test_upload_cache.py +++ b/lib/crewai-files/tests/test_upload_cache.py @@ -1,9 +1,15 @@ """Tests for upload cache.""" from datetime import datetime, timedelta, timezone +import json +import pickle from crewai_files import FileBytes, ImageFile -from crewai_files.cache.upload_cache import CachedUpload, UploadCache +from crewai_files.cache.upload_cache import ( + CachedUpload, + UploadCache, + _CachedUploadSerializer, +) # Minimal valid PNG @@ -76,6 +82,41 @@ def test_is_expired_no_expiry(self): assert cached.is_expired() is False +class TestCachedUploadSerializer: + """Tests for cache serializer compatibility.""" + + def test_json_round_trip(self): + """Test cached uploads round-trip through the JSON serializer.""" + now = datetime.now(timezone.utc) + cached = CachedUpload( + file_id="file-123", + provider="gemini", + file_uri="files/file-123", + content_type="image/png", + uploaded_at=now, + expires_at=now + timedelta(hours=48), + ) + serializer = _CachedUploadSerializer() + + loaded = serializer.loads(serializer.dumps(cached)) + + assert loaded == cached + + def test_unreadable_payloads_are_cache_misses(self): + """Test unreadable payloads are treated as cache misses.""" + serializer = _CachedUploadSerializer() + old_pickle_payload = pickle.dumps({"file_id": "file-123"}) + payloads = [ + old_pickle_payload, + "not-json", + json.dumps(["not", "a", "dict"]), + json.dumps({"file_id": "file-123"}), + ] + + for payload in payloads: + assert serializer.loads(payload) is None # type: ignore[arg-type] + + class TestUploadCache: """Tests for UploadCache class."""