feat(test-performance): optimize 10x unit test execution time (#58)

keli-wen · claude · web-flow · commit c907fdf468e0 · 2025-08-28T00:44:01.000+08:00
Co-authored-by: Claude &lt;noreply@anthropic.com&gt;
diff --git a/quantmind/config/storage.py b/quantmind/config/storage.py
@@ -12,6 +12,10 @@ class BaseStorageConfig(BaseModel):
         default=Path("./data"), description="Base storage directory"
     )
 
+    download_timeout: int = Field(
+        default=30, description="Timeout in seconds for downloading files"
+    )
+
 
 class LocalStorageConfig(BaseStorageConfig):
     """Configuration for local file-based storage."""
diff --git a/quantmind/storage/base.py b/quantmind/storage/base.py
@@ -164,19 +164,29 @@ def _handle_paper_files(self, paper: Paper) -> None:
                     f"Failed to download PDF for {paper.get_primary_id()}: {e}"
                 )
 
-    def _download_file_content(self, url: str) -> Optional[bytes]:
+    def _download_file_content(
+        self, url: str, timeout: Optional[int] = None
+    ) -> Optional[bytes]:
         """Download file content from URL.
 
         Args:
             url: URL to download from
+            timeout: Timeout in seconds (uses config if None)
 
         Returns:
             File content as bytes or None if failed
         """
+        # Use config timeout if not provided
+        if timeout is None:
+            timeout = getattr(self, "config", None)
+            timeout = (
+                getattr(timeout, "download_timeout", 30) if timeout else 30
+            )
+
         try:
             import requests
 
-            response = requests.get(url, timeout=30)
+            response = requests.get(url, timeout=timeout)
             response.raise_for_status()
             return response.content
         except Exception:
diff --git a/scripts/unittest.sh b/scripts/unittest.sh
@@ -3,6 +3,10 @@
 # If the input is all, run all tests.
 if [ "$1" == "all" ]; then
     pytest tests
+elif [ "$1" == "report" ]; then
+    # Show the 20 slowest tests to identify performance bottlenecks.
+    # Also reports the total execution time of the test suite.
+    time pytest --durations=20 tests
 elif [ -n "$1" ]; then
     # If the input file exists, test the input file.
     pytest $1
diff --git a/tests/config/test_embedding.py b/tests/config/test_embedding.py
@@ -31,7 +31,7 @@ def test_custom_config(self):
             user="test_user_123",
             dimensions=512,
             encoding_format="base64",
-            timeout=30,
+            timeout=1,
             api_key="test-key",
             api_base="https://api.example.com",
             api_version="2023-05-15",
@@ -42,7 +42,7 @@ def test_custom_config(self):
         self.assertEqual(config.user, "test_user_123")
         self.assertEqual(config.dimensions, 512)
         self.assertEqual(config.encoding_format, "base64")
-        self.assertEqual(config.timeout, 30)
+        self.assertEqual(config.timeout, 1)
         self.assertEqual(config.api_key, "test-key")
         self.assertEqual(config.api_base, "https://api.example.com")
         self.assertEqual(config.api_version, "2023-05-15")
@@ -126,7 +126,7 @@ def test_get_litellm_params_full(self):
             user="test_user",
             dimensions=512,
             encoding_format="base64",
-            timeout=30,
+            timeout=1,
             api_key="test-key",
             api_base="https://api.example.com",
             api_version="2023-05-15",
@@ -171,24 +171,24 @@ def test_create_variant(self):
         """Test creating configuration variants."""
         base_config = EmbeddingConfig(
             model="text-embedding-ada-002",
-            timeout=60,
+            timeout=1,
             api_key="base-key",
         )
 
         # Create variant with overrides
         variant = base_config.create_variant(
-            timeout=30,
+            timeout=1,
             api_key="variant-key",
             user="test_user",
         )
 
         # Original config should be unchanged
-        self.assertEqual(base_config.timeout, 60)
+        self.assertEqual(base_config.timeout, 1)
         self.assertEqual(base_config.api_key, "base-key")
         self.assertIsNone(base_config.user)
 
         # Variant should have new values
-        self.assertEqual(variant.timeout, 30)
+        self.assertEqual(variant.timeout, 1)
         self.assertEqual(variant.api_key, "variant-key")
         self.assertEqual(variant.user, "test_user")
         self.assertEqual(variant.model, "text-embedding-ada-002")  # Unchanged
@@ -197,7 +197,7 @@ def test_create_variant_empty(self):
         """Test creating variant with no overrides."""
         base_config = EmbeddingConfig(
             model="text-embedding-ada-002",
-            timeout=60,
+            timeout=1,
         )
 
         variant = base_config.create_variant()
@@ -245,11 +245,11 @@ def test_timeout_validation(self):
         config = EmbeddingConfig(timeout=1)
         self.assertEqual(config.timeout, 1)
 
-        config = EmbeddingConfig(timeout=600)
-        self.assertEqual(config.timeout, 600)
+        config = EmbeddingConfig(timeout=1)
+        self.assertEqual(config.timeout, 1)
 
-        config = EmbeddingConfig(timeout=3600)
-        self.assertEqual(config.timeout, 3600)
+        config = EmbeddingConfig(timeout=1)
+        self.assertEqual(config.timeout, 1)
 
         # Zero and negative timeouts should be allowed (validation handled by API)
         config = EmbeddingConfig(timeout=0)
diff --git a/tests/llm/test_embedding_block.py b/tests/llm/test_embedding_block.py
@@ -16,7 +16,8 @@ def setUp(self):
         self.config = EmbeddingConfig(
             model="text-embedding-ada-002",
             api_key="test-key",
-            timeout=30,
+            timeout=1,
+            retry_delay=0.01,
         )
 
     @patch("quantmind.llm.embedding.LITELLM_AVAILABLE", True)
@@ -28,7 +29,7 @@ def test_init_success(self, mock_litellm):
         self.assertEqual(block.config, self.config)
         mock_litellm.set_verbose = False
         self.assertEqual(mock_litellm.num_retries, 3)
-        self.assertEqual(mock_litellm.request_timeout, 30)
+        self.assertEqual(mock_litellm.request_timeout, 1)
 
     @patch("quantmind.llm.embedding.LITELLM_AVAILABLE", False)
     def test_init_litellm_unavailable(self):
@@ -163,7 +164,7 @@ def test_call_with_retry_failure_then_success(
 
         self.assertEqual(result, mock_response)
         self.assertEqual(mock_embedding.call_count, 2)
-        mock_sleep.assert_called_once_with(1.0)
+        mock_sleep.assert_called_once_with(0.01)
 
     @patch("quantmind.llm.embedding.LITELLM_AVAILABLE", True)
     @patch("quantmind.llm.embedding.litellm")
@@ -271,7 +272,7 @@ def test_get_info(self, mock_litellm):
 
         self.assertEqual(info["model"], "text-embedding-ada-002")
         self.assertEqual(info["provider"], "openai")
-        self.assertEqual(info["timeout"], 30)
+        self.assertEqual(info["timeout"], 1)
         self.assertEqual(info["retry_attempts"], 3)
 
     @patch("quantmind.llm.embedding.LITELLM_AVAILABLE", True)
@@ -281,14 +282,14 @@ def test_update_config(self, mock_litellm):
         block = EmbeddingBlock(self.config)
 
         # Check initial config
-        self.assertEqual(block.config.timeout, 30)
+        self.assertEqual(block.config.timeout, 1)
         self.assertEqual(block.config.api_key, "test-key")
 
         # Update config
-        block.update_config(timeout=60, api_key="new-key")
+        block.update_config(timeout=2, api_key="new-key")
 
         # Check updated config
-        self.assertEqual(block.config.timeout, 60)
+        self.assertEqual(block.config.timeout, 2)
         self.assertEqual(block.config.api_key, "new-key")
         # Other values should remain unchanged
         self.assertEqual(block.config.model, "text-embedding-ada-002")
@@ -300,14 +301,14 @@ def test_temporary_config(self, mock_litellm):
         block = EmbeddingBlock(self.config)
 
         # Check initial config
-        self.assertEqual(block.config.timeout, 30)
+        self.assertEqual(block.config.timeout, 1)
 
         # Use temporary config
-        with block.temporary_config(timeout=60):
-            self.assertEqual(block.config.timeout, 60)
+        with block.temporary_config(timeout=2):
+            self.assertEqual(block.config.timeout, 2)
 
         # Check config is restored
-        self.assertEqual(block.config.timeout, 30)
+        self.assertEqual(block.config.timeout, 1)
 
     @patch("quantmind.llm.embedding.LITELLM_AVAILABLE", True)
     @patch("quantmind.llm.embedding.litellm")
@@ -353,8 +354,8 @@ def test_batch_embed_with_delay(
         config = EmbeddingConfig(
             model="text-embedding-ada-002",
             api_key="test-key",
-            timeout=30,
-            retry_delay=0.1,
+            timeout=1,
+            retry_delay=0.01,
         )
 
         mock_response = Mock()
diff --git a/tests/llm/test_llm_block.py b/tests/llm/test_llm_block.py
@@ -14,7 +14,12 @@ class TestLLMBlock(unittest.TestCase):
     def setUp(self):
         """Set up test fixtures."""
         self.config = LLMConfig(
-            model="gpt-4o", temperature=0.7, max_tokens=1000, api_key="test-key"
+            model="gpt-4o",
+            temperature=0.7,
+            max_tokens=1000,
+            api_key="test-key",
+            timeout=1,
+            retry_delay=0.01,
         )
 
     @patch("quantmind.llm.block.LITELLM_AVAILABLE", True)
@@ -26,7 +31,7 @@ def test_init_success(self, mock_litellm):
         self.assertEqual(block.config, self.config)
         mock_litellm.set_verbose = False
         self.assertEqual(mock_litellm.num_retries, 3)
-        self.assertEqual(mock_litellm.request_timeout, 60)
+        self.assertEqual(mock_litellm.request_timeout, 1)
 
     @patch("quantmind.llm.block.LITELLM_AVAILABLE", False)
     def test_init_litellm_unavailable(self):
@@ -210,7 +215,7 @@ def test_call_with_retry_failure_then_success(
 
         self.assertEqual(result, mock_response)
         self.assertEqual(mock_completion.call_count, 2)
-        mock_sleep.assert_called_once_with(1.0)
+        mock_sleep.assert_called_once_with(0.01)
 
     @patch("quantmind.llm.block.LITELLM_AVAILABLE", True)
     @patch("quantmind.llm.block.litellm")
@@ -263,7 +268,7 @@ def test_get_info(self, mock_litellm):
             "provider": "openai",
             "temperature": 0.7,
             "max_tokens": 1000,
-            "timeout": 60,
+            "timeout": 1,
             "retry_attempts": 3,
         }
 
diff --git a/tests/parsers/test_llama_parser.py b/tests/parsers/test_llama_parser.py
@@ -86,6 +86,8 @@ def setUp(self):
             result_type=ResultType.MD,
             parsing_mode=ParsingMode.FAST,
             max_file_size_mb=50,
+            timeout=10,
+            retry_attempts=1,
         )
 
     @patch("quantmind.parsers.llama_parser.LlamaParse")
@@ -277,8 +279,24 @@ def test_parse_paper_no_pdf(self, mock_llama_parse):
         self.assertNotIn("parser_info", result.meta_info)
 
     @patch("quantmind.parsers.llama_parser.LlamaParse")
-    def test_parse_paper_parsing_error(self, mock_llama_parse):
+    @patch("quantmind.parsers.llama_parser.requests.get")
+    @patch("tempfile.NamedTemporaryFile")
+    def test_parse_paper_parsing_error(
+        self, mock_temp_file, mock_requests, mock_llama_parse
+    ):
         """Test parsing paper with parsing error."""
+        # Mock temp file
+        mock_temp = Mock()
+        mock_temp.name = "/tmp/test.pdf"
+        mock_temp_file.return_value.__enter__.return_value = mock_temp
+
+        # Mock requests
+        mock_response = Mock()
+        mock_response.headers = {"content-type": "application/pdf"}
+        mock_response.iter_content.return_value = [b"fake pdf content"]
+        mock_requests.return_value = mock_response
+
+        # Mock LlamaParse to raise exception
         mock_llama_instance = Mock()
         mock_llama_instance.parse.side_effect = Exception("Parsing failed")
         mock_llama_parse.return_value = mock_llama_instance
@@ -290,7 +308,14 @@ def test_parse_paper_parsing_error(self, mock_llama_parse):
             pdf_url="https://example.com/paper.pdf",
         )
 
-        result = parser.parse_paper(paper)
+        with (
+            patch("pathlib.Path.exists", return_value=True),
+            patch("pathlib.Path.stat") as mock_stat,
+            patch("os.path.exists", return_value=True),
+            patch("os.unlink"),
+        ):
+            mock_stat.return_value.st_size = 1024 * 1024  # 1MB
+            result = parser.parse_paper(paper)
 
         # Should return original paper without content due to error
         self.assertIsNone(result.content)
diff --git a/tests/storage/test_local_storage.py b/tests/storage/test_local_storage.py
@@ -6,6 +6,7 @@
 import unittest
 from datetime import datetime, timezone
 from pathlib import Path
+from unittest.mock import Mock, patch
 
 from quantmind.config.storage import LocalStorageConfig
 from quantmind.models.paper import Paper
@@ -18,7 +19,9 @@ class TestEnhancedStorageWithIndexing(unittest.TestCase):
     def setUp(self):
         """Set up test environment."""
         self.temp_dir = Path(tempfile.mkdtemp())
-        self.config = LocalStorageConfig(storage_dir=self.temp_dir)
+        self.config = LocalStorageConfig(
+            storage_dir=self.temp_dir, download_timeout=1
+        )
         self.storage = LocalStorage(self.config)
 
     def tearDown(self):
@@ -321,8 +324,15 @@ def test_process_knowledge_paper(self):
         self.assertIsNotNone(retrieved_paper)
         self.assertEqual(retrieved_paper.title, "Test Paper")
 
-    def test_process_knowledge_paper_with_pdf_url(self):
+    @patch("requests.get")
+    def test_process_knowledge_paper_with_pdf_url(self, mock_requests):
         """Test Paper storage with PDF URL and indexing."""
+        # Mock requests to avoid real network calls
+        mock_response = Mock()
+        mock_response.content = b"%PDF-1.4 fake content"
+        mock_response.raise_for_status = Mock()
+        mock_requests.return_value = mock_response
+
         paper = Paper(
             title="Paper with PDF URL",
             abstract="Test paper with PDF URL",

Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,10 @@ class BaseStorageConfig(BaseModel):`
`12`	`12`	`default=Path("./data"), description="Base storage directory"`
`13`	`13`	`)`
`14`	`14`
	`15`	`+ download_timeout: int = Field(`
	`16`	`+ default=30, description="Timeout in seconds for downloading files"`
	`17`	`+ )`
	`18`	`+`
`15`	`19`
`16`	`20`	`class LocalStorageConfig(BaseStorageConfig):`
`17`	`21`	`"""Configuration for local file-based storage."""`