fix: skip temperature parameter for O-series models

zscole · zscole · commit eb0f743d16d6 · 2026-01-13T10:29:50.000-06:00
OpenAI O-series models (o1, o1-mini, o1-preview) only support temperature=1 or no temperature parameter. This fix detects O-series models and omits the temperature parameter for those models. Fixes #8
diff --git a/skills/adversarial-spec/scripts/debate.py b/skills/adversarial-spec/scripts/debate.py
@@ -72,6 +72,7 @@
     extract_tasks,
     generate_diff,
     get_critique_summary,
+    is_o_series_model,
     load_context_files,
 )
 from prompts import EXPORT_TASKS_PROMPT, get_doc_type_name  # noqa: E402
@@ -700,12 +701,18 @@ def handle_export_tasks(args: argparse.Namespace, models: list[str]) -> None:
     prompt = EXPORT_TASKS_PROMPT.format(doc_type_name=doc_type_name, spec=spec)
 
     try:
-        response = completion(
-            model=models[0],
-            messages=[{"role": "user", "content": prompt}],
-            temperature=0.3,
-            max_tokens=8000,
-        )
+        # Build completion kwargs
+        completion_kwargs = {
+            "model": models[0],
+            "messages": [{"role": "user", "content": prompt}],
+            "max_tokens": 8000,
+        }
+
+        # O-series models don't support custom temperature
+        if not is_o_series_model(models[0]):
+            completion_kwargs["temperature"] = 0.3
+
+        response = completion(**completion_kwargs)
         content = response.choices[0].message.content
         tasks = extract_tasks(content)
 
diff --git a/skills/adversarial-spec/scripts/models.py b/skills/adversarial-spec/scripts/models.py
@@ -46,6 +46,23 @@
 RETRY_BASE_DELAY = 1.0  # seconds
 
 
+def is_o_series_model(model: str) -> bool:
+    """
+    Check if a model is an OpenAI O-series reasoning model.
+
+    O-series models (o1, o3, o4 families) accept only temperature=1 or no temperature.
+
+    Args:
+        model: Model identifier string, optionally provider-prefixed ("openai/o1").
+
+    Returns:
+        True if "o1", "o3" or "o4" starts the lowercased name or follows "/" or "-".
+    """
+    name = model.lower()
+    families = ("o1", "o3", "o4")  # "gpt-4o" stays excluded: its "o" follows a digit
+    return any(name.startswith(f) or f"/{f}" in name or f"-{f}" in name for f in families)
+
+
 @dataclass
 class ModelResponse:
     """Response from a model critique."""
@@ -457,16 +474,22 @@ def call_single_model(
 
     for attempt in range(MAX_RETRIES):
         try:
-            response = completion(
-                model=actual_model,
-                messages=[
+            # Build completion kwargs
+            completion_kwargs = {
+                "model": actual_model,
+                "messages": [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_message},
                 ],
-                temperature=0.7,
-                max_tokens=8000,
-                timeout=timeout,
-            )
+                "max_tokens": 8000,
+                "timeout": timeout,
+            }
+
+            # O-series models don't support custom temperature
+            if not is_o_series_model(actual_model):
+                completion_kwargs["temperature"] = 0.7
+
+            response = completion(**completion_kwargs)
             content = response.choices[0].message.content
             agreed = "[AGREE]" in content
             extracted = extract_spec(content)
diff --git a/skills/adversarial-spec/scripts/tests/test_models.py b/skills/adversarial-spec/scripts/tests/test_models.py
@@ -20,6 +20,7 @@
     extract_tasks,
     generate_diff,
     get_critique_summary,
+    is_o_series_model,
     load_context_files,
 )
 
@@ -974,3 +975,41 @@ def test_max_retries_is_reasonable(self):
     def test_retry_base_delay_is_positive(self):
         # Mutation: 1.0 -> 2.0 would be caught
         assert RETRY_BASE_DELAY == 1.0
+
+
+class TestIsOSeriesModel:
+    """Test detection of OpenAI O-series models."""
+
+    def test_detects_o1(self):
+        assert is_o_series_model("o1") is True  # bare model name
+
+    def test_detects_o1_mini(self):
+        assert is_o_series_model("o1-mini") is True  # suffixed variant
+
+    def test_detects_o1_preview(self):
+        assert is_o_series_model("o1-preview") is True  # suffixed variant
+
+    def test_detects_o1_with_provider_prefix(self):
+        assert is_o_series_model("openai/o1") is True  # name follows a single "provider/" prefix
+
+    def test_detects_o1_via_openrouter(self):
+        assert is_o_series_model("openrouter/openai/o1-mini") is True  # nested provider path
+
+    def test_case_insensitive(self):
+        assert is_o_series_model("O1") is True  # upper-case input is lowercased before matching
+        assert is_o_series_model("O1-MINI") is True
+
+    def test_does_not_detect_gpt4o(self):
+        assert is_o_series_model("gpt-4o") is False  # the "o" is the tail of "4o", not an o1 name
+
+    def test_does_not_detect_gpt4o_mini(self):
+        assert is_o_series_model("gpt-4o-mini") is False  # "-mini" suffix must not cause a match
+
+    def test_does_not_detect_claude(self):
+        assert is_o_series_model("claude-sonnet-4-20250514") is False  # unrelated model name
+
+    def test_does_not_detect_gemini(self):
+        assert is_o_series_model("gemini/gemini-2.0-flash") is False  # provider-prefixed, unrelated
+
+    def test_does_not_detect_empty_string(self):
+        assert is_o_series_model("") is False  # degenerate input must not match