Skip to content

Commit eb0f743

Browse files
committed
fix: skip temperature parameter for O-series models
OpenAI O-series models (o1, o1-mini, o1-preview) only support temperature=1 or no temperature parameter. This fix detects O-series models and omits the temperature parameter for those models. Fixes #8
1 parent f09280f commit eb0f743

3 files changed

Lines changed: 82 additions & 13 deletions

File tree

skills/adversarial-spec/scripts/debate.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
extract_tasks,
7373
generate_diff,
7474
get_critique_summary,
75+
is_o_series_model,
7576
load_context_files,
7677
)
7778
from prompts import EXPORT_TASKS_PROMPT, get_doc_type_name # noqa: E402
@@ -700,12 +701,18 @@ def handle_export_tasks(args: argparse.Namespace, models: list[str]) -> None:
700701
prompt = EXPORT_TASKS_PROMPT.format(doc_type_name=doc_type_name, spec=spec)
701702

702703
try:
703-
response = completion(
704-
model=models[0],
705-
messages=[{"role": "user", "content": prompt}],
706-
temperature=0.3,
707-
max_tokens=8000,
708-
)
704+
# Build completion kwargs
705+
completion_kwargs = {
706+
"model": models[0],
707+
"messages": [{"role": "user", "content": prompt}],
708+
"max_tokens": 8000,
709+
}
710+
711+
# O-series models don't support custom temperature
712+
if not is_o_series_model(models[0]):
713+
completion_kwargs["temperature"] = 0.3
714+
715+
response = completion(**completion_kwargs)
709716
content = response.choices[0].message.content
710717
tasks = extract_tasks(content)
711718

skills/adversarial-spec/scripts/models.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,23 @@
4646
RETRY_BASE_DELAY = 1.0 # seconds
4747

4848

49+
def is_o_series_model(model: str) -> bool:
    """
    Check if a model is an OpenAI O-series (reasoning) model.

    O-series models (o1, o1-mini, o1-preview, o3, o3-mini, o4-mini, ...)
    don't support custom temperature. They only accept temperature=1 or no
    temperature parameter.

    A model is treated as O-series when its identifier contains the letter
    "o" followed by one or more digits as a whole name component, i.e. the
    token sits at the start of the string or right after a separator
    ("/", "-", "_", ".", ":") and is followed by the end of the string or
    another separator. This covers bare names ("o1"), provider-prefixed
    names ("openai/o3-mini", "openrouter/openai/o1-mini") and deployment
    style names ("azure/my-o1-deployment"), while names that merely contain
    an "o" next to a digit ("gpt-4o", "gpt-4o-mini") are not matched.

    Args:
        model: Model identifier string. Matching is case-insensitive.

    Returns:
        True if the model is an O-series model.
    """
    # Function-scope import keeps this helper self-contained regardless of
    # what the surrounding module already imports.
    import re

    # Left boundary: start of string or a separator, so "4o" in "gpt-4o" is
    # rejected. Right boundary: end of string or a separator, so unrelated
    # names that only begin with "o1" (e.g. "o1x") are rejected as well.
    o_series_token = r"(?:^|[/_.:-])o\d+(?:$|[/_.:@-])"
    return re.search(o_series_token, model.lower()) is not None
64+
65+
4966
@dataclass
5067
class ModelResponse:
5168
"""Response from a model critique."""
@@ -457,16 +474,22 @@ def call_single_model(
457474

458475
for attempt in range(MAX_RETRIES):
459476
try:
460-
response = completion(
461-
model=actual_model,
462-
messages=[
477+
# Build completion kwargs
478+
completion_kwargs = {
479+
"model": actual_model,
480+
"messages": [
463481
{"role": "system", "content": system_prompt},
464482
{"role": "user", "content": user_message},
465483
],
466-
temperature=0.7,
467-
max_tokens=8000,
468-
timeout=timeout,
469-
)
484+
"max_tokens": 8000,
485+
"timeout": timeout,
486+
}
487+
488+
# O-series models don't support custom temperature
489+
if not is_o_series_model(actual_model):
490+
completion_kwargs["temperature"] = 0.7
491+
492+
response = completion(**completion_kwargs)
470493
content = response.choices[0].message.content
471494
agreed = "[AGREE]" in content
472495
extracted = extract_spec(content)

skills/adversarial-spec/scripts/tests/test_models.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
extract_tasks,
2121
generate_diff,
2222
get_critique_summary,
23+
is_o_series_model,
2324
load_context_files,
2425
)
2526

@@ -974,3 +975,41 @@ def test_max_retries_is_reasonable(self):
974975
def test_retry_base_delay_is_positive(self):
975976
# Mutation: 1.0 -> 2.0 would be caught
976977
assert RETRY_BASE_DELAY == 1.0
978+
979+
980+
class TestIsOSeriesModel:
    """Checks that is_o_series_model flags O-series identifiers only."""

    def test_detects_o1(self):
        # Bare base model name.
        detected = is_o_series_model("o1")
        assert detected is True

    def test_detects_o1_mini(self):
        # Base name followed by a size suffix.
        detected = is_o_series_model("o1-mini")
        assert detected is True

    def test_detects_o1_preview(self):
        # Base name followed by a release-stage suffix.
        detected = is_o_series_model("o1-preview")
        assert detected is True

    def test_detects_o1_with_provider_prefix(self):
        # Single "provider/" prefix in front of the model name.
        detected = is_o_series_model("openai/o1")
        assert detected is True

    def test_detects_o1_via_openrouter(self):
        # Two nested provider prefixes in front of the model name.
        detected = is_o_series_model("openrouter/openai/o1-mini")
        assert detected is True

    def test_case_insensitive(self):
        # Upper-case spellings must be recognized like lower-case ones.
        for identifier in ("O1", "O1-MINI"):
            assert is_o_series_model(identifier) is True

    def test_does_not_detect_gpt4o(self):
        # "4o" ends in "o" but is not an O-series name.
        detected = is_o_series_model("gpt-4o")
        assert detected is False

    def test_does_not_detect_gpt4o_mini(self):
        # Same as above, with a size suffix.
        detected = is_o_series_model("gpt-4o-mini")
        assert detected is False

    def test_does_not_detect_claude(self):
        # Identifier from a different model family.
        detected = is_o_series_model("claude-sonnet-4-20250514")
        assert detected is False

    def test_does_not_detect_gemini(self):
        # Provider-prefixed identifier from a different model family.
        detected = is_o_series_model("gemini/gemini-2.0-flash")
        assert detected is False

    def test_does_not_detect_empty_string(self):
        # An empty identifier is never an O-series model.
        detected = is_o_series_model("")
        assert detected is False

0 commit comments

Comments
 (0)