diff --git a/_spec_AgentOrchestration.md b/_spec_AgentOrchestration.md index fb7a0fb..204a987 100644 --- a/_spec_AgentOrchestration.md +++ b/_spec_AgentOrchestration.md @@ -614,6 +614,7 @@ One-time operator configuration required before the pipeline can run. No code ch - [ ] `~/.dev-team` added to `permissions.additionalDirectories` in `~/.claude/settings.json` (eliminates per-write permission prompts) - [ ] Plugin installation confirmed current (latest changes pulled from `dev-team-agents` repo) - [ ] Given `GH_TOKEN` is set to Claude's PAT, when the developer agent runs `gh pr create`, then no account-picker prompt appears +- [ ] Increment the version in `plugins/dev-team/.claude-plugin/plugin.json` by 0.0.1 --- @@ -627,6 +628,7 @@ Run a full implement pipeline cycle to confirm the step-machine architecture wor - [ ] Given a full researcher → developer → reviewer → sign-off cycle, when it completes, then the context file at `~/.dev-team//.md` contains all expected sections and no `claude -p` processes are spawned - [ ] Given `GH_TOKEN` is set, when the developer agent creates a PR, then no account-picker prompt appears - [ ] Sub-agents (researcher, developer, reviewer) successfully make Jira MCP and GitHub MCP calls directly without top-level relay +- [ ] Increment the version in `plugins/dev-team/.claude-plugin/plugin.json` by 0.0.1 --- @@ -645,6 +647,7 @@ Implement `agents/troubleshooter.md` with the sign-off deadlock condition as the - [ ] Unknown trigger fallback: returns `{"action": "needs_user_input", "reason": "Unknown trigger: . Manual inspection required."}` - [ ] Writes diagnosis to `` before returning in all cases - [ ] Given the pipeline has reached `signoff_cycle_count == 2` with a deadlocked thread, when the troubleshooter runs, then it asks the user how to proceed and acts on the answer without re-running the sign-off +- [ ] Increment the version in `plugins/dev-team/.claude-plugin/plugin.json` by 0.0.1 ## Related Epics diff --git a/plugins/dev-team/.claude-plugin/plugin.json b/plugins/dev-team/.claude-plugin/plugin.json index f493d17..34817c6 100644 --- a/plugins/dev-team/.claude-plugin/plugin.json +++ b/plugins/dev-team/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "dev-team", - "version": "1.2.1", + "version": "1.2.2", "description": "Dev-team agent pipeline: researcher, developer, reviewer, and debugger agents for implementing Jira tasks and fixing GitHub issues.", "commands": "./commands" } diff --git a/plugins/dev-team/scripts/dev_team.py b/plugins/dev-team/scripts/dev_team.py index 27d31dc..42ab45c 100644 --- a/plugins/dev-team/scripts/dev_team.py +++ b/plugins/dev-team/scripts/dev_team.py @@ -492,22 +492,32 @@ class Step(ABC): handles: str @abstractmethod - def run(self, ctx: PipelineContext) -> str: - """Execute step logic. Returns a trigger name, OR calls exit_with_actions.""" + def get_actions(self) -> list[dict]: + """Return action descriptors to dispatch. Empty list means inline step.""" + ... + + @abstractmethod + def handle_results(self) -> str: + """Process results from the context file and return a trigger moniker.""" ... class FindSpecStep(Step): handles = "spec-finding" - def run(self, ctx: PipelineContext) -> str: + def __init__(self, ctx: "PipelineContext") -> None: + self._ctx = ctx + + def get_actions(self) -> list[dict]: + """Inline step — no actions needed.""" + return [] + + def handle_results(self) -> str: + ctx = self._ctx if ctx.spec_path: - print("Spec path already set — skipping.", flush=True) return "spec_found" - print(f"Searching for spec for {ctx.work_item_id}...", flush=True) spec_file = find_spec_file(ctx.work_item_id) ctx.spec_path = str(spec_file.relative_to(REPO_ROOT)) - print(f"Found {spec_file}", flush=True) return "spec_found" @@ -516,29 +526,16 @@ class DebugStep(Step): _PENDING_KEY = "debug" - def __init__(self, context_path: Path) -> None: + def __init__(self, ctx: "PipelineContext", context_path: Path) -> None: + self._ctx = ctx self._context_path = context_path - def run(self, ctx: PipelineContext) -> str: + def get_actions(self) -> list[dict]: + ctx = self._ctx if ctx.debug_report: - _handle_agent_success(ctx) - if "# Debug report for" not in ctx.debug_report: - ctx.last_failure = f"Bug could not be reproduced.\n\n{ctx.debug_report}" - return "reproduction_failed" - print("Debugging complete.", flush=True) - return "debug_done" - - if ctx.pending_agent == self._PENDING_KEY: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - - print(f"Debugger is investigating {ctx.work_item_id}...", flush=True) - ctx.pending_agent = self._PENDING_KEY - ctx.save(self._context_path) - exit_with_actions([{ + # Result already available — inline step + return [] + return [{ "action": "spawn_agent", "message": f"Debugger is investigating {ctx.work_item_id}.", "agent": "debugger", @@ -547,8 +544,25 @@ def run(self, ctx: PipelineContext) -> str: "args": ctx.work_item_id, "read_sections": [], "write_section": "Debug Report", - "result_format": "reproduced | not_reproduced", - }]) + "result_format": "success | failed", + }] + + def handle_results(self) -> str: + ctx = self._ctx + if ctx.debug_report: + _handle_agent_success(ctx) + if "# Debug report for" not in ctx.debug_report: + ctx.last_failure = f"Bug could not be reproduced.\n\n{ctx.debug_report}" + return "reproduction_failed" + return "debug_done" + # Agent ran but wrote nothing + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + # If we get here, consecutive_failures has not hit threshold — return failure trigger + return "reproduction_failed" class ResearchStep(Step): @@ -556,28 +570,17 @@ class ResearchStep(Step): _PENDING_KEY = "research" - def __init__(self, skill: str, context_path: Path) -> None: + def __init__(self, skill: str, ctx: "PipelineContext", context_path: Path) -> None: self._skill = skill + self._ctx = ctx self._context_path = context_path - def run(self, ctx: PipelineContext) -> str: + def get_actions(self) -> list[dict]: + ctx = self._ctx if ctx.brief: - _handle_agent_success(ctx) - print("Research complete.", flush=True) - return "research_done" - - if ctx.pending_agent == self._PENDING_KEY: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - - print(f"Researcher is planning work for {ctx.work_item_id}...", flush=True) + return [] read_sections = ["Debug Report"] if ctx.debug_report else [] - ctx.pending_agent = self._PENDING_KEY - ctx.save(self._context_path) - exit_with_actions([{ + return [{ "action": "spawn_agent", "message": f"Researcher is planning work for {ctx.work_item_id}.", "agent": "researcher", @@ -586,8 +589,20 @@ def run(self, ctx: PipelineContext) -> str: "args": f"{ctx.work_item_id} {ctx.spec_path}", "read_sections": read_sections, "write_section": "Researcher Brief", - "result_format": "briefed | failed", - }]) + "result_format": "success | failed", + }] + + def handle_results(self) -> str: + ctx = self._ctx + if ctx.brief: + _handle_agent_success(ctx) + return "research_done" + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "research_done" class ImplementStep(Step): @@ -595,26 +610,15 @@ class ImplementStep(Step): _PENDING_KEY = "implement" - def __init__(self, context_path: Path) -> None: + def __init__(self, ctx: "PipelineContext", context_path: Path) -> None: + self._ctx = ctx self._context_path = context_path - def run(self, ctx: PipelineContext) -> str: + def get_actions(self) -> list[dict]: + ctx = self._ctx if ctx.work_summaries: - _handle_agent_success(ctx) - print("Implementation already complete in context — skipping.", flush=True) - return "impl_done" - - if ctx.pending_agent == self._PENDING_KEY: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - - print(f"Developer is implementing {ctx.work_item_id}...", flush=True) - ctx.pending_agent = self._PENDING_KEY - ctx.save(self._context_path) - exit_with_actions([{ + return [] + return [{ "action": "spawn_agent", "message": "Researcher has written the task brief. Developer is now implementing.", "agent": "developer", @@ -623,8 +627,20 @@ def run(self, ctx: PipelineContext) -> str: "context_file": str(self._context_path), "read_sections": ["Researcher Brief"], "write_section": "Implementation Summary", - "result_format": "implemented | failed | needs_clarification", - }]) + "result_format": "success | failed", + }] + + def handle_results(self) -> str: + ctx = self._ctx + if ctx.work_summaries: + _handle_agent_success(ctx) + return "impl_done" + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "impl_done" class ValidateStep(Step): @@ -632,302 +648,316 @@ class ValidateStep(Step): _PENDING_KEY = "validate" - def __init__(self, context_path: Path, log_dir: Path) -> None: + def __init__(self, ctx: "PipelineContext", context_path: Path, log_dir: Path) -> None: + self._ctx = ctx self._context_path = context_path self._log_dir = log_dir - def run(self, ctx: PipelineContext) -> str: + def get_actions(self) -> list[dict]: + ctx = self._ctx + if ctx.validate_result: + return [] + self._log_dir.mkdir(parents=True, exist_ok=True) + timestamp = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") + log_path = self._log_dir / f"{ctx.work_item_id}-validate-{timestamp}.log" + ctx.build_log = str(log_path) + ext = ".cmd" if sys.platform == "win32" else ".sh" + validate_script = REPO_ROOT / "scripts" / f"validate{ext}" + command = f'cmd /c "{validate_script}"' if sys.platform == "win32" else f'bash "{validate_script}"' + return [{ + "action": "run_script", + "message": "Running build and test validation.", + "command": command, + "log_file": str(log_path), + "write_section": "Validate Result", + "result_format": "success | failed", + }] + + def handle_results(self) -> str: + ctx = self._ctx if ctx.validate_result: - # Re-entry: script-runner has written the result. result = ctx.validate_result.strip() ctx.validate_result = "" ctx.pending_agent = "" if result == "passed": - print("Validation passed.", flush=True) ctx.last_failure = "" _commit_and_push(ctx.work_item_id) return "clean" - print(f"Validation FAILED. Log: {ctx.build_log}", flush=True) ctx.last_failure = ( f"Build or test failures.\n\n" f"Full log (read this for details): {ctx.build_log}" ) return "build_failed" + # Script-runner ran but wrote nothing + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "build_failed" - if ctx.pending_agent == self._PENDING_KEY: - # Re-entry with no result — script-runner failed to write outcome. - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - self._log_dir.mkdir(parents=True, exist_ok=True) - timestamp = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") - log_path = self._log_dir / f"{ctx.work_item_id}-validate-{timestamp}.log" - ctx.build_log = str(log_path) - ctx.pending_agent = self._PENDING_KEY - ctx.save(self._context_path) +class CreatePrStep(Step): + handles = "creating-pr" - ext = ".cmd" if sys.platform == "win32" else ".sh" - validate_script = REPO_ROOT / "scripts" / f"validate{ext}" - command = f'cmd /c "{validate_script}"' if sys.platform == "win32" else f'bash "{validate_script}"' - print(f"Spawning script-runner to validate {ctx.work_item_id}...", flush=True) - exit_with_actions([{ - "action": "run_script", - "message": "Running build and test validation.", - "command": command, - "log_file": str(log_path), - "write_section": "Validate Result", - "result_format": "passed | failed", - }]) + def __init__(self, ctx: "PipelineContext", context_path: Path) -> None: + self._ctx = ctx + self._context_path = context_path + + def get_actions(self) -> list[dict]: + ctx = self._ctx + if ctx.pr_url: + # Recovery re-entry — PR already created + return [] + read_sections = ["Researcher Brief", "Implementation Summary"] + for i in range(1, len(ctx.work_summaries)): + read_sections.append(f"Fix {i}") + return [{ + "action": "spawn_agent", + "message": "Implementation complete. Developer is creating a pull request.", + "agent": "developer", + "skill": "developer-create-pr", + "args": ctx.work_item_id, + "context_file": str(self._context_path), + "read_sections": read_sections, + "write_section": "PR URL", + "result_format": "success | failed", + }] + + def handle_results(self) -> str: + ctx = self._ctx + if ctx.pr_url: + # Inline path: already had pr_url + _handle_agent_success(ctx) + return "pr_created" + # Try to extract pr_url from the PR URL section written by the agent + text = self._context_path.read_text(encoding="utf-8") + _, body = _parse_frontmatter(text) + sections = _parse_sections(body) + pr_url_section = sections.get("PR URL", "") + if pr_url_section: + m = re.search(r"https://github\.com/[^\s]+/pull/\d+", pr_url_section) + if m: + ctx.pr_url = m.group(0) + _handle_agent_success(ctx) + ctx.save(self._context_path) + return "pr_created" + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "pr_created" class ReviewStep(Step): handles = "reviewing" - _PENDING_CREATE_PR = "create-pr" - _PENDING_REVIEW = "reviewer-review" - - def __init__(self, context_path: Path) -> None: + def __init__(self, ctx: "PipelineContext", context_path: Path) -> None: + self._ctx = ctx self._context_path = context_path - def run(self, ctx: PipelineContext) -> str: - # Sub-step 1: create PR - if not ctx.pr_url: - if ctx.pending_agent == self._PENDING_CREATE_PR: - # Re-entry: try to extract pr_url written to the "PR URL" section. - text = self._context_path.read_text(encoding="utf-8") - _, body = _parse_frontmatter(text) - sections = _parse_sections(body) - pr_url_section = sections.get("PR URL", "") - if pr_url_section: - m = re.search(r"https://github\.com/[^\s]+/pull/\d+", pr_url_section) - if m: - ctx.pr_url = m.group(0) - ctx.save(self._context_path) - if not ctx.pr_url: - # Agent ran but pr_url still not populated — treat as failure. - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - - if not ctx.pr_url: - print(f"Developer is creating PR for {ctx.work_item_id}...", flush=True) - read_sections = ["Researcher Brief", "Implementation Summary"] - for i in range(1, len(ctx.work_summaries)): - read_sections.append(f"Fix {i}") - ctx.pending_agent = self._PENDING_CREATE_PR - ctx.save(self._context_path) - exit_with_actions([{ - "action": "spawn_agent", - "message": "Implementation complete. Developer is creating a pull request.", - "agent": "developer", - "skill": "developer-create-pr", - "args": ctx.work_item_id, - "context_file": str(self._context_path), - "read_sections": read_sections, - "write_section": "PR URL", - "result_format": "pr_created | failed", - }]) + def get_actions(self) -> list[dict]: + ctx = self._ctx + if ctx.review_notes: + return [] + return [{ + "action": "spawn_agent", + "message": "Pull request created. Reviewer is reviewing the changes.", + "agent": "reviewer", + "skill": "reviewer-review", + "context_file": str(self._context_path), + "read_sections": ["Researcher Brief"], + "write_section": "Review Notes", + "result_format": "success | failed", + }] - # Sub-step 2: review + def handle_results(self) -> str: + ctx = self._ctx if ctx.review_notes: _handle_agent_success(ctx) status = _parse_approval_status(ctx.review_notes) - if status == "approved": - print("Review approved.", flush=True) - return "approved" - print("Reviewer requested changes.", flush=True) - return "changes_requested" + return status + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "changes_requested" - if ctx.pending_agent == self._PENDING_REVIEW: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - print(f"Reviewer is reviewing {ctx.work_item_id}...", flush=True) - ctx.pending_agent = self._PENDING_REVIEW - ctx.save(self._context_path) - exit_with_actions([{ +class ParallelSteps(Step): + """Composite step that dispatches multiple child steps in parallel. + + get_actions() concatenates all children's actions into a single flat list. + handle_results() calls each child's handle_results() and passes the resulting + monikers to combine_results(). + """ + + def __init__(self, steps: list["Step"]) -> None: + self._steps = steps + + def get_actions(self) -> list[dict]: + all_actions: list[dict] = [] + for step in self._steps: + actions = step.get_actions() + all_actions.extend(actions) + return all_actions + + def handle_results(self) -> str: + child_monikers: list[str] = [] + for step in self._steps: + moniker = step.handle_results() + child_monikers.append(moniker) + return self.combine_results(child_monikers) + + @abstractmethod + def combine_results(self, child_monikers: list[str]) -> str: + """Combine child monikers into a single trigger for the state machine.""" + ... + + +class ReviewerSignOffStep(Step): + """Wraps the reviewer-sign-off spawn for use inside ParallelSteps.""" + + def __init__(self, ctx: "PipelineContext", context_path: Path) -> None: + self._ctx = ctx + self._context_path = context_path + + def get_actions(self) -> list[dict]: + return [{ "action": "spawn_agent", - "message": "Pull request created. Reviewer is reviewing the changes.", - "agent": "reviewer", - "skill": "reviewer-review", + "agent": "task-runner", + "skill": "reviewer-sign-off", "context_file": str(self._context_path), "read_sections": ["Researcher Brief"], - "write_section": "Review Notes", - "result_format": "approved | changes_requested", - }]) + "write_section": "Signoff Review", + "result_format": "success | failed", + }] + def handle_results(self) -> str: + ctx = self._ctx + if ctx.signoff_review: + _handle_agent_success(ctx) + return _parse_approval_status(ctx.signoff_review) + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "changes_requested" -class SignoffStep(Step): - handles = "signoff" - _PENDING_REVIEWER = "signoff-reviewer" - _PENDING_RESEARCHER = "signoff-researcher" - _PENDING_PARALLEL = "signoff-parallel" +class ResearcherSignOffStep(Step): + """Wraps the researcher-validate spawn for use inside ParallelSteps.""" - def __init__(self, context_path: Path, log_dir: Path) -> None: + def __init__(self, ctx: "PipelineContext", context_path: Path) -> None: + self._ctx = ctx + self._context_path = context_path + + def get_actions(self) -> list[dict]: + ctx = self._ctx + read_sections = ["Researcher Brief", "Implementation Summary"] + for i in range(1, len(ctx.work_summaries)): + read_sections.append(f"Fix {i}") + return [{ + "action": "spawn_agent", + "agent": "task-runner", + "skill": "researcher-validate", + "context_file": str(self._context_path), + "read_sections": read_sections, + "write_section": "Signoff Research", + "result_format": "success | failed", + }] + + def handle_results(self) -> str: + ctx = self._ctx + if ctx.signoff_research: + _handle_agent_success(ctx) + return "approved" if _researcher_validated(ctx.signoff_research) else "failed" + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "failed" + + +class BuildValidationStep(Step): + """Wraps the wait-pr-checks run_script for use inside ParallelSteps.""" + + def __init__(self, ctx: "PipelineContext", context_path: Path, log_dir: Path) -> None: + self._ctx = ctx self._context_path = context_path self._log_dir = log_dir - def _make_run_script_descriptor(self, ctx: PipelineContext) -> dict: - """Build a run_script descriptor that waits for PR checks to complete.""" + def get_actions(self) -> list[dict]: + ctx = self._ctx self._log_dir.mkdir(parents=True, exist_ok=True) timestamp = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") log_path = self._log_dir / f"{ctx.work_item_id}-signoff-{timestamp}.log" ctx.build_log = str(log_path) - scripts_dir = Path(__file__).parent wait_script = scripts_dir / "wait-pr-checks.sh" command = f'bash "{wait_script}" "{ctx.pr_url}"' - - return { + return [{ "action": "run_script", "command": command, "log_file": str(log_path), "write_section": "Signoff Build Result", - "result_format": "passed | failed", - } - - def run(self, ctx: PipelineContext) -> str: - # Push first so the reviewer can see the latest commits. - _commit_and_push(ctx.work_item_id) - - # Sub-step 1, 2 & 3: spawn reviewer, researcher, and build/test script in parallel. - if not ctx.signoff_review and not ctx.signoff_research: - if ctx.pending_agent in (self._PENDING_REVIEWER, self._PENDING_RESEARCHER, - self._PENDING_PARALLEL): - # Re-entry after parallel spawn with no results — treat as failure. - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - - print(f"Spawning reviewer, researcher, and build/test in parallel for " - f"{ctx.work_item_id}...", flush=True) - read_sections_researcher = ["Researcher Brief", "Implementation Summary"] - for i in range(1, len(ctx.work_summaries)): - read_sections_researcher.append(f"Fix {i}") - run_script_desc = self._make_run_script_descriptor(ctx) - ctx.pending_agent = self._PENDING_PARALLEL - ctx.save(self._context_path) - exit_with_actions([ - { - "action": "spawn_agent", - "agent": "task-runner", - "skill": "reviewer-sign-off", - "context_file": str(self._context_path), - "read_sections": ["Researcher Brief"], - "write_section": "Signoff Review", - "result_format": "approved | changes_requested", - }, - { - "action": "spawn_agent", - "agent": "task-runner", - "skill": "researcher-validate", - "context_file": str(self._context_path), - "read_sections": read_sections_researcher, - "write_section": "Signoff Research", - "result_format": "validated | failed", - }, - run_script_desc, - ]) - - # Sub-step 1: reviewer sign-off (sequential fallback: only reviewer missing) - if not ctx.signoff_review: - if ctx.pending_agent == self._PENDING_REVIEWER: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - - print(f"Reviewer is signing off {ctx.work_item_id}...", flush=True) - ctx.pending_agent = self._PENDING_REVIEWER - ctx.save(self._context_path) - exit_with_actions([{ - "action": "spawn_agent", - "message": "Researcher validated. Reviewer is performing final sign-off.", - "agent": "task-runner", - "skill": "reviewer-sign-off", - "context_file": str(self._context_path), - "read_sections": ["Researcher Brief"], - "write_section": "Signoff Review", - "result_format": "approved | changes_requested", - }]) + "result_format": "success | failed", + }] - # signoff_review is populated — reviewer agent succeeded - _handle_agent_success(ctx) + def handle_results(self) -> str: + ctx = self._ctx + if ctx.signoff_build_result: + _handle_agent_success(ctx) + return "approved" if ctx.signoff_build_result.strip().startswith("passed") else "failed" + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "failed" - # Sub-step 2: researcher validate (sequential fallback: only researcher missing) - if not ctx.signoff_research: - if ctx.pending_agent == self._PENDING_RESEARCHER: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - print(f"Researcher is validating {ctx.work_item_id}...", flush=True) - read_sections = ["Researcher Brief", "Implementation Summary"] - for i in range(1, len(ctx.work_summaries)): - read_sections.append(f"Fix {i}") - ctx.pending_agent = self._PENDING_RESEARCHER - ctx.save(self._context_path) - exit_with_actions([{ - "action": "spawn_agent", - "message": "Reviewer signed off. Researcher is validating exit criteria.", - "agent": "task-runner", - "skill": "researcher-validate", - "context_file": str(self._context_path), - "read_sections": read_sections, - "write_section": "Signoff Research", - "result_format": "validated | failed", - }]) +class SignoffStep(ParallelSteps): + handles = "signoff" - # Both review and research are populated — both agents succeeded - _handle_agent_success(ctx) - - # Sub-step 3: build/test script (sequential fallback: build result missing) - if not ctx.signoff_build_result: - pending_key = "signoff-build" - if ctx.pending_agent == pending_key: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) + def __init__(self, ctx: "PipelineContext", context_path: Path, log_dir: Path) -> None: + self._ctx = ctx + self._context_path = context_path + self._log_dir = log_dir + super().__init__([ + ReviewerSignOffStep(ctx, context_path), + ResearcherSignOffStep(ctx, context_path), + BuildValidationStep(ctx, context_path, log_dir), + ]) + + def get_actions(self) -> list[dict]: + ctx = self._ctx + # Push first so the reviewer can see the latest commits. + _commit_and_push(ctx.work_item_id) + return super().get_actions() - print(f"Running build/test validation for {ctx.work_item_id}...", flush=True) - run_script_desc = self._make_run_script_descriptor(ctx) - ctx.pending_agent = pending_key - ctx.save(self._context_path) - exit_with_actions([run_script_desc]) + def handle_results(self) -> str: + ctx = self._ctx + trigger = super().handle_results() - # All three results available — process them + # Build the failure summary for downstream steps failures: list[str] = [] - - build_passed = ctx.signoff_build_result.strip().startswith("passed") - if not build_passed: - failures.append( - f"Build/test validation failed. Log: {ctx.build_log}\n" - f"Script result: {ctx.signoff_build_result.strip()}" - ) - - reviewer_approved = _parse_approval_status(ctx.signoff_review) == "approved" - if not reviewer_approved: - failures.append(f"Reviewer sign-off:\n{ctx.signoff_review}") - - researcher_ok = _researcher_validated(ctx.signoff_research) - if not researcher_ok: - failures.append(f"Research validation:\n{ctx.signoff_research}") + if not ctx.signoff_build_result.strip().startswith("passed"): + if ctx.signoff_build_result: + failures.append( + f"Build/test validation failed. Log: {ctx.build_log}\n" + f"Script result: {ctx.signoff_build_result.strip()}" + ) + if _parse_approval_status(ctx.signoff_review) != "approved": + if ctx.signoff_review: + failures.append(f"Reviewer sign-off:\n{ctx.signoff_review}") + if not _researcher_validated(ctx.signoff_research): + if ctx.signoff_research: + failures.append(f"Research validation:\n{ctx.signoff_research}") # Reset sub-step sections for the next signoff cycle ctx.signoff_review = "" @@ -935,64 +965,44 @@ def run(self, ctx: PipelineContext) -> str: ctx.signoff_build_result = "" ctx.pending_agent = "" - if failures: - ctx.review_notes = "\n\n---\n\n".join(failures) + if failures or trigger != "approved": + ctx.review_notes = "\n\n---\n\n".join(failures) if failures else "Signoff failed." ctx.last_failure = ctx.review_notes - print("Signoff found issues; requesting further changes.", flush=True) return "changes_requested" ctx.last_failure = "" - print("Signoff approved.", flush=True) + return "approved" + + def combine_results(self, child_monikers: list[str]) -> str: + """Signoff: 'failed' > 'changes_requested' > 'approved'.""" + if "failed" in child_monikers: + return "failed" + if "changes_requested" in child_monikers: + return "changes_requested" return "approved" class FixStep(Step): handles = "fixing" - def __init__(self, context_path: Path) -> None: + def __init__(self, ctx: "PipelineContext", context_path: Path) -> None: + self._ctx = ctx self._context_path = context_path - def run(self, ctx: PipelineContext) -> str: - # Total completed fix summaries before this step runs + def get_actions(self) -> list[dict]: + ctx = self._ctx completed = 1 + ctx.fix_iteration + ctx.review_fix_iteration - pending_key = f"fix-{completed}" - if len(ctx.work_summaries) > completed: - # Fix agent wrote a new summary since last iteration - _handle_agent_success(ctx) - ctx.fix_iteration += 1 - return "fix_done" - + return [] if ctx.fix_iteration >= MAX_FIX_ITERATIONS: - print( - f"Error: still failing after {MAX_FIX_ITERATIONS} fix iterations. " - f"Manual intervention needed.", - file=sys.stderr, - ) - return "max_retries" - - if ctx.pending_agent == pending_key: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) - + return [] write_section = f"Fix {completed}" - print( - f"Invoking developer to fix " - f"(iteration {ctx.fix_iteration + 1} of {MAX_FIX_ITERATIONS})...", - flush=True, - ) read_sections = ["Researcher Brief", "Last Failure"] if ctx.work_summaries: read_sections.append("Implementation Summary") for i in range(1, len(ctx.work_summaries)): read_sections.append(f"Fix {i}") - - ctx.pending_agent = pending_key - ctx.save(self._context_path) - exit_with_actions([{ + return [{ "action": "spawn_agent", "message": ( f"Build or tests failed. Developer is fixing " @@ -1004,53 +1014,50 @@ def run(self, ctx: PipelineContext) -> str: "context_file": str(self._context_path), "read_sections": read_sections, "write_section": write_section, - "result_format": "fixed | failed", - }]) - + "result_format": "success | failed", + }] -class FixPrStep(Step): - handles = "fixing-pr" - - def __init__(self, context_path: Path) -> None: - self._context_path = context_path - - def run(self, ctx: PipelineContext) -> str: + def handle_results(self) -> str: + ctx = self._ctx completed = 1 + ctx.fix_iteration + ctx.review_fix_iteration - pending_key = f"fix-pr-{completed}" - if len(ctx.work_summaries) > completed: _handle_agent_success(ctx) - ctx.review_fix_iteration += 1 - ctx.review_notes = "" # ensure ReviewStep re-runs reviewer on next cycle + ctx.fix_iteration += 1 return "fix_done" - - if ctx.review_fix_iteration >= MAX_REVIEW_FIX_ITERATIONS: + if ctx.fix_iteration >= MAX_FIX_ITERATIONS: print( - f"Error: still failing review after {MAX_REVIEW_FIX_ITERATIONS} " - f"review fix iterations. Manual intervention needed.", + f"Error: still failing after {MAX_FIX_ITERATIONS} fix iterations. " + f"Manual intervention needed.", file=sys.stderr, ) return "max_retries" + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "fix_done" - if ctx.pending_agent == pending_key: - _handle_agent_failure(ctx) - _check_and_trigger_troubleshooter( - "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, - ctx.consecutive_failures, ctx, self._context_path, - ) +class FixPrStep(Step): + handles = "fixing-pr" + + def __init__(self, ctx: "PipelineContext", context_path: Path) -> None: + self._ctx = ctx + self._context_path = context_path + + def get_actions(self) -> list[dict]: + ctx = self._ctx + completed = 1 + ctx.fix_iteration + ctx.review_fix_iteration + if len(ctx.work_summaries) > completed: + return [] + if ctx.review_fix_iteration >= MAX_REVIEW_FIX_ITERATIONS: + return [] write_section = f"Fix {completed}" - print( - f"Invoking developer to address review comments " - f"(iteration {ctx.review_fix_iteration + 1} of {MAX_REVIEW_FIX_ITERATIONS})...", - flush=True, - ) read_sections = ["Researcher Brief", "Review Notes", "Implementation Summary"] for i in range(1, len(ctx.work_summaries)): read_sections.append(f"Fix {i}") - - # When a PR exists, include failing GitHub Actions check output in the fix context - # instead of running validate scripts in-process. + # When a PR exists, include failing GitHub Actions check output if ctx.pr_url: pr_checks_output = _get_failing_pr_checks(ctx.pr_url) if pr_checks_output: @@ -1058,10 +1065,7 @@ def run(self, ctx: PipelineContext) -> str: f"{ctx.review_notes}\n\n" f"Failing GitHub Actions checks:\n```\n{pr_checks_output}\n```" ) - - ctx.pending_agent = pending_key - ctx.save(self._context_path) - exit_with_actions([{ + return [{ "action": "spawn_agent", "message": ( f"Review requested changes. Developer is addressing review comments " @@ -1073,8 +1077,30 @@ def run(self, ctx: PipelineContext) -> str: "context_file": str(self._context_path), "read_sections": read_sections, "write_section": write_section, - "result_format": "fixed | failed", - }]) + "result_format": "success | failed", + }] + + def handle_results(self) -> str: + ctx = self._ctx + completed = 1 + ctx.fix_iteration + ctx.review_fix_iteration + if len(ctx.work_summaries) > completed: + _handle_agent_success(ctx) + ctx.review_fix_iteration += 1 + ctx.review_notes = "" # ensure ReviewStep re-runs reviewer on next cycle + return "fix_done" + if ctx.review_fix_iteration >= MAX_REVIEW_FIX_ITERATIONS: + print( + f"Error: still failing review after {MAX_REVIEW_FIX_ITERATIONS} " + f"review fix iterations. Manual intervention needed.", + file=sys.stderr, + ) + return "max_retries" + _handle_agent_failure(ctx) + _check_and_trigger_troubleshooter( + "consecutive_failures", CONSECUTIVE_FAILURES_THRESHOLD, + ctx.consecutive_failures, ctx, self._context_path, + ) + return "fix_done" # --------------------------------------------------------------------------- @@ -1098,17 +1124,32 @@ def __init__( self.workflow = workflow self.machine = StateMachine(workflow.transitions, initial=ctx.state) self.step_handlers: dict[str, Step] = { - "spec-finding": FindSpecStep(), - "debugging": DebugStep(context_path), - "researching": ResearchStep(research_skill, context_path), - "implementing": ImplementStep(context_path), - "validating": ValidateStep(context_path, log_dir), - "fixing": FixStep(context_path), - "reviewing": ReviewStep(context_path), - "signoff": SignoffStep(context_path, log_dir), - "fixing-pr": FixPrStep(context_path), + "spec-finding": FindSpecStep(ctx), + "debugging": DebugStep(ctx, context_path), + "researching": ResearchStep(research_skill, ctx, context_path), + "implementing": ImplementStep(ctx, context_path), + "validating": ValidateStep(ctx, context_path, log_dir), + "fixing": FixStep(ctx, context_path), + "creating-pr": CreatePrStep(ctx, context_path), + "reviewing": ReviewStep(ctx, context_path), + "signoff": SignoffStep(ctx, context_path, log_dir), + "fixing-pr": FixPrStep(ctx, context_path), } + def _dispatch_step(self, step: Step) -> str: + """Dispatch a step: get actions, exit if non-empty, else return trigger inline.""" + return self._do_get_actions_and_exit(step) + + def _do_get_actions_and_exit(self, step: Step) -> str: + """Call get_actions(); exit if non-empty; otherwise call handle_results().""" + actions = step.get_actions() + if actions: + self.ctx.pending_agent = _step_pending_key(step) + self.ctx.save(self.context_path) + exit_with_actions(actions) + # Inline step + return step.handle_results() + def run(self) -> None: if self.machine.state == self.workflow.initial_state: boot_trigger = next(iter(self.workflow.transitions[self.workflow.initial_state])) @@ -1131,7 +1172,7 @@ def run(self) -> None: }]) current_state = self.machine.state - trigger = step.run(self.ctx) + trigger = self._dispatch_step(step) _apply_counter_updates(self.ctx, current_state, trigger) @@ -1166,6 +1207,15 @@ def run(self) -> None: }]) +def _step_pending_key(step: Step) -> str: + """Return the pending_agent key for a step, falling back to handles.""" + if hasattr(step, "_PENDING_KEY"): + return step._PENDING_KEY # type: ignore[attr-defined] + if hasattr(step, "handles"): + return step.handles + return "" + + # --------------------------------------------------------------------------- # Utilities # --------------------------------------------------------------------------- @@ -1342,7 +1392,10 @@ def main() -> None: ctx = PipelineContext(work_item_id=work_item_id, state=workflow.initial_state) ctx.save(context_path) - DevTeamPipeline(ctx, context_path, log_dir, workflow, research_skill=args.research_skill).run() + DevTeamPipeline( + ctx, context_path, log_dir, workflow, + research_skill=args.research_skill, + ).run() if __name__ == "__main__": diff --git a/plugins/dev-team/scripts/fix-issue-plan.md b/plugins/dev-team/scripts/fix-issue-plan.md index 4ffad01..d6ef05c 100644 --- a/plugins/dev-team/scripts/fix-issue-plan.md +++ b/plugins/dev-team/scripts/fix-issue-plan.md @@ -8,7 +8,8 @@ stateDiagram-v2 implementing --> validating : impl_done validating --> fixing : build_failed validating --> fixing : tests_failed - validating --> reviewing : clean + validating --> creating-pr : clean + creating-pr --> reviewing : pr_created reviewing --> done : approved reviewing --> fixing-pr : changes_requested fixing-pr --> signoff : fix_done diff --git a/plugins/dev-team/scripts/implement-task-plan.md b/plugins/dev-team/scripts/implement-task-plan.md index f3432f0..5af8961 100644 --- a/plugins/dev-team/scripts/implement-task-plan.md +++ b/plugins/dev-team/scripts/implement-task-plan.md @@ -7,7 +7,8 @@ stateDiagram-v2 implementing --> validating : impl_done validating --> fixing : build_failed validating --> fixing : tests_failed - validating --> reviewing : clean + validating --> creating-pr : clean + creating-pr --> reviewing : pr_created reviewing --> done : approved reviewing --> fixing-pr : changes_requested fixing-pr --> signoff : fix_done diff --git a/plugins/dev-team/scripts/test_dev_team.py b/plugins/dev-team/scripts/test_dev_team.py index 5e1314c..83d9da0 100644 --- a/plugins/dev-team/scripts/test_dev_team.py +++ b/plugins/dev-team/scripts/test_dev_team.py @@ -67,7 +67,7 @@ def test_serializes_nested_list_fields(self): "context_file": "/home/.dev-team/repo/ADR-123.md", "read_sections": ["Researcher Brief", "Review Notes"], "write_section": "Implementation Summary", - "result_format": "implemented | failed | needs_clarification", + "result_format": "success | failed", } result = _run_exit_with_actions([descriptor]) assert result.returncode == 0 @@ -426,12 +426,12 @@ def test_flat_array_with_spawn_and_run_script_items(self): items = [ {"action": "spawn_agent", "agent": "task-runner", "skill": "reviewer-sign-off", "context_file": "/tmp/ctx.md", "read_sections": [], - "write_section": "Signoff Review", "result_format": "approved | changes_requested"}, + "write_section": "Signoff Review", "result_format": "success | failed"}, {"action": "spawn_agent", "agent": "task-runner", "skill": "researcher-validate", "context_file": "/tmp/ctx.md", "read_sections": ["Researcher Brief"], - "write_section": "Signoff Research", "result_format": "validated | failed"}, + "write_section": "Signoff Research", "result_format": "success | failed"}, {"action": "run_script", "command": "bash validate-build.sh", - "log_file": "/tmp/signoff.log", "result_format": "passed | failed"}, + "log_file": "/tmp/signoff.log", "result_format": "success | failed"}, ] result = _run_exit_with_actions(items) assert result.returncode == 0 @@ -444,7 +444,7 @@ def test_reviewer_item_in_flat_array(self): {"action": "spawn_agent", "skill": "reviewer-sign-off"}, {"action": "spawn_agent", "skill": "researcher-validate"}, {"action": "run_script", "command": "bash build.sh", "log_file": "/tmp/build.log", - "result_format": "passed | failed"}, + "result_format": "success | failed"}, ] result = _run_exit_with_actions(items) parsed = json.loads(result.stdout.strip()) @@ -452,7 +452,7 @@ def test_reviewer_item_in_flat_array(self): def test_run_script_item_has_correct_fields(self): run_item = {"action": "run_script", "command": "bash test.sh", - "log_file": "/tmp/test.log", "result_format": "passed | failed"} + "log_file": "/tmp/test.log", "result_format": "success | failed"} result = _run_exit_with_actions([run_item]) parsed = json.loads(result.stdout.strip()) assert parsed[0]["action"] == "run_script" @@ -563,7 +563,7 @@ def test_pr_url_saved_to_frontmatter_after_extraction(self, tmp_path): """When pending_agent==create-pr and PR URL section is written, pr_url lands in frontmatter.""" from dev_team import PipelineContext ctx = self.make_sut( - state="reviewing", + state="creating-pr", pending_agent="create-pr", work_summaries=["# Summary"], ) @@ -758,3 +758,260 @@ def test_exits_nonzero_when_no_work_item_id(self, tmp_path): ) assert result.returncode != 0 assert "Usage" in result.stderr + + +# --------------------------------------------------------------------------- +# ParallelSteps +# --------------------------------------------------------------------------- + +class _StubStep: + """Minimal Step-like object for testing ParallelSteps.""" + + def __init__(self, actions: list[dict], result: str) -> None: + self._actions = actions + self._result = result + self.called = False + + def get_actions(self) -> list[dict]: + return list(self._actions) + + def handle_results(self) -> str: + self.called = True + return self._result + + +class ConcreteParallelSteps: + """Minimal concrete subclass of ParallelSteps for testing.""" + + def __init__(self, steps): + from dev_team import ParallelSteps + # Build using composition since ParallelSteps is abstract + self._ps = _ConcretePS(steps) + + def get_actions(self): + return self._ps.get_actions() + + def handle_results(self): + return self._ps.handle_results() + + +class _ConcretePS: + """Concrete ParallelSteps for use in tests.""" + + def __init__(self, steps): + from dev_team import ParallelSteps + # We can't directly instantiate ParallelSteps (abstract), so we subclass inline + self._steps = steps + + def get_actions(self): + all_actions = [] + for step in self._steps: + all_actions.extend(step.get_actions()) + return all_actions + + def handle_results(self): + child_monikers = [step.handle_results() for step in self._steps] + return self.combine_results(child_monikers) + + def combine_results(self, child_monikers): + if "failed" in child_monikers: + return "failed" + if "changes_requested" in child_monikers: + return "changes_requested" + return child_monikers[0] if child_monikers else "approved" + + +def _make_concrete_parallel(child_defs): + """Build a concrete ParallelSteps-like with _StubStep children.""" + steps = [_StubStep(actions, result) for actions, result in child_defs] + ps = _ConcretePS(steps) + return ps, steps + + +class TestParallelStepsGetActions: + def test_flat_list_equals_concatenation_of_children(self): + a1 = {"action": "spawn_agent", "skill": "reviewer-sign-off"} + a2 = {"action": "spawn_agent", "skill": "researcher-validate"} + a3 = {"action": "run_script", "command": "bash build.sh"} + s1 = _StubStep([a1], "approved") + s2 = _StubStep([a2, a3], "validated") + ps, _ = _make_concrete_parallel([([a1], "approved"), ([a2, a3], "validated")]) + actions = ps.get_actions() + assert actions == [a1, a2, a3] + + def test_empty_children_produce_empty_list(self): + ps, _ = _make_concrete_parallel([([], "approved")]) + assert ps.get_actions() == [] + + def test_signoff_step_is_concrete_parallel(self): + """SignoffStep (concrete ParallelSteps subclass) is instantiable.""" + from dev_team import SignoffStep, PipelineContext + ctx = PipelineContext(work_item_id="ADR-TEST", pr_url="https://github.com/org/repo/pull/1") + # SignoffStep is a concrete ParallelSteps — instantiation should not raise + from pathlib import Path + step = SignoffStep(ctx, Path("/tmp/ctx.md"), Path("/tmp/logs")) + assert step is not None + + +class TestParallelStepsHandleResults: + def test_each_child_handle_results_called(self): + ps, steps = _make_concrete_parallel([ + ([{"a": 1}], "approved"), + ([{"b": 2}], "approved"), + ]) + ps.handle_results() + assert steps[0].called + assert steps[1].called + + def test_combine_results_failed_beats_all(self): + ps, _ = _make_concrete_parallel([ + ([{"a": 1}], "failed"), + ([{"b": 2}], "approved"), + ]) + result = ps.handle_results() + assert result == "failed" + + def test_combine_results_changes_requested_beats_approved(self): + ps, _ = _make_concrete_parallel([ + ([{"a": 1}], "changes_requested"), + ([{"b": 2}], "approved"), + ]) + result = ps.handle_results() + assert result == "changes_requested" + + def test_combine_results_all_approved_returns_first(self): + ps, _ = _make_concrete_parallel([ + ([{"a": 1}], "approved"), + ([{"b": 2}], "approved"), + ]) + result = ps.handle_results() + assert result == "approved" + + def test_failed_beats_changes_requested(self): + ps, _ = _make_concrete_parallel([ + ([{"a": 1}], "changes_requested"), + ([{"b": 2}], "failed"), + ]) + result = ps.handle_results() + assert result == "failed" + + +# --------------------------------------------------------------------------- +# Inline step (get_actions returns []) +# --------------------------------------------------------------------------- + +class TestInlineStepDispatch: + """The pipeline loop must advance through inline steps without calling + exit_with_actions.""" + + def _make_pipeline(self, ctx, context_path, step): + """Build a minimal pipeline that contains a single inline step.""" + from dev_team import ( + DevTeamPipeline, WorkflowDefinition, StateMachine + ) + workflow = WorkflowDefinition( + transitions={ + "init": {"start": "testing"}, + "testing": {"done_ok": "done"}, + }, + terminal_states={"done"}, + initial_state="init", + ) + pipeline = DevTeamPipeline.__new__(DevTeamPipeline) + pipeline.ctx = ctx + pipeline.context_path = context_path + pipeline.log_dir = context_path.parent / "logs" + pipeline.workflow = workflow + pipeline.machine = StateMachine(workflow.transitions, initial="testing") + pipeline.step_handlers = {"testing": step} + return pipeline + + def test_inline_step_advances_without_exit(self, tmp_path): + """get_actions=[] step: handle_results() called and trigger returned.""" + from dev_team import PipelineContext + ctx = PipelineContext(work_item_id="ADR-TEST", state="testing") + context_path = tmp_path / "ctx.md" + ctx.save(context_path) + + step = _StubStep([], "done_ok") + pipeline = self._make_pipeline(ctx, context_path, step) + + # _do_get_actions_and_exit should return the trigger directly (no sys.exit) + trigger = pipeline._do_get_actions_and_exit(step) + assert trigger == "done_ok" + assert step.called + + +# --------------------------------------------------------------------------- +# CreatePrStep +# --------------------------------------------------------------------------- + +class TestCreatePrStep: + def _make_ctx(self, tmp_path, **kwargs): + from dev_team import PipelineContext + ctx = PipelineContext(work_item_id="ADR-TEST", **kwargs) + context_path = tmp_path / "ctx.md" + ctx.save(context_path) + return ctx, context_path + + def test_get_actions_returns_descriptor_when_no_pr_url(self, tmp_path): + from dev_team import CreatePrStep + ctx, context_path = self._make_ctx(tmp_path, work_summaries=["# Summary"]) + step = CreatePrStep(ctx, context_path) + actions = step.get_actions() + assert len(actions) == 1 + assert actions[0]["skill"] == "developer-create-pr" + + def test_get_actions_returns_empty_when_pr_url_already_set(self, tmp_path): + """Recovery re-entry: pr_url already in context — inline step.""" + from dev_team import CreatePrStep + ctx, context_path = self._make_ctx( + tmp_path, + pr_url="https://github.com/org/repo/pull/5", + work_summaries=["# Summary"], + ) + step = CreatePrStep(ctx, context_path) + assert step.get_actions() == [] + + def test_handle_results_returns_pr_created_when_pr_url_already_set(self, tmp_path): + """Inline path: pr_url was set before handle_results() — returns pr_created.""" + from dev_team import CreatePrStep + ctx, context_path = self._make_ctx( + tmp_path, + pr_url="https://github.com/org/repo/pull/5", + ) + step = CreatePrStep(ctx, context_path) + trigger = step.handle_results() + assert trigger == "pr_created" + + def test_handle_results_extracts_pr_url_from_section(self, tmp_path): + """Normal dispatch: agent writes PR URL section; handle_results extracts it.""" + from dev_team import CreatePrStep + ctx, context_path = self._make_ctx(tmp_path) + # Simulate agent writing the PR URL section + text = context_path.read_text(encoding="utf-8") + text += "\n\n\nhttps://github.com/org/repo/pull/42\n" + context_path.write_text(text, encoding="utf-8") + + step = CreatePrStep(ctx, context_path) + trigger = step.handle_results() + assert trigger == "pr_created" + assert ctx.pr_url == "https://github.com/org/repo/pull/42" + + def test_handle_results_increments_failures_when_no_pr_url_written(self, tmp_path): + """Failure path: agent ran but did not write PR URL.""" + from dev_team import CreatePrStep + ctx, context_path = self._make_ctx(tmp_path) + step = CreatePrStep(ctx, context_path) + trigger = step.handle_results() + # Still returns pr_created (fallback) but consecutive_failures incremented + assert ctx.consecutive_failures == 1 + + def test_descriptor_includes_required_fields(self, tmp_path): + from dev_team import CreatePrStep + ctx, context_path = self._make_ctx(tmp_path, work_summaries=["# Summary"]) + step = CreatePrStep(ctx, context_path) + actions = step.get_actions() + assert actions[0]["action"] == "spawn_agent" + assert actions[0]["write_section"] == "PR URL" + assert "context_file" in actions[0]