Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions internal/engine/conflict_fallback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,50 @@ import (
"github.com/tzone85/vortex-dispatch/internal/llm"
)

// When the senior CLI surfaces a session-limit / capacity notice as SUCCESSFUL
// content (not an error envelope), the resolver must NOT mistake it for a merged
// file and write it to disk — that corrupts the source while the rebase
// "succeeds". It must surface a capacity error so the pipeline pauses-and-resumes
// after the limit resets. This mirrors the guard resolveFileTechLead already has;
// the senior fast path previously lacked it (the common single-file conflict).
func TestRebaseWithResolution_SeniorCapacityContentDoesNotCorruptFile(t *testing.T) {
_, worktreeDir := setupDivergentRepos(t, true)

// The senior CLI returns a session-limit notice as ordinary content — it has
// no conflict markers and matches no chatter marker, so without the capacity
// guard it would be returned as a clean resolution and written to feature.go.
notice := llm.CompletionResponse{Content: "You've hit your session limit · resets 3pm. Please try again later."}
client := llm.NewReplayClient(notice, notice, notice, notice)

// No tech lead configured (senior-only) — the common single-file path.
cr := NewConflictResolver(client, "test-model", nil, "", 4096, nil, nil)

err := cr.RebaseWithResolution(context.Background(), "s-capacity", worktreeDir, "origin/main")
if err == nil {
t.Fatal("expected a capacity error, got nil (file would have been corrupted)")
}
if !llm.IsCapacityError(err) {
t.Fatalf("expected IsCapacityError, got: %v", err)
}

// The capacity notice must NOT have been written into the source file.
got, rErr := os.ReadFile(filepath.Join(worktreeDir, "feature.go"))
if rErr != nil {
t.Fatal(rErr)
}
if strings.Contains(string(got), "session limit") {
t.Errorf("capacity notice was written into the source file:\n%s", got)
}

// The rebase must have been aborted, not left in progress.
status := exec.Command("git", "status")
status.Dir = worktreeDir
out, _ := status.CombinedOutput()
if strings.Contains(string(out), "rebase in progress") {
t.Errorf("rebase still in progress after capacity abort:\n%s", out)
}
}

// When the LLM returns conversational COMMENTARY instead of merged file content,
// the resolver must NOT abort and thrash the story through every escalation tier
// forever (the clipforge/pulsereview failure). Instead it deterministically
Expand Down
11 changes: 11 additions & 0 deletions internal/engine/conflict_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,17 @@ File: %s
return "", err
}

// Defensive: some CLI versions surface a session-limit / overloaded notice as
// successful content rather than an error envelope. Without this guard the
// notice passes the marker/chatter checks below, is returned as a clean
// resolution, and gets written verbatim into the source file — corrupting it
// while the rebase "succeeds". Surface it as a capacity error (NOT
// errUnmergeable) so the caller aborts and the pipeline pauses-and-resumes
// after the limit resets, exactly as resolveFileTechLead already does.
if llm.ContainsCapacitySignature(resp.Content) {
return "", &llm.APIError{StatusCode: 429, Message: resp.Content, Retryable: true}
}

resolved := extractResolvedFileContent(resp.Content)

// Sanity check: resolved content must not contain conflict markers.
Expand Down
18 changes: 14 additions & 4 deletions internal/engine/security_gate.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const securityReviewTimeout = 3 * time.Minute

// scanFunc runs the deterministic scanners against a repo. It is a seam so tests
// can supply canned findings instead of invoking real tools.
type scanFunc func(ctx context.Context, repoDir string) (findings []security.Finding, ran, skipped []security.ScannerKind)
type scanFunc func(ctx context.Context, repoDir string) (findings []security.Finding, ran, skipped, failed []security.ScannerKind)

// SecurityGate is vxd's security agent. It combines deterministic SAST/secret/
// dependency scanners with an LLM threat-model review driven by a growable
Expand Down Expand Up @@ -81,7 +81,10 @@ func (g *SecurityGate) ScanRepo(ctx context.Context, repoDir string) (security.R
return security.Report{}, fmt.Errorf("load knowledge base: %w", err)
}

findings, ran, skipped := g.scan(ctx, repoDir)
findings, ran, skipped, failed := g.scan(ctx, repoDir)
if len(failed) > 0 {
log.Printf("[security-gate] scan coverage lost: %d scanner(s) failed: %v", len(failed), failed)
}

if g.client != nil {
findings = append(findings, g.llmReview(ctx, repoDir, langs, kb)...)
Expand All @@ -93,6 +96,7 @@ func (g *SecurityGate) ScanRepo(ctx context.Context, repoDir string) (security.R
Languages: langs,
ScannersRun: ran,
Skipped: skipped,
Failed: failed,
Findings: findings,
KBVersion: kb.Version,
}
Expand All @@ -119,13 +123,19 @@ func (g *SecurityGate) ReviewStory(ctx context.Context, storyID, title, diff, re
return false, "", fmt.Errorf("load knowledge base: %w", kbErr)
}

findings, _, _ := g.scan(ctx, repoDir)
findings, _, _, failed := g.scan(ctx, repoDir)
if len(failed) > 0 {
// Coverage was lost for this story's gate. Per policy a scanner failure
// never blocks the merge, but it must be visible — silently passing the
// story would report it as secure when part of the scan never ran.
log.Printf("[security-gate] story %s: scan coverage lost, %d scanner(s) failed: %v", storyID, len(failed), failed)
}
if g.client != nil {
findings = append(findings, g.llmReviewDiff(ctx, title, diff, langs, kb)...)
}
findings = security.DedupeFindings(findings)

report := security.Report{RepoDir: repoDir, Languages: langs, Findings: findings, KBVersion: kb.Version}
report := security.Report{RepoDir: repoDir, Languages: langs, Failed: failed, Findings: findings, KBVersion: kb.Version}
blocked := report.HasAtLeast(g.gateSeverity)

if g.autoLearn {
Expand Down
39 changes: 34 additions & 5 deletions internal/engine/security_gate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ func countEvents(t *testing.T, es state.EventStore, typ state.EventType) int {
return len(evts)
}

// fakeScan returns a seam that yields canned findings.
func fakeScan(findings ...security.Finding) func(context.Context, string) ([]security.Finding, []security.ScannerKind, []security.ScannerKind) {
return func(context.Context, string) ([]security.Finding, []security.ScannerKind, []security.ScannerKind) {
return findings, []security.ScannerKind{security.ScannerGosec}, []security.ScannerKind{security.ScannerSemgrep}
// fakeScan returns a seam that yields canned findings (no failed scanners).
func fakeScan(findings ...security.Finding) scanFunc {
return func(context.Context, string) ([]security.Finding, []security.ScannerKind, []security.ScannerKind, []security.ScannerKind) {
return findings, []security.ScannerKind{security.ScannerGosec}, []security.ScannerKind{security.ScannerSemgrep}, nil
}
}

func newTestSecurityGate(t *testing.T, client llm.Client, kbPath string, gateSev security.Severity, autoLearn bool, scan func(context.Context, string) ([]security.Finding, []security.ScannerKind, []security.ScannerKind)) *SecurityGate {
func newTestSecurityGate(t *testing.T, client llm.Client, kbPath string, gateSev security.Severity, autoLearn bool, scan scanFunc) *SecurityGate {
es, ps := newSecurityTestStores(t)
g := NewSecurityGate(client, "test-model", 1000, kbPath, gateSev, autoLearn, es, ps)
g.scan = scan
Expand All @@ -67,6 +67,35 @@ func TestSecurityGate_ScanRepo_AggregatesAndEmits(t *testing.T) {
}
}

// A scanner that ran but FAILED (timeout / crash / parse error) must be
// surfaced in the report's Failed list, never silently dropped or counted as a
// clean run. Otherwise a build whose scan partly failed looks scanned-clean.
func TestSecurityGate_ScanRepo_SurfacesFailedScanners(t *testing.T) {
kbPath := filepath.Join(t.TempDir(), "kb.json")
// Seam: gosec ran clean, semgrep was not installed, gitleaks ran but errored.
scan := func(context.Context, string) ([]security.Finding, []security.ScannerKind, []security.ScannerKind, []security.ScannerKind) {
return nil,
[]security.ScannerKind{security.ScannerGosec},
[]security.ScannerKind{security.ScannerSemgrep},
[]security.ScannerKind{security.ScannerGitleaks}
}
g := newTestSecurityGate(t, nil, kbPath, security.SeverityHigh, false, scan)

report, err := g.ScanRepo(context.Background(), t.TempDir())
if err != nil {
t.Fatalf("ScanRepo: %v", err)
}
if len(report.Failed) != 1 || report.Failed[0] != security.ScannerGitleaks {
t.Errorf("expected gitleaks in report.Failed, got %v", report.Failed)
}
// A failed scanner must NOT be mislabeled as a clean run.
for _, k := range report.ScannersRun {
if k == security.ScannerGitleaks {
t.Errorf("failed scanner gitleaks must not appear in ScannersRun: %v", report.ScannersRun)
}
}
}

func TestSecurityGate_ReviewStory_BlocksOnCritical(t *testing.T) {
kbPath := filepath.Join(t.TempDir(), "kb.json")
crit := security.Finding{Tool: "gosec", RuleID: "G101", Severity: security.SeverityCritical, File: "a.go", Line: 2, Title: "hardcoded creds", Source: "scanner"}
Expand Down
15 changes: 11 additions & 4 deletions internal/security/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@ import (
"strings"
)

// Report is the aggregated outcome of a scan: which scanners ran, which were
// skipped (applicable but not installed), the deduplicated findings, and the
// knowledge-base version that informed the LLM pass.
// Report is the aggregated outcome of a scan: which scanners ran clean, which
// were skipped (applicable but not installed), which ran but errored (coverage
// lost), the deduplicated findings, and the knowledge-base version that informed
// the LLM pass.
type Report struct {
RepoDir string `json:"repo_dir"`
Languages []string `json:"languages"`
ScannersRun []ScannerKind `json:"scanners_run"`
Skipped []ScannerKind `json:"skipped"`
Failed []ScannerKind `json:"failed,omitempty"`
Findings []Finding `json:"findings"`
KBVersion int `json:"kb_version"`
}
Expand Down Expand Up @@ -70,7 +72,12 @@ func (r Report) FormatMarkdown() string {
for i, s := range r.Skipped {
skip[i] = string(s)
}
fmt.Fprintf(&b, "Skipped (not installed): %s\n\n", joinOrNone(skip))
fmt.Fprintf(&b, "Skipped (not installed): %s\n", joinOrNone(skip))
fail := make([]string, len(r.Failed))
for i, s := range r.Failed {
fail[i] = string(s)
}
fmt.Fprintf(&b, "Failed (ran but errored — coverage lost): %s\n\n", joinOrNone(fail))

// Findings, most severe first, then by file for stable output.
sorted := make([]Finding, len(r.Findings))
Expand Down
3 changes: 2 additions & 1 deletion internal/security/report_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ func sampleReport() Report {
Languages: []string{"go"},
ScannersRun: []ScannerKind{ScannerGosec, ScannerGitleaks},
Skipped: []ScannerKind{ScannerSemgrep},
Failed: []ScannerKind{ScannerNpmAudit},
KBVersion: 1,
Findings: []Finding{
{Tool: "gitleaks", RuleID: "aws", Severity: SeverityCritical, File: "x.env", Line: 1, Title: "AWS key"},
Expand Down Expand Up @@ -47,7 +48,7 @@ func TestReport_HasAtLeast(t *testing.T) {

func TestReport_FormatMarkdown(t *testing.T) {
md := sampleReport().FormatMarkdown()
for _, want := range []string{"AWS key", "x.env", "CRITICAL", "gosec", "Skipped"} {
for _, want := range []string{"AWS key", "x.env", "CRITICAL", "gosec", "Skipped", "Failed", "coverage lost", string(ScannerNpmAudit)} {
if !strings.Contains(md, want) {
t.Errorf("markdown missing %q\n---\n%s", want, md)
}
Expand Down
25 changes: 18 additions & 7 deletions internal/security/scanners.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"bytes"
"context"
"encoding/json"
"log"
"os/exec"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -76,10 +77,15 @@ func applicableScanners(langs []string, available map[string]bool) []Scanner {
}

// RunScanners runs every applicable+available scanner against repoDir and
// returns deduped findings, the scanners that ran, and the applicable scanners
// that were skipped because they are not installed. One scanner failing (parse
// or exec error) is swallowed so a single broken tool never aborts the scan.
func RunScanners(ctx context.Context, repoDir string) (findings []Finding, ran, skipped []ScannerKind) {
// returns deduped findings, the scanners that ran clean, the applicable scanners
// that were skipped because they are not installed, and the scanners that ran
// but errored (exec crash, timeout, parse failure). A scanner failing never
// aborts the scan, but its failure is NOT silently swallowed: it is logged and
// reported in `failed` rather than counted as a clean run — otherwise a tool
// that failed to inspect the code is indistinguishable from one that found
// nothing, and the security gate would report a build as scanned-clean when
// coverage was actually lost.
func RunScanners(ctx context.Context, repoDir string) (findings []Finding, ran, skipped, failed []ScannerKind) {
langs := DetectLanguages(repoDir)
available := map[string]bool{}
for _, s := range allScanners() {
Expand All @@ -95,14 +101,19 @@ func RunScanners(ctx context.Context, repoDir string) (findings []Finding, ran,
skipped = append(skipped, s.Kind)
continue
}
ran = append(ran, s.Kind)
fs, err := s.Run(ctx, repoDir)
if err != nil {
continue // graceful: log handled by caller; keep going
// Graceful degradation: keep scanning with the other tools, but make
// the coverage loss visible — a failed scan must never masquerade as
// a clean one.
failed = append(failed, s.Kind)
log.Printf("[security] scanner %s failed (coverage lost for this tool): %v", s.Kind, err)
continue
}
ran = append(ran, s.Kind)
findings = append(findings, fs...)
}
return DedupeFindings(findings), ran, skipped
return DedupeFindings(findings), ran, skipped, failed
}

// DetectScanners returns the scanners applicable to repoDir and available on the
Expand Down
Loading