From b873bd023d3ddc6d10f79f50a0e6ad0f043c4024 Mon Sep 17 00:00:00 2001 From: Arturo Peroni Date: Thu, 4 Jun 2026 11:48:28 +0200 Subject: [PATCH] fix: address Bugbot review on #49 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - dataset push --output-json: emit a JSON error object on early-failure paths (validation, discovery, staging, token, port-forward), not just on dry-run/post-submit. runDatasetPush now uses a named return + a jsonEmitted flag + a defer, so '… --output-json | jq' always gets JSON instead of empty stdout on a failure. Adds Error/ExitCode to the result shape + writePushErrorJSON; covered by TestRunDatasetPush_OutputJSONEarlyFailureEmitsJSON. - dataset rm: on a partial teardown (DROP TABLE succeeded but the PVC file removal failed), the error now says so and points the user to re-run — both teardown ops are idempotent (DROP TABLE IF EXISTS / rm -rf), so a re-run completes cleanup rather than reporting a flat 'teardown failed'. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/cli/coverage_test.go | 28 +++++++++++++++++++ internal/cli/dataset.go | 52 +++++++++++++++++++++++++++++++---- internal/cli/dataset_rm.go | 9 ++++++ 3 files changed, 83 insertions(+), 6 deletions(-) diff --git a/internal/cli/coverage_test.go b/internal/cli/coverage_test.go index 9269d00..0150ffa 100644 --- a/internal/cli/coverage_test.go +++ b/internal/cli/coverage_test.go @@ -122,6 +122,34 @@ func TestClassifyPushOutcome(t *testing.T) { } } +// TestRunDatasetPush_OutputJSONEarlyFailureEmitsJSON: with --output-json, +// a failure before the dry-run/submit emit points (here an invalid table, +// exit 2) still writes a JSON error object to stdout — the stdout-always- +// JSON contract. (Bugbot #49) +func TestRunDatasetPush_OutputJSONEarlyFailureEmitsJSON(t *testing.T) { + var jsonBuf, human bytes.Buffer + a := runDatasetPushArgs{ + LocalPath: "./x", + Spec: push.SpecArgs{Table: "../bad", Category: "image_classification", Intent: "train"}, + Printer: ui.New(&human, ui.WithColor(false)), + OutputJSON: true, + JSONOut: &jsonBuf, + } + err := runDatasetPush(context.Background(), &human, &human, a) + + var ee *exitError + if !errors.As(err, &ee) || ee.Code() != 2 { + t.Fatalf("err = %v, want *exitError code 2", err) + } + var got pushJSONResult + if e := json.Unmarshal(jsonBuf.Bytes(), &got); e != nil { + t.Fatalf("stdout is not JSON: %v\n%s", e, jsonBuf.String()) + } + if got.Status != "error" || got.ExitCode != 2 || got.Table != "../bad" { + t.Errorf("got %+v, want status=error exit_code=2 table=../bad", got) + } +} + // TestExpandHome covers the #37 fix: a leading ~ / ~/… resolves under // $HOME, while relative, absolute, and empty paths pass through // untouched (the case that bit the interactive prompt — the shell diff --git a/internal/cli/dataset.go b/internal/cli/dataset.go index 1188cc5..4b562be 100644 --- a/internal/cli/dataset.go +++ b/internal/cli/dataset.go @@ -340,7 +340,24 @@ func expandHome(path string) string { // need the cluster runs before any that does, so a customer with // a bad label-column or oversized dataset gets the diagnostic in // milliseconds without a kubeconfig round-trip. -func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPushArgs) error { +func runDatasetPush(ctx context.Context, out, errOut io.Writer, a runDatasetPushArgs) (err error) { + // In --output-json mode, guarantee stdout always carries a JSON + // object. The dry-run + post-submit paths emit a result and set + // jsonEmitted; this defer covers every early-failure return (bad + // table, discovery, staging, token, port-forward) with a JSON error + // object, so `… --output-json | jq` never sees empty stdout. (Bugbot #49) + jsonEmitted := false + defer func() { + if a.OutputJSON && err != nil && !jsonEmitted { + code := 1 + var ee *exitError + if errors.As(err, &ee) { + code = ee.Code() + } + writePushErrorJSON(a.JSONOut, a.Spec, err, code) + } + }() + // Intro header: brand + a plain-English explainer of what a push // does, so a first-time user understands it before any prompts. // Routed through a.Printer, so --output-json keeps it on stderr and @@ -430,10 +447,9 @@ contributors train against it without ever seeing the raw files.`)) // data CSV. The walk also yields what the per-category // resolution below needs (the image list for target-size, the // CSV for schema inference). - var ( - layout *push.LocalLayout - err error - ) + // err is the function's named return (see the --output-json defer + // at the top), so it's not redeclared here. + var layout *push.LocalLayout switch { case push.IsTabular(a.Spec.Category): layout, err = push.DiscoverTabular(a.LocalPath) @@ -602,6 +618,7 @@ contributors train against it without ever seeing the raw files.`)) a.Printer.Hintf("A real run continues with step 3 (stage your files) and step 4 (run the ingestion).") if a.OutputJSON { writePushJSON(a.JSONOut, "dry-run", spec, nil, "", "") + jsonEmitted = true } return nil } @@ -721,6 +738,7 @@ contributors train against it without ever seeing the raw files.`)) } } writePushJSON(a.JSONOut, status, spec, summary, ns, jobName) + jsonEmitted = true } if exitErr != nil { @@ -837,13 +855,15 @@ func printClusterSummary(p *ui.Printer, release *cluster.ParentRelease, pvc *clu // It's a presentation type owned by the CLI layer, so submit.Summary // stays json-tag-free and this wire format can evolve independently. type pushJSONResult struct { - Status string `json:"status"` // dry-run|succeeded|completed_with_failures|failed|detached|unknown|auth_error|submit_error|watch_error + Status string `json:"status"` // dry-run|succeeded|completed_with_failures|failed|detached|unknown|auth_error|submit_error|watch_error|error Table string `json:"table"` Category string `json:"category"` Intent string `json:"intent"` Namespace string `json:"namespace,omitempty"` JobName string `json:"job_name,omitempty"` Summary *pushJSONSummary `json:"summary,omitempty"` + Error string `json:"error,omitempty"` + ExitCode int `json:"exit_code,omitempty"` } type pushJSONSummary struct { @@ -887,3 +907,23 @@ func writePushJSON(w io.Writer, status string, spec map[string]any, s *submit.Su } _, _ = fmt.Fprintln(w, string(b)) } + +// writePushErrorJSON emits a JSON error object for --output-json runs +// that fail before a result is produced (validation, discovery, +// staging, token, port-forward). Keeps the stdout-always-JSON contract +// so a script parsing it never hits empty output on failure. (Bugbot #49) +func writePushErrorJSON(w io.Writer, sp push.SpecArgs, e error, code int) { + res := pushJSONResult{ + Status: "error", + Table: sp.Table, + Category: sp.Category, + Intent: sp.Intent, + Error: e.Error(), + ExitCode: code, + } + b, err := json.MarshalIndent(res, "", " ") + if err != nil { + return + } + _, _ = fmt.Fprintln(w, string(b)) +} diff --git a/internal/cli/dataset_rm.go b/internal/cli/dataset_rm.go index 6365ecb..761750c 100644 --- a/internal/cli/dataset_rm.go +++ b/internal/cli/dataset_rm.go @@ -182,6 +182,15 @@ undone — re-pushing the data is the only way back.`) p.Infof("Removing in-cluster artifacts…") res, err := push.Teardown(ctx, cs, resolved.RestConfig, resolved.Namespace, plan) if err != nil { + // Teardown is two sequential destructive ops; if the table drop + // succeeded but file removal didn't, say so — both ops are + // idempotent, so re-running completes the cleanup. (Bugbot #49) + if res.DroppedTable { + return &exitError{code: 7, err: fmt.Errorf( + "teardown incomplete — the table %s.%s was dropped, but removing its files failed; "+ + "re-run `tracebloc dataset rm %s` to remove the leftover files: %w", + plan.Database, plan.Table, a.Table, err)} + } return &exitError{code: 7, err: fmt.Errorf("teardown failed: %w", err)} }