diff --git a/control-plane/internal/cli/execution.go b/control-plane/internal/cli/execution.go index 6deb82cb0..da38450b7 100644 --- a/control-plane/internal/cli/execution.go +++ b/control-plane/internal/cli/execution.go @@ -23,6 +23,7 @@ func NewExecutionCommand() *cobra.Command { cmd.AddCommand(newCancelExecutionCommand()) cmd.AddCommand(newPauseExecutionCommand()) cmd.AddCommand(newResumeExecutionCommand()) + cmd.AddCommand(newRestartExecutionCommand()) return cmd } @@ -32,6 +33,11 @@ type executionActionOptions struct { timeout time.Duration jsonOutput bool reason string + scope string + reuse string + fork bool + model string + input string } func defaultExecutionActionOptions() executionActionOptions { @@ -42,6 +48,44 @@ func defaultExecutionActionOptions() executionActionOptions { } } +func newRestartExecutionCommand() *cobra.Command { + opts := defaultExecutionActionOptions() + opts.scope = "workflow" + opts.reuse = "succeeded-before" + + cmd := &cobra.Command{ + Use: "restart ", + Short: "Restart a workflow from an execution point", + Long: "Start a new run from an existing execution. 
By default, restarts the containing workflow and reuses successful app.call outputs before that point.", + Args: cobra.ExactArgs(1), + RunE: func(_ *cobra.Command, args []string) error { + body, err := buildRestartExecutionBody(opts) + if err != nil { + return err + } + _, err = runExecutionAction(executionActionConfig{ + actionName: "restart", + successVerb: "restarted", + endpoint: "/api/v1/executions/%s/restart", + opts: &opts, + executionID: args[0], + withBody: true, + body: body, + }) + return err + }, + } + + cmd.Flags().StringVar(&opts.scope, "scope", opts.scope, "Restart scope: workflow or execution") + cmd.Flags().StringVar(&opts.reuse, "reuse", opts.reuse, "Replay reuse mode: succeeded-before, all-succeeded, or none") + cmd.Flags().BoolVar(&opts.fork, "fork", false, "Mark this restart as a fork with intentional changes") + cmd.Flags().StringVar(&opts.model, "model", "", "Model override to send in restart context") + cmd.Flags().StringVar(&opts.input, "input", "", "JSON input override or @path to a JSON file") + cmd.Flags().StringVar(&opts.reason, "reason", "", "Reason for restarting the execution") + bindExecutionActionFlags(cmd, &opts) + return cmd +} + func newCancelExecutionCommand() *cobra.Command { opts := defaultExecutionActionOptions() @@ -129,6 +173,8 @@ type executionActionConfig struct { opts *executionActionOptions executionID string withReason bool + withBody bool + body map[string]interface{} } func runExecutionAction(cfg executionActionConfig) (map[string]any, error) { @@ -139,7 +185,25 @@ func runExecutionAction(cfg executionActionConfig) (map[string]any, error) { server = strings.TrimSuffix(server, "/") var bodyBytes []byte - if cfg.withReason { + if cfg.withBody { + payload := map[string]interface{}{} + for key, value := range cfg.body { + switch typed := value.(type) { + case string: + if strings.TrimSpace(typed) != "" { + payload[key] = typed + } + case nil: + default: + payload[key] = value + } + } + encoded, err := 
json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("encode payload: %w", err) + } + bodyBytes = encoded + } else if cfg.withReason { payload := map[string]string{} if strings.TrimSpace(cfg.opts.reason) != "" { payload["reason"] = cfg.opts.reason @@ -165,7 +229,7 @@ func runExecutionAction(cfg executionActionConfig) (map[string]any, error) { if err != nil { return nil, fmt.Errorf("build request: %w", err) } - if cfg.withReason { + if cfg.withReason || cfg.withBody { req.Header.Set("Content-Type", "application/json") } if cfg.opts.token != "" { @@ -200,9 +264,78 @@ func runExecutionAction(cfg executionActionConfig) (map[string]any, error) { return parsed, nil } +func buildRestartExecutionBody(opts executionActionOptions) (map[string]interface{}, error) { + body := map[string]interface{}{ + "scope": opts.scope, + "reuse": opts.reuse, + } + if strings.TrimSpace(opts.reason) != "" { + body["reason"] = opts.reason + } + if opts.fork { + body["fork"] = true + } + context := map[string]interface{}{} + if strings.TrimSpace(opts.model) != "" { + context["model"] = strings.TrimSpace(opts.model) + } + if len(context) > 0 { + body["context"] = context + } + if strings.TrimSpace(opts.input) != "" { + input, err := parseRestartInput(opts.input) + if err != nil { + return nil, err + } + body["input"] = input + } + return body, nil +} + +func parseRestartInput(value string) (map[string]interface{}, error) { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return nil, nil + } + raw := []byte(trimmed) + if strings.HasPrefix(trimmed, "@") { + path := strings.TrimSpace(strings.TrimPrefix(trimmed, "@")) + if path == "" { + return nil, fmt.Errorf("--input @path requires a file path") + } + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read input file %q: %w", path, err) + } + raw = data + } + var input map[string]interface{} + if err := json.Unmarshal(raw, &input); err != nil { + return nil, fmt.Errorf("parse --input JSON: %w", err) 
+ } + return input, nil +} + func printExecutionActionHumanOutput(parsed map[string]any, successVerb string) { executionID, _ := parsed["execution_id"].(string) previousStatus, _ := parsed["previous_status"].(string) + newRunID, _ := parsed["run_id"].(string) + sourceRunID, _ := parsed["source_run_id"].(string) + sourceExecutionID, _ := parsed["source_execution_id"].(string) + reuse, _ := parsed["replay_mode"].(string) + + if successVerb == "restarted" && newRunID != "" { + if sourceRunID != "" && sourceExecutionID != "" { + fmt.Printf("Restarted run %s from %s\n", sourceRunID, sourceExecutionID) + } else if executionID != "" { + fmt.Printf("Execution %s restarted\n", executionID) + } + fmt.Printf("New run: %s\n", newRunID) + if reuse != "" { + fmt.Printf("Reuse: %s\n", reuse) + } + return + } if executionID != "" && previousStatus != "" { fmt.Printf("Execution %s %s (was: %s)\n", executionID, successVerb, previousStatus) diff --git a/control-plane/internal/cli/execution_additional_test.go b/control-plane/internal/cli/execution_additional_test.go index be18126a2..91e6d1d94 100644 --- a/control-plane/internal/cli/execution_additional_test.go +++ b/control-plane/internal/cli/execution_additional_test.go @@ -6,6 +6,7 @@ import ( "net/http" "net/http/httptest" "os" + "path/filepath" "testing" "time" @@ -220,3 +221,54 @@ func TestResumeExecutionCommand(t *testing.T) { }) require.Contains(t, output, "Execution ex-9 resumed") } + +func TestRestartExecutionCommandPostsRestartBody(t *testing.T) { + inputFile := filepath.Join(t.TempDir(), "input.json") + require.NoError(t, os.WriteFile(inputFile, []byte(`{"topic":"restart"}`), 0o644)) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, "/api/v1/executions/ex-10/restart", r.URL.Path) + require.Equal(t, "Bearer restart-token", r.Header.Get("Authorization")) + require.Equal(t, "application/json", r.Header.Get("Content-Type")) + var body map[string]any + 
require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) + require.Equal(t, "workflow", body["scope"]) + require.Equal(t, "none", body["reuse"]) + require.Equal(t, true, body["fork"]) + require.Equal(t, "try another model", body["reason"]) + require.Equal(t, map[string]any{"model": "google/gemini-3.1-flash-lite"}, body["context"]) + require.Equal(t, map[string]any{"topic": "restart"}, body["input"]) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "execution_id":"new-root", + "run_id":"run-new", + "source_run_id":"run-old", + "source_execution_id":"ex-10", + "replay_mode":"none" + }`)) + })) + defer server.Close() + + cmd := newRestartExecutionCommand() + cmd.SetArgs([]string{ + "ex-10", + "--server", server.URL, + "--token", "restart-token", + "--reuse", "none", + "--fork", + "--model", "google/gemini-3.1-flash-lite", + "--reason", "try another model", + "--input", "@" + inputFile, + }) + + output := captureOutput(t, func() { + require.NoError(t, cmd.Execute()) + }) + require.Contains(t, output, "Restarted run run-old from ex-10") + require.Contains(t, output, "New run: run-new") + require.Contains(t, output, "Reuse: none") + + cmd = newRestartExecutionCommand() + cmd.SetArgs([]string{"ex-10", "--input", "@"}) + require.ErrorContains(t, cmd.Execute(), "--input @path requires a file path") +} diff --git a/control-plane/internal/handlers/execute.go b/control-plane/internal/handlers/execute.go index e9197bc27..ee38ca2b3 100644 --- a/control-plane/internal/handlers/execute.go +++ b/control-plane/internal/handlers/execute.go @@ -125,6 +125,12 @@ type executionStatusUpdateRequest struct { Progress *int `json:"progress,omitempty"` } +type replayHit struct { + SourceExecutionID string + SourceRunID string + Result json.RawMessage +} + type executionController struct { store ExecutionStore httpClient *http.Client @@ -240,6 +246,26 @@ func (c *executionController) handleSync(ctx *gin.Context) { return } + if plan.replayHit != nil { + if err 
:= c.completeReplayHit(reqCtx, plan); err != nil { + writeExecutionError(ctx, err) + return + } + ctx.Header("X-Execution-ID", plan.exec.ExecutionID) + ctx.Header("X-Run-ID", plan.exec.RunID) + ctx.Header("X-AgentField-Replay-Hit", plan.replayHit.SourceExecutionID) + ctx.JSON(http.StatusOK, ExecuteResponse{ + ExecutionID: plan.exec.ExecutionID, + RunID: plan.exec.RunID, + Status: types.ExecutionStatusSucceeded, + Result: decodeJSON(plan.replayHit.Result), + DurationMS: 0, + FinishedAt: time.Now().UTC().Format(time.RFC3339), + WebhookRegistered: plan.webhookRegistered, + }) + return + } + // Check LLM health and per-agent concurrency limits before proceeding if err := CheckExecutionPreconditions(plan.target.NodeID, plan.llmEndpoint); err != nil { _ = c.failExecution(reqCtx, plan, err, 0, nil) @@ -640,6 +666,35 @@ func (c *executionController) handleAsync(ctx *gin.Context) { return } + if plan.replayHit != nil { + if err := c.completeReplayHit(reqCtx, plan); err != nil { + writeExecutionError(ctx, err) + return + } + + createdAt := plan.exec.CreatedAt.UTC().Format(time.RFC3339) + targetLabel := fmt.Sprintf("%s.%s", plan.target.NodeID, plan.target.TargetName) + response := AsyncExecuteResponse{ + ExecutionID: plan.exec.ExecutionID, + RunID: plan.exec.RunID, + WorkflowID: plan.exec.RunID, + Status: string(types.ExecutionStatusSucceeded), + Target: targetLabel, + Type: plan.targetType, + CreatedAt: createdAt, + EnqueuedAt: createdAt, + WebhookRegistered: plan.webhookRegistered, + } + if plan.webhookError != nil { + response.WebhookError = plan.webhookError + } + ctx.Header("X-Execution-ID", plan.exec.ExecutionID) + ctx.Header("X-Run-ID", plan.exec.RunID) + ctx.Header("X-AgentField-Replay-Hit", plan.replayHit.SourceExecutionID) + ctx.JSON(http.StatusAccepted, response) + return + } + // Check LLM health and per-agent concurrency limits before proceeding if err := CheckExecutionPreconditions(plan.target.NodeID, plan.llmEndpoint); err != nil { _ = c.failExecution(reqCtx, 
plan, err, 0, nil) @@ -1281,20 +1336,35 @@ type preparedExecution struct { callerDID string targetDID string // Version that was selected during routing (empty if default/unversioned agent) - routedVersion string + routedVersion string + replaySourceRunID string + replayBeforeExecutionID string + replayMode string + replayHit *replayHit } func (c *executionController) prepareExecution(ctx context.Context, ginCtx *gin.Context) (*preparedExecution, error) { targetParam := ginCtx.Param("target") + var req ExecuteRequest + if err := ginCtx.ShouldBindJSON(&req); err != nil { + return nil, fmt.Errorf("invalid request body: %w", err) + } + return c.prepareExecutionForTarget( + ctx, + targetParam, + req, + readExecutionHeaders(ginCtx), + middleware.GetVerifiedCallerDID(ginCtx), + middleware.GetTargetDID(ginCtx), + ) +} + +func (c *executionController) prepareExecutionForTarget(ctx context.Context, targetParam string, req ExecuteRequest, headers executionHeaders, callerDID, targetDID string) (*preparedExecution, error) { target, err := parseTarget(targetParam) if err != nil { return nil, fmt.Errorf("invalid target: %w", err) } - var req ExecuteRequest - if err := ginCtx.ShouldBindJSON(&req); err != nil { - return nil, fmt.Errorf("invalid request body: %w", err) - } // Allow empty input for skills/reasoners that take no parameters (issue #196). if req.Input == nil { req.Input = map[string]interface{}{} @@ -1365,7 +1435,6 @@ func (c *executionController) prepareExecution(ctx context.Context, ginCtx *gin. } target.TargetType = targetType - headers := readExecutionHeaders(ginCtx) runID := headers.runID if runID == "" { runID = utils.GenerateRunID() @@ -1458,21 +1527,182 @@ func (c *executionController) prepareExecution(ctx context.Context, ginCtx *gin. 
c.ensureWorkflowExecutionRecord(ctx, exec, target, storedPayload) + hit, err := c.findReplayHit(ctx, headers, target, storedPayload) + if err != nil { + return nil, err + } + return &preparedExecution{ - exec: exec, - requestBody: agentPayloadBytes, - agent: agent, - target: target, - targetType: targetType, - llmEndpoint: extractRequestedLLMEndpoint(req), - webhookRegistered: webhookRegistered, - webhookError: webhookError, - callerDID: middleware.GetVerifiedCallerDID(ginCtx), - targetDID: middleware.GetTargetDID(ginCtx), - routedVersion: routedVersion, + exec: exec, + requestBody: agentPayloadBytes, + agent: agent, + target: target, + targetType: targetType, + llmEndpoint: extractRequestedLLMEndpoint(req), + webhookRegistered: webhookRegistered, + webhookError: webhookError, + callerDID: callerDID, + targetDID: targetDID, + routedVersion: routedVersion, + replaySourceRunID: headers.replaySourceRunID, + replayBeforeExecutionID: headers.replayBeforeExecutionID, + replayMode: headers.replayMode, + replayHit: hit, }, nil } +// findReplayHit returns a previously-succeeded child output to reuse for the +// current app.call, or nil to run it normally. Only child executions (those with +// a parent) are eligible — the restarted root always re-runs. +// +// Matching is keyed solely on (node id, reasoner id, canonical input+context); +// among matches the earliest-started succeeded source execution wins. This is +// intentionally position- and ordering-agnostic, so two calls to the same +// reasoner with identical input+context within a run will both reuse the first +// source result. That is correct for deterministic graphs; callers that need a +// distinct result per identical call should vary the input/context or restart +// with reuse=none. 
+func (c *executionController) findReplayHit(ctx context.Context, headers executionHeaders, target *parsedTarget, storedPayload []byte) (*replayHit, error) { + if target == nil || headers.parentExecutionID == nil { + return nil, nil + } + sourceRunID := strings.TrimSpace(headers.replaySourceRunID) + if sourceRunID == "" { + return nil, nil + } + mode := strings.TrimSpace(headers.replayMode) + if mode == "" { + mode = "succeeded-before" + } + if mode == "none" { + return nil, nil + } + if mode != "succeeded-before" && mode != "all-succeeded" { + return nil, fmt.Errorf("unsupported replay mode %q", mode) + } + + executions, err := c.store.QueryExecutionRecords(ctx, types.ExecutionFilter{ + RunID: &sourceRunID, + SortBy: "started_at", + SortDescending: false, + }) + if err != nil { + return nil, fmt.Errorf("query replay source run: %w", err) + } + if len(executions) == 0 { + return nil, nil + } + + var beforeTime *time.Time + if mode == "succeeded-before" && strings.TrimSpace(headers.replayBeforeExecutionID) != "" { + for _, exec := range executions { + if exec != nil && exec.ExecutionID == headers.replayBeforeExecutionID { + t := exec.StartedAt + beforeTime = &t + break + } + } + if beforeTime == nil { + return nil, nil + } + } + + newKey, ok := canonicalReplayPayload(storedPayload) + if !ok { + return nil, nil + } + for _, exec := range executions { + if exec == nil { + continue + } + if beforeTime != nil && !exec.StartedAt.Before(*beforeTime) { + continue + } + if exec.Status != types.ExecutionStatusSucceeded { + continue + } + if exec.NodeID != target.NodeID || exec.ReasonerID != target.TargetName { + continue + } + if len(exec.ResultPayload) == 0 { + continue + } + oldKey, oldOK := canonicalReplayPayload(exec.InputPayload) + if !oldOK || oldKey != newKey { + continue + } + return &replayHit{ + SourceExecutionID: exec.ExecutionID, + SourceRunID: exec.RunID, + Result: json.RawMessage(cloneBytes(exec.ResultPayload)), + }, nil + } + return nil, nil +} + +func 
canonicalReplayPayload(raw []byte) (string, bool) { + if len(raw) == 0 { + return "", false + } + var v interface{} + if err := json.Unmarshal(raw, &v); err != nil { + return "", false + } + encoded, err := json.Marshal(v) + if err != nil { + return "", false + } + return string(encoded), true +} + +func (c *executionController) completeReplayHit(ctx context.Context, plan *preparedExecution) error { + if plan == nil || plan.exec == nil || plan.replayHit == nil { + return fmt.Errorf("missing replay execution plan") + } + reason := "replayed_from_execution:" + plan.replayHit.SourceExecutionID + now := time.Now().UTC() + duration := int64(0) + result := cloneBytes(plan.replayHit.Result) + resultURI := c.savePayload(ctx, result) + + updated, err := c.store.UpdateExecutionRecord(ctx, plan.exec.ExecutionID, func(current *types.Execution) (*types.Execution, error) { + if current == nil { + return nil, fmt.Errorf("execution %s not found", plan.exec.ExecutionID) + } + current.Status = types.ExecutionStatusSucceeded + current.StatusReason = &reason + current.ResultPayload = json.RawMessage(result) + current.ResultURI = resultURI + current.ErrorMessage = nil + current.CompletedAt = &now + current.DurationMS = &duration + current.UpdatedAt = now + return current, nil + }) + if err != nil { + return err + } + + c.updateWorkflowExecutionFinalState(ctx, plan.exec.ExecutionID, types.ExecutionStatusSucceeded, result, 0, nil) + c.updateWorkflowExecutionStatus(ctx, plan.exec.ExecutionID, types.ExecutionStatusSucceeded, &reason) + if plan.webhookRegistered || (updated != nil && updated.WebhookRegistered) { + c.triggerWebhook(plan.exec.ExecutionID) + } + + eventData := map[string]interface{}{ + "replay": map[string]interface{}{ + "source_execution_id": plan.replayHit.SourceExecutionID, + "source_run_id": plan.replayHit.SourceRunID, + }, + "result": decodeJSON(result), + } + if inputPayload := decodeJSON(plan.exec.InputPayload); inputPayload != nil { + eventData["input"] = inputPayload 
+ } + c.publishExecutionEventWithReasonerInfo(updated, string(types.ExecutionStatusSucceeded), eventData, plan.agent, &plan.target.TargetName) + return nil +} + func extractRequestedLLMEndpoint(req ExecuteRequest) string { for _, key := range []string{"llm_endpoint", "llm_backend", "backend", "provider", "model_provider"} { if value, ok := req.Context[key]; ok { @@ -1537,6 +1767,15 @@ func (c *executionController) callAgent(ctx context.Context, plan *preparedExecu if plan.targetDID != "" { req.Header.Set("X-Target-DID", plan.targetDID) } + if plan.replaySourceRunID != "" { + req.Header.Set("X-AgentField-Replay-Source-Run-ID", plan.replaySourceRunID) + } + if plan.replayBeforeExecutionID != "" { + req.Header.Set("X-AgentField-Replay-Before-Execution-ID", plan.replayBeforeExecutionID) + } + if plan.replayMode != "" { + req.Header.Set("X-AgentField-Replay-Mode", plan.replayMode) + } resp, err := c.httpClient.Do(req) if err != nil { @@ -1745,10 +1984,13 @@ func (c *executionController) triggerWebhook(executionID string) { } type executionHeaders struct { - runID string - parentExecutionID *string - sessionID *string - actorID *string + runID string + parentExecutionID *string + sessionID *string + actorID *string + replaySourceRunID string + replayBeforeExecutionID string + replayMode string } func readExecutionHeaders(ctx *gin.Context) executionHeaders { @@ -1756,6 +1998,9 @@ func readExecutionHeaders(ctx *gin.Context) executionHeaders { parent := strings.TrimSpace(ctx.GetHeader("X-Parent-Execution-ID")) session := strings.TrimSpace(ctx.GetHeader("X-Session-ID")) actor := strings.TrimSpace(ctx.GetHeader("X-Actor-ID")) + replaySourceRunID := strings.TrimSpace(ctx.GetHeader("X-AgentField-Replay-Source-Run-ID")) + replayBeforeExecutionID := strings.TrimSpace(ctx.GetHeader("X-AgentField-Replay-Before-Execution-ID")) + replayMode := strings.TrimSpace(ctx.GetHeader("X-AgentField-Replay-Mode")) var parentPtr *string if parent != "" { @@ -1773,10 +2018,13 @@ func 
readExecutionHeaders(ctx *gin.Context) executionHeaders { } return executionHeaders{ - runID: runID, - parentExecutionID: parentPtr, - sessionID: sessionPtr, - actorID: actorPtr, + runID: runID, + parentExecutionID: parentPtr, + sessionID: sessionPtr, + actorID: actorPtr, + replaySourceRunID: replaySourceRunID, + replayBeforeExecutionID: replayBeforeExecutionID, + replayMode: replayMode, } } diff --git a/control-plane/internal/handlers/execute_restart.go b/control-plane/internal/handlers/execute_restart.go new file mode 100644 index 000000000..82e7da070 --- /dev/null +++ b/control-plane/internal/handlers/execute_restart.go @@ -0,0 +1,287 @@ +package handlers + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "sort" + "strings" + "time" + + "github.com/Agent-Field/agentfield/control-plane/internal/logger" + "github.com/Agent-Field/agentfield/control-plane/internal/services" + "github.com/Agent-Field/agentfield/control-plane/internal/utils" + "github.com/Agent-Field/agentfield/control-plane/pkg/types" + + "github.com/gin-gonic/gin" +) + +type restartExecutionRequest struct { + Scope string `json:"scope,omitempty"` + Reuse string `json:"reuse,omitempty"` + Fork bool `json:"fork,omitempty"` + Reason string `json:"reason,omitempty"` + Input map[string]interface{} `json:"input,omitempty"` + Context map[string]interface{} `json:"context,omitempty"` + Webhook *WebhookRequest `json:"webhook,omitempty"` +} + +type restartExecutionResponse struct { + ExecutionID string `json:"execution_id"` + RunID string `json:"run_id"` + WorkflowID string `json:"workflow_id"` + Status string `json:"status"` + Target string `json:"target"` + Type string `json:"type"` + CreatedAt string `json:"created_at"` + EnqueuedAt string `json:"enqueued_at,omitempty"` + SourceExecutionID string `json:"source_execution_id"` + SourceRunID string `json:"source_run_id"` + RestartedExecutionID string `json:"restarted_execution_id"` + ReplayBeforeExecutionID *string 
`json:"replay_before_execution_id,omitempty"` + ReplayMode string `json:"replay_mode"` + Scope string `json:"scope"` + Kind string `json:"kind"` + WebhookRegistered bool `json:"webhook_registered"` + WebhookError *string `json:"webhook_error,omitempty"` +} + +type workflowRunMetadataStore interface { + StoreWorkflowRun(ctx context.Context, run *types.WorkflowRun) error +} + +// RestartExecutionHandler starts a new execution/run from an existing workflow +// point. The restarted code runs normally, while downstream app.call requests can +// reuse matching successful child outputs from the source run. +func RestartExecutionHandler(store ExecutionStore, payloads services.PayloadStore, webhooks services.WebhookDispatcher, timeout time.Duration, internalToken string) gin.HandlerFunc { + controller := newExecutionController(store, payloads, webhooks, timeout, internalToken) + return controller.handleRestart +} + +func (c *executionController) handleRestart(ctx *gin.Context) { + sourceExecutionID := strings.TrimSpace(ctx.Param("execution_id")) + if sourceExecutionID == "" { + ctx.JSON(http.StatusBadRequest, gin.H{"error": "execution_id is required"}) + return + } + + var req restartExecutionRequest + if err := ctx.ShouldBindJSON(&req); err != nil && !errors.Is(err, io.EOF) { + ctx.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("invalid request body: %v", err)}) + return + } + + scope := strings.TrimSpace(req.Scope) + if scope == "" { + scope = "workflow" + } + if scope != "workflow" && scope != "execution" { + ctx.JSON(http.StatusBadRequest, gin.H{"error": "scope must be one of: workflow, execution"}) + return + } + + reuse := strings.TrimSpace(req.Reuse) + if reuse == "" { + reuse = "succeeded-before" + } + if reuse != "succeeded-before" && reuse != "all-succeeded" && reuse != "none" { + ctx.JSON(http.StatusBadRequest, gin.H{"error": "reuse must be one of: succeeded-before, all-succeeded, none"}) + return + } + + reqCtx := ctx.Request.Context() + sourceExec, err 
:= c.store.GetExecutionRecord(reqCtx, sourceExecutionID) + if err != nil { + logger.Logger.Error().Err(err).Str("execution_id", sourceExecutionID).Msg("restart: failed to load source execution") + ctx.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load source execution"}) + return + } + if sourceExec == nil { + ctx.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("execution %s not found", sourceExecutionID)}) + return + } + + restartExec := sourceExec + if scope == "workflow" { + root, rootErr := c.findWorkflowRestartRoot(reqCtx, sourceExec.RunID) + if rootErr != nil { + logger.Logger.Error().Err(rootErr).Str("run_id", sourceExec.RunID).Msg("restart: failed to find workflow root") + ctx.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load workflow root"}) + return + } + if root == nil { + ctx.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("run %s not found", sourceExec.RunID)}) + return + } + restartExec = root + } + + stored := types.DecodeStoredExecutionPayload(restartExec.InputPayload) + input := stored.Input + if input == nil { + input = map[string]interface{}{} + } + if req.Input != nil { + input = req.Input + } + contextPayload := stored.Context + if req.Context != nil { + contextPayload = req.Context + } + + newRunID := utils.GenerateRunID() + headers := executionHeaders{ + runID: newRunID, + sessionID: restartExec.SessionID, + actorID: restartExec.ActorID, + replaySourceRunID: sourceExec.RunID, + replayBeforeExecutionID: sourceExec.ExecutionID, + replayMode: reuse, + } + if reuse == "none" { + headers.replaySourceRunID = "" + headers.replayBeforeExecutionID = "" + } + if scope == "execution" && reuse == "succeeded-before" { + headers.replayMode = "all-succeeded" + headers.replayBeforeExecutionID = "" + } + + target := fmt.Sprintf("%s.%s", restartExec.NodeID, restartExec.ReasonerID) + plan, err := c.prepareExecutionForTarget(reqCtx, target, ExecuteRequest{ + Input: input, + Context: contextPayload, + Webhook: 
req.Webhook, + }, headers, "", "") + if err != nil { + writeExecutionError(ctx, err) + return + } + + if err := CheckExecutionPreconditions(plan.target.NodeID, plan.llmEndpoint); err != nil { + _ = c.failExecution(reqCtx, plan, err, 0, nil) + writeExecutionError(ctx, err) + return + } + + kind := "restart" + if req.Fork || req.Input != nil || req.Context != nil { + kind = "fork" + } + c.persistRestartRunMetadata(reqCtx, plan, sourceExec, restartExec, scope, reuse, kind, req.Reason) + + c.publishExecutionStartedEvent(plan) + + pool := getAsyncWorkerPool() + job := asyncExecutionJob{ + controller: c, + plan: *plan, + } + if ok := pool.submit(job); !ok { + ReleaseExecutionSlot(plan.target.NodeID) + queueErr := errors.New("async execution queue is full; retry later") + if updateErr := c.failExecution(reqCtx, plan, queueErr, 0, nil); updateErr != nil { + logger.Logger.Error().Err(updateErr).Str("execution_id", plan.exec.ExecutionID).Msg("restart: failed to persist queue saturation") + } + ctx.JSON(http.StatusServiceUnavailable, gin.H{"error": queueErr.Error(), "error_category": "concurrency_limit"}) + return + } + + createdAt := plan.exec.CreatedAt.UTC().Format(time.RFC3339) + var replayBefore *string + if headers.replayBeforeExecutionID != "" { + replayBefore = &headers.replayBeforeExecutionID + } + response := restartExecutionResponse{ + ExecutionID: plan.exec.ExecutionID, + RunID: plan.exec.RunID, + WorkflowID: plan.exec.RunID, + Status: string(types.ExecutionStatusQueued), + Target: target, + Type: plan.targetType, + CreatedAt: createdAt, + EnqueuedAt: createdAt, + SourceExecutionID: sourceExec.ExecutionID, + SourceRunID: sourceExec.RunID, + RestartedExecutionID: restartExec.ExecutionID, + ReplayBeforeExecutionID: replayBefore, + ReplayMode: headers.replayMode, + Scope: scope, + Kind: kind, + WebhookRegistered: plan.webhookRegistered, + WebhookError: plan.webhookError, + } + ctx.Header("X-Execution-ID", plan.exec.ExecutionID) + ctx.Header("X-Run-ID", 
plan.exec.RunID) + ctx.JSON(http.StatusAccepted, response) +} + +func (c *executionController) findWorkflowRestartRoot(ctx context.Context, runID string) (*types.Execution, error) { + executions, err := c.store.QueryExecutionRecords(ctx, types.ExecutionFilter{ + RunID: &runID, + SortBy: "started_at", + SortDescending: false, + }) + if err != nil || len(executions) == 0 { + return nil, err + } + sort.SliceStable(executions, func(i, j int) bool { + return executions[i].StartedAt.Before(executions[j].StartedAt) + }) + for _, exec := range executions { + if exec != nil && (exec.ParentExecutionID == nil || strings.TrimSpace(*exec.ParentExecutionID) == "") { + return exec, nil + } + } + return executions[0], nil +} + +func (c *executionController) persistRestartRunMetadata(ctx context.Context, plan *preparedExecution, sourceExec, restartExec *types.Execution, scope, reuse, kind, reason string) { + if plan == nil || plan.exec == nil || sourceExec == nil || restartExec == nil { + return + } + store, ok := c.store.(workflowRunMetadataStore) + if !ok { + return + } + now := time.Now().UTC() + metadata := map[string]interface{}{ + "lineage": map[string]interface{}{ + "kind": kind, + "source_run_id": sourceExec.RunID, + "source_execution_id": sourceExec.ExecutionID, + "restarted_execution_id": restartExec.ExecutionID, + "reuse": reuse, + "scope": scope, + }, + } + if trimmed := strings.TrimSpace(reason); trimmed != "" { + metadata["reason"] = trimmed + } + encoded, err := json.Marshal(metadata) + if err != nil { + logger.Logger.Warn().Err(err).Str("run_id", plan.exec.RunID).Msg("failed to encode restart run metadata") + return + } + // This workflow_runs row exists only to carry lineage/golden metadata for the + // new run; it is the sole writer of this row for restart runs. 
Status and + // TotalSteps are seeded at enqueue time and are NOT kept current as the run + // progresses — every UI read path (run list, run detail, DAG) derives live + // status and step counts from execution aggregation and only reads the + // lineage/golden fields here. Do not treat these columns as authoritative. + if err := store.StoreWorkflowRun(ctx, &types.WorkflowRun{ + RunID: plan.exec.RunID, + RootWorkflowID: plan.exec.RunID, + RootExecutionID: &plan.exec.ExecutionID, + Status: string(types.ExecutionStatusQueued), + TotalSteps: 1, + Metadata: json.RawMessage(encoded), + CreatedAt: now, + UpdatedAt: now, + }); err != nil { + logger.Logger.Warn().Err(err).Str("run_id", plan.exec.RunID).Msg("failed to persist restart run metadata") + } +} diff --git a/control-plane/internal/handlers/execute_restart_test.go b/control-plane/internal/handlers/execute_restart_test.go new file mode 100644 index 000000000..c9c93ea7e --- /dev/null +++ b/control-plane/internal/handlers/execute_restart_test.go @@ -0,0 +1,562 @@ +package handlers + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/Agent-Field/agentfield/control-plane/internal/services" + "github.com/Agent-Field/agentfield/control-plane/pkg/types" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +func TestExecuteAsyncHandler_ReplaysMatchingSucceededChildCall(t *testing.T) { + gin.SetMode(gin.TestMode) + + var agentCalls int32 + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&agentCalls, 1) + w.WriteHeader(http.StatusInternalServerError) + })) + defer agentServer.Close() + + store := newTestExecutionStorage(testRestartAgent(agentServer.URL)) + now := time.Now().UTC() + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-child", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: 
"reasoner-a", + Status: types.ExecutionStatusSucceeded, + InputPayload: json.RawMessage(`{"input":{"foo":"bar"}}`), + ResultPayload: json.RawMessage(`{"answer":42}`), + StartedAt: now.Add(-2 * time.Minute), + CreatedAt: now.Add(-2 * time.Minute), + UpdatedAt: now.Add(-2 * time.Minute), + }) + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-failed", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-b", + Status: types.ExecutionStatusFailed, + InputPayload: json.RawMessage(`{"input":{"step":"failed"}}`), + StartedAt: now, + CreatedAt: now, + UpdatedAt: now, + }) + + router := gin.New() + router.POST("/api/v1/execute/async/:target", ExecuteAsyncHandler(store, services.NewFilePayloadStore(t.TempDir()), nil, 90*time.Second, "")) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/execute/async/node-1.reasoner-a", strings.NewReader(`{"input":{"foo":"bar"}}`)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Run-ID", "new-run") + req.Header.Set("X-Parent-Execution-ID", "new-parent") + req.Header.Set("X-AgentField-Replay-Source-Run-ID", "old-run") + req.Header.Set("X-AgentField-Replay-Before-Execution-ID", "old-failed") + req.Header.Set("X-AgentField-Replay-Mode", "succeeded-before") + resp := httptest.NewRecorder() + + router.ServeHTTP(resp, req) + + require.Equal(t, http.StatusAccepted, resp.Code) + require.Equal(t, "old-child", resp.Header().Get("X-AgentField-Replay-Hit")) + require.EqualValues(t, 0, atomic.LoadInt32(&agentCalls)) + + var payload AsyncExecuteResponse + require.NoError(t, json.Unmarshal(resp.Body.Bytes(), &payload)) + require.Equal(t, string(types.ExecutionStatusSucceeded), payload.Status) + + record, err := store.GetExecutionRecord(context.Background(), payload.ExecutionID) + require.NoError(t, err) + require.NotNil(t, record) + require.Equal(t, types.ExecutionStatusSucceeded, record.Status) + require.JSONEq(t, `{"answer":42}`, string(record.ResultPayload)) + 
require.NotNil(t, record.StatusReason) + require.Equal(t, "replayed_from_execution:old-child", *record.StatusReason) + + workflowRecord, err := store.GetWorkflowExecution(context.Background(), payload.ExecutionID) + require.NoError(t, err) + require.NotNil(t, workflowRecord) + require.Equal(t, string(types.ExecutionStatusSucceeded), workflowRecord.Status) +} + +func TestExecuteHandler_ReplaysMatchingSucceededChildCall(t *testing.T) { + gin.SetMode(gin.TestMode) + + var agentCalls int32 + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&agentCalls, 1) + w.WriteHeader(http.StatusInternalServerError) + })) + defer agentServer.Close() + + store := newTestExecutionStorage(testRestartAgent(agentServer.URL)) + now := time.Now().UTC() + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-sync-child", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-a", + Status: types.ExecutionStatusSucceeded, + InputPayload: json.RawMessage(`{"input":{"foo":"bar"}}`), + ResultPayload: json.RawMessage(`{"answer":42}`), + StartedAt: now.Add(-2 * time.Minute), + CreatedAt: now.Add(-2 * time.Minute), + UpdatedAt: now.Add(-2 * time.Minute), + }) + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-sync-failed", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-b", + Status: types.ExecutionStatusFailed, + InputPayload: json.RawMessage(`{"input":{"step":"failed"}}`), + StartedAt: now, + CreatedAt: now, + UpdatedAt: now, + }) + + router := gin.New() + router.POST("/api/v1/execute/:target", ExecuteHandler(store, services.NewFilePayloadStore(t.TempDir()), nil, 90*time.Second, "")) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/execute/node-1.reasoner-a", strings.NewReader(`{"input":{"foo":"bar"}}`)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Run-ID", "new-run") + 
req.Header.Set("X-Parent-Execution-ID", "new-parent") + req.Header.Set("X-AgentField-Replay-Source-Run-ID", "old-run") + req.Header.Set("X-AgentField-Replay-Before-Execution-ID", "old-sync-failed") + req.Header.Set("X-AgentField-Replay-Mode", "succeeded-before") + resp := httptest.NewRecorder() + + router.ServeHTTP(resp, req) + + require.Equal(t, http.StatusOK, resp.Code) + require.Equal(t, "old-sync-child", resp.Header().Get("X-AgentField-Replay-Hit")) + require.EqualValues(t, 0, atomic.LoadInt32(&agentCalls)) + + var payload ExecuteResponse + require.NoError(t, json.Unmarshal(resp.Body.Bytes(), &payload)) + require.Equal(t, string(types.ExecutionStatusSucceeded), payload.Status) + require.Equal(t, map[string]interface{}{"answer": float64(42)}, payload.Result) + require.Zero(t, payload.DurationMS) +} + +func TestExecuteHandler_DoesNotReplaySucceededChildAfterMarker(t *testing.T) { + gin.SetMode(gin.TestMode) + + var agentCalls int32 + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&agentCalls, 1) + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"fresh":true}`)) + })) + defer agentServer.Close() + + store := newTestExecutionStorage(testRestartAgent(agentServer.URL)) + now := time.Now().UTC() + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-failed", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-b", + Status: types.ExecutionStatusFailed, + InputPayload: json.RawMessage(`{"input":{"step":"failed"}}`), + StartedAt: now, + CreatedAt: now, + UpdatedAt: now, + }) + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-child-late", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-a", + Status: types.ExecutionStatusSucceeded, + InputPayload: json.RawMessage(`{"input":{"foo":"bar"}}`), + ResultPayload: json.RawMessage(`{"answer":42}`), + StartedAt: 
now.Add(time.Minute), + CreatedAt: now.Add(time.Minute), + UpdatedAt: now.Add(time.Minute), + }) + + router := gin.New() + router.POST("/api/v1/execute/:target", ExecuteHandler(store, services.NewFilePayloadStore(t.TempDir()), nil, 90*time.Second, "")) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/execute/node-1.reasoner-a", strings.NewReader(`{"input":{"foo":"bar"}}`)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Run-ID", "new-run") + req.Header.Set("X-Parent-Execution-ID", "new-parent") + req.Header.Set("X-AgentField-Replay-Source-Run-ID", "old-run") + req.Header.Set("X-AgentField-Replay-Before-Execution-ID", "old-failed") + req.Header.Set("X-AgentField-Replay-Mode", "succeeded-before") + resp := httptest.NewRecorder() + + router.ServeHTTP(resp, req) + + require.Equal(t, http.StatusOK, resp.Code) + require.Empty(t, resp.Header().Get("X-AgentField-Replay-Hit")) + require.EqualValues(t, 1, atomic.LoadInt32(&agentCalls)) + var payload ExecuteResponse + require.NoError(t, json.Unmarshal(resp.Body.Bytes(), &payload)) + require.Equal(t, resp.Header().Get("X-Execution-ID"), payload.ExecutionID) + require.Equal(t, "new-run", payload.RunID) + require.Equal(t, string(types.ExecutionStatusSucceeded), payload.Status) + require.True(t, payload.DurationMS >= 0) + require.NotEmpty(t, payload.FinishedAt) + require.Equal(t, map[string]interface{}{"fresh": true}, payload.Result) +} + +func TestExecuteHandler_AllSucceededReplaysMatchingChildAfterMarker(t *testing.T) { + gin.SetMode(gin.TestMode) + + var agentCalls int32 + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&agentCalls, 1) + w.WriteHeader(http.StatusInternalServerError) + })) + defer agentServer.Close() + + store := newTestExecutionStorage(testRestartAgent(agentServer.URL)) + now := time.Now().UTC() + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-failed", + RunID: "old-run", + AgentNodeID: 
"node-1", + NodeID: "node-1", + ReasonerID: "reasoner-b", + Status: types.ExecutionStatusFailed, + InputPayload: json.RawMessage(`{"input":{"step":"failed"}}`), + StartedAt: now, + CreatedAt: now, + UpdatedAt: now, + }) + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-child-late", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-a", + Status: types.ExecutionStatusSucceeded, + InputPayload: json.RawMessage(`{"input":{"foo":"bar"}}`), + ResultPayload: json.RawMessage(`{"answer":42}`), + StartedAt: now.Add(time.Minute), + CreatedAt: now.Add(time.Minute), + UpdatedAt: now.Add(time.Minute), + }) + + router := gin.New() + router.POST("/api/v1/execute/:target", ExecuteHandler(store, services.NewFilePayloadStore(t.TempDir()), nil, 90*time.Second, "")) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/execute/node-1.reasoner-a", strings.NewReader(`{"input":{"foo":"bar"}}`)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Run-ID", "new-run") + req.Header.Set("X-Parent-Execution-ID", "new-parent") + req.Header.Set("X-AgentField-Replay-Source-Run-ID", "old-run") + req.Header.Set("X-AgentField-Replay-Before-Execution-ID", "old-failed") + req.Header.Set("X-AgentField-Replay-Mode", "all-succeeded") + resp := httptest.NewRecorder() + + router.ServeHTTP(resp, req) + + require.Equal(t, http.StatusOK, resp.Code) + require.Equal(t, "old-child-late", resp.Header().Get("X-AgentField-Replay-Hit")) + require.EqualValues(t, 0, atomic.LoadInt32(&agentCalls)) + + var payload ExecuteResponse + require.NoError(t, json.Unmarshal(resp.Body.Bytes(), &payload)) + require.Equal(t, string(types.ExecutionStatusSucceeded), payload.Status) + require.Equal(t, map[string]interface{}{"answer": float64(42)}, payload.Result) +} + +func TestRestartExecutionHandler_ForwardsReplayHeadersToRestartedRoot(t *testing.T) { + gin.SetMode(gin.TestMode) + + var ( + mu sync.Mutex + headers http.Header + body string + ) + 
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + mu.Lock() + headers = r.Header.Clone() + raw := make([]byte, r.ContentLength) + _, _ = r.Body.Read(raw) + body = string(raw) + mu.Unlock() + + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"root":"ok"}`)) + })) + defer agentServer.Close() + + withTestAsyncPool(t) + + store := newTestExecutionStorage(testRestartAgent(agentServer.URL)) + now := time.Now().UTC() + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-root", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-a", + Status: types.ExecutionStatusSucceeded, + InputPayload: json.RawMessage(`{"input":{"topic":"restart"},"context":{"priority":"high"}}`), + ResultPayload: json.RawMessage(`{"root":"old"}`), + StartedAt: now.Add(-2 * time.Minute), + CreatedAt: now.Add(-2 * time.Minute), + UpdatedAt: now.Add(-2 * time.Minute), + }) + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-failed-child", + RunID: "old-run", + ParentExecutionID: pointerString("old-root"), + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-b", + Status: types.ExecutionStatusFailed, + InputPayload: json.RawMessage(`{"input":{"child":"boom"}}`), + StartedAt: now, + CreatedAt: now, + UpdatedAt: now, + }) + + router := gin.New() + router.POST("/api/v1/executions/:execution_id/restart", RestartExecutionHandler(store, services.NewFilePayloadStore(t.TempDir()), nil, 90*time.Second, "")) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/executions/old-failed-child/restart", strings.NewReader(`{}`)) + req.Header.Set("Content-Type", "application/json") + resp := httptest.NewRecorder() + + router.ServeHTTP(resp, req) + + require.Equal(t, http.StatusAccepted, resp.Code) + var payload restartExecutionResponse + require.NoError(t, json.Unmarshal(resp.Body.Bytes(), &payload)) + require.Equal(t, "old-failed-child", 
payload.SourceExecutionID) + require.Equal(t, "old-root", payload.RestartedExecutionID) + require.Equal(t, "succeeded-before", payload.ReplayMode) + require.NotEqual(t, "old-run", payload.RunID) + + require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() + return headers.Get("X-AgentField-Replay-Source-Run-ID") == "old-run" + }, 2*time.Second, 20*time.Millisecond) + + mu.Lock() + defer mu.Unlock() + require.Equal(t, "old-failed-child", headers.Get("X-AgentField-Replay-Before-Execution-ID")) + require.Equal(t, "succeeded-before", headers.Get("X-AgentField-Replay-Mode")) + require.Equal(t, payload.RunID, headers.Get("X-Run-ID")) + require.JSONEq(t, `{"topic":"restart"}`, body) +} + +func TestRestartExecutionHandler_ReuseNoneDoesNotForwardReplayHeaders(t *testing.T) { + gin.SetMode(gin.TestMode) + + var ( + mu sync.Mutex + headers http.Header + ) + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + mu.Lock() + headers = r.Header.Clone() + mu.Unlock() + + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"root":"ok"}`)) + })) + defer agentServer.Close() + + withTestAsyncPool(t) + + store := newTestExecutionStorage(testRestartAgent(agentServer.URL)) + now := time.Now().UTC() + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-root", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-a", + Status: types.ExecutionStatusFailed, + InputPayload: json.RawMessage(`{"input":{"topic":"restart"}}`), + StartedAt: now, + CreatedAt: now, + UpdatedAt: now, + }) + + router := gin.New() + router.POST("/api/v1/executions/:execution_id/restart", RestartExecutionHandler(store, services.NewFilePayloadStore(t.TempDir()), nil, 90*time.Second, "")) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/executions/old-root/restart", strings.NewReader(`{"reuse":"none"}`)) + req.Header.Set("Content-Type", "application/json") + resp := 
httptest.NewRecorder() + + router.ServeHTTP(resp, req) + + require.Equal(t, http.StatusAccepted, resp.Code) + var payload restartExecutionResponse + require.NoError(t, json.Unmarshal(resp.Body.Bytes(), &payload)) + require.Equal(t, "none", payload.ReplayMode) + require.Nil(t, payload.ReplayBeforeExecutionID) + + require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() + return headers.Get("X-Run-ID") == payload.RunID + }, 2*time.Second, 20*time.Millisecond) + + mu.Lock() + defer mu.Unlock() + require.Empty(t, headers.Get("X-AgentField-Replay-Source-Run-ID")) + require.Empty(t, headers.Get("X-AgentField-Replay-Before-Execution-ID")) + require.Equal(t, "none", headers.Get("X-AgentField-Replay-Mode")) +} + +func TestRestartExecutionHandler_ExecutionScopeUsesAllSucceededReplay(t *testing.T) { + gin.SetMode(gin.TestMode) + + var ( + mu sync.Mutex + headers http.Header + ) + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + mu.Lock() + headers = r.Header.Clone() + mu.Unlock() + + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"child":"ok"}`)) + })) + defer agentServer.Close() + + withTestAsyncPool(t) + + store := newTestExecutionStorage(testRestartAgent(agentServer.URL)) + now := time.Now().UTC() + seedExecutionRecord(t, store, &types.Execution{ + ExecutionID: "old-child", + RunID: "old-run", + AgentNodeID: "node-1", + NodeID: "node-1", + ReasonerID: "reasoner-b", + Status: types.ExecutionStatusFailed, + InputPayload: json.RawMessage(`{"input":{"child":"retry"}}`), + StartedAt: now, + CreatedAt: now, + UpdatedAt: now, + }) + + router := gin.New() + router.POST("/api/v1/executions/:execution_id/restart", RestartExecutionHandler(store, services.NewFilePayloadStore(t.TempDir()), nil, 90*time.Second, "")) + + req := httptest.NewRequest(http.MethodPost, "/api/v1/executions/old-child/restart", strings.NewReader(`{"scope":"execution"}`)) + req.Header.Set("Content-Type", 
"application/json") + resp := httptest.NewRecorder() + + router.ServeHTTP(resp, req) + + require.Equal(t, http.StatusAccepted, resp.Code) + var payload restartExecutionResponse + require.NoError(t, json.Unmarshal(resp.Body.Bytes(), &payload)) + require.Equal(t, "old-child", payload.RestartedExecutionID) + require.Equal(t, "all-succeeded", payload.ReplayMode) + require.Nil(t, payload.ReplayBeforeExecutionID) + + require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() + return headers.Get("X-AgentField-Replay-Source-Run-ID") == "old-run" + }, 2*time.Second, 20*time.Millisecond) + + mu.Lock() + defer mu.Unlock() + require.Empty(t, headers.Get("X-AgentField-Replay-Before-Execution-ID")) + require.Equal(t, "all-succeeded", headers.Get("X-AgentField-Replay-Mode")) +} + +func TestRestartExecutionHandlerValidationErrors(t *testing.T) { + gin.SetMode(gin.TestMode) + + store := newTestExecutionStorage(testRestartAgent("http://agent.test")) + router := gin.New() + router.POST("/api/v1/executions/:execution_id/restart", RestartExecutionHandler(store, services.NewFilePayloadStore(t.TempDir()), nil, 90*time.Second, "")) + + tests := []struct { + name string + body string + want int + }{ + {name: "invalid json", body: `{"scope":`, want: http.StatusBadRequest}, + {name: "invalid scope", body: `{"scope":"node"}`, want: http.StatusBadRequest}, + {name: "invalid reuse", body: `{"reuse":"cached"}`, want: http.StatusBadRequest}, + {name: "missing source execution", body: `{}`, want: http.StatusNotFound}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/api/v1/executions/missing/restart", strings.NewReader(tt.body)) + req.Header.Set("Content-Type", "application/json") + resp := httptest.NewRecorder() + + router.ServeHTTP(resp, req) + + require.Equal(t, tt.want, resp.Code) + require.Contains(t, resp.Body.String(), "error") + }) + } +} + +func TestExecutionReuseInfoParsesReplayStatusReason(t *testing.T) { 
+ reason := "replayed_from_execution:source-exec" + info := executionReuseInfo(&types.Execution{StatusReason: &reason}) + require.NotNil(t, info) + require.True(t, info.Hit) + require.Equal(t, "source-exec", info.SourceExecutionID) + + plain := "ordinary status" + require.Nil(t, executionReuseInfo(&types.Execution{StatusReason: &plain})) + require.Nil(t, executionReuseInfo(nil)) +} + +func testRestartAgent(baseURL string) *types.AgentNode { + return &types.AgentNode{ + ID: "node-1", + BaseURL: baseURL, + Reasoners: []types.ReasonerDefinition{{ID: "reasoner-a"}, {ID: "reasoner-b"}}, + HealthStatus: types.HealthStatusActive, + LifecycleStatus: types.AgentStatusReady, + } +} + +func seedExecutionRecord(t *testing.T, store *testExecutionStorage, exec *types.Execution) { + t.Helper() + require.NoError(t, store.CreateExecutionRecord(context.Background(), exec)) +} + +func withTestAsyncPool(t *testing.T) { + t.Helper() + prevAsyncPool := asyncPool + asyncPool = newAsyncWorkerPool(1, 8) + asyncPoolOnce = sync.Once{} + asyncPoolOnce.Do(func() {}) + t.Cleanup(func() { + asyncPool = prevAsyncPool + asyncPoolOnce = sync.Once{} + }) +} diff --git a/control-plane/internal/handlers/ui/coverage_execution_workflow_handlers_test.go b/control-plane/internal/handlers/ui/coverage_execution_workflow_handlers_test.go index 33c66fb4d..6b3fe9447 100644 --- a/control-plane/internal/handlers/ui/coverage_execution_workflow_handlers_test.go +++ b/control-plane/internal/handlers/ui/coverage_execution_workflow_handlers_test.go @@ -3,6 +3,7 @@ package ui import ( "context" "encoding/json" + "errors" "net/http" "net/http/httptest" "path/filepath" @@ -121,20 +122,20 @@ func seedWorkflowExecutions(t *testing.T, ls *storage.LocalStorage, ctx context. 
}, })) require.NoError(t, ls.CreateExecutionRecord(ctx, &types.Execution{ - ExecutionID: rootExecID, - RunID: runID, - AgentNodeID: "agent-alpha", - ReasonerID: "planner", - NodeID: "agent-alpha", - InputPayload: json.RawMessage(`{"error":"corrupted_json_data","preview":"partial"}`), + ExecutionID: rootExecID, + RunID: runID, + AgentNodeID: "agent-alpha", + ReasonerID: "planner", + NodeID: "agent-alpha", + InputPayload: json.RawMessage(`{"error":"corrupted_json_data","preview":"partial"}`), ResultPayload: json.RawMessage(`{"result":"ok"}`), - Status: string(types.ExecutionStatusWaiting), - StatusReason: &waitReason, - StartedAt: now, - CompletedAt: &rootCompleted, - DurationMS: &rootDuration, - SessionID: &sessionID, - ActorID: &actorID, + Status: string(types.ExecutionStatusWaiting), + StatusReason: &waitReason, + StartedAt: now, + CompletedAt: &rootCompleted, + DurationMS: &rootDuration, + SessionID: &sessionID, + ActorID: &actorID, Notes: []types.ExecutionNote{ {Message: "queued", Timestamp: now.Add(10 * time.Second)}, {Message: "awaiting approval", Timestamp: now.Add(20 * time.Second)}, @@ -412,6 +413,168 @@ func TestWorkflowRunHandlerRealStorageCoverage(t *testing.T) { }) } +func TestWorkflowRunHandlerSaveGoldenRunPreservesLineageMetadata(t *testing.T) { + gin.SetMode(gin.TestMode) + + ls, ctx := setupUIHandlerStorage(t) + runID := "run-golden" + rootExecutionID := "exec-golden-root" + childExecutionID := "exec-golden-child" + now := time.Date(2026, 4, 9, 15, 0, 0, 0, time.UTC) + completed := now.Add(3 * time.Second) + durationMS := int64(3000) + + require.NoError(t, ls.StoreWorkflowRun(ctx, &types.WorkflowRun{ + RunID: runID, + RootExecutionID: &rootExecutionID, + Status: string(types.ExecutionStatusSucceeded), + Metadata: json.RawMessage(`{ + "lineage": { + "kind": "fork", + "source_run_id": "run-source", + "source_execution_id": "exec-source", + "restarted_execution_id": "exec-source-root", + "reuse": "succeeded-before", + "scope": "workflow" + } + }`), + 
CreatedAt: now, + UpdatedAt: completed, + })) + require.NoError(t, ls.CreateExecutionRecord(ctx, &types.Execution{ + ExecutionID: rootExecutionID, + RunID: runID, + AgentNodeID: "agent-alpha", + ReasonerID: "planner", + NodeID: "agent-alpha", + Status: types.ExecutionStatusSucceeded, + InputPayload: json.RawMessage(`{"input":{"topic":"restart"}}`), + ResultPayload: json.RawMessage(`{"plan":"ok"}`), + StartedAt: now, + CompletedAt: &completed, + DurationMS: &durationMS, + CreatedAt: now, + UpdatedAt: completed, + })) + require.NoError(t, ls.CreateExecutionRecord(ctx, &types.Execution{ + ExecutionID: childExecutionID, + ParentExecutionID: &rootExecutionID, + RunID: runID, + AgentNodeID: "agent-alpha", + ReasonerID: "writer", + NodeID: "agent-alpha", + Status: types.ExecutionStatusSucceeded, + InputPayload: json.RawMessage(`{"input":{"plan":"ok"}}`), + ResultPayload: json.RawMessage(`{"draft":"ok"}`), + StartedAt: now.Add(time.Second), + CompletedAt: &completed, + DurationMS: &durationMS, + CreatedAt: now.Add(time.Second), + UpdatedAt: completed, + })) + + handler := NewWorkflowRunHandler(ls) + router := gin.New() + router.POST("/api/ui/v1/workflow-runs/:run_id/golden", handler.SaveGoldenRunHandler) + + req := httptest.NewRequest( + http.MethodPost, + "/api/ui/v1/workflow-runs/run-golden/golden", + strings.NewReader(`{"name":"Release baseline","tags":[" smoke ","smoke","","restart"]}`), + ) + req.Header.Set("Content-Type", "application/json") + resp := httptest.NewRecorder() + router.ServeHTTP(resp, req) + + require.Equal(t, http.StatusOK, resp.Code) + var summary WorkflowRunSummary + require.NoError(t, json.Unmarshal(resp.Body.Bytes(), &summary)) + require.NotNil(t, summary.Lineage) + require.Equal(t, "run-source", summary.Lineage.SourceRunID) + require.NotNil(t, summary.Golden) + require.Equal(t, "Release baseline", summary.Golden.Name) + require.Equal(t, []string{"smoke", "restart"}, summary.Golden.Tags) + + run, err := ls.GetWorkflowRun(ctx, runID) + 
require.NoError(t, err) + require.NotNil(t, run) + metadata := decodeWorkflowRunMetadata(run.Metadata) + require.Contains(t, metadata, "lineage") + require.Contains(t, metadata, "golden") +} + +func TestWorkflowRunHandlerSaveGoldenRunErrors(t *testing.T) { + gin.SetMode(gin.TestMode) + + ls, ctx := setupUIHandlerStorage(t) + handler := NewWorkflowRunHandler(ls) + router := gin.New() + router.POST("/api/ui/v1/workflow-runs/:run_id/golden", handler.SaveGoldenRunHandler) + + t.Run("missing run id", func(t *testing.T) { + rec := httptest.NewRecorder() + ginCtx, _ := gin.CreateTestContext(rec) + ginCtx.Request = httptest.NewRequest(http.MethodPost, "/golden", strings.NewReader(`{}`)) + handler.SaveGoldenRunHandler(ginCtx) + require.Equal(t, http.StatusBadRequest, rec.Code) + }) + + t.Run("invalid json", func(t *testing.T) { + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/ui/v1/workflow-runs/run-missing/golden", strings.NewReader(`{"name":`)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(rec, req) + require.Equal(t, http.StatusBadRequest, rec.Code) + }) + + t.Run("missing run", func(t *testing.T) { + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/ui/v1/workflow-runs/run-missing/golden", strings.NewReader(`{}`)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(rec, req) + require.Equal(t, http.StatusNotFound, rec.Code) + }) + + t.Run("failed run cannot be golden", func(t *testing.T) { + now := time.Date(2026, 4, 9, 16, 0, 0, 0, time.UTC) + require.NoError(t, ls.CreateExecutionRecord(ctx, &types.Execution{ + ExecutionID: "exec-failed-golden", + RunID: "run-failed-golden", + AgentNodeID: "agent-alpha", + NodeID: "agent-alpha", + ReasonerID: "planner", + Status: types.ExecutionStatusFailed, + InputPayload: json.RawMessage(`{"input":{}}`), + StartedAt: now, + CreatedAt: now, + UpdatedAt: now, + })) + + rec := httptest.NewRecorder() + req := 
httptest.NewRequest(http.MethodPost, "/api/ui/v1/workflow-runs/run-failed-golden/golden", strings.NewReader(`{}`)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(rec, req) + require.Equal(t, http.StatusConflict, rec.Code) + }) + + t.Run("query error", func(t *testing.T) { + errorHandler := NewWorkflowRunHandler(&workflowRunOverrideStorage{ + StorageProvider: ls, + queryExecutionRecordsFn: func(context.Context, types.ExecutionFilter) ([]*types.Execution, error) { + return nil, errors.New("query failed") + }, + }) + errorRouter := gin.New() + errorRouter.POST("/api/ui/v1/workflow-runs/:run_id/golden", errorHandler.SaveGoldenRunHandler) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/ui/v1/workflow-runs/run-any/golden", strings.NewReader(`{}`)) + req.Header.Set("Content-Type", "application/json") + errorRouter.ServeHTTP(rec, req) + require.Equal(t, http.StatusInternalServerError, rec.Code) + }) +} + func TestExecutionHandlerHelperCoverage(t *testing.T) { t.Run("parse time pointer", func(t *testing.T) { parsed, err := parseTimePtrValue("2026-04-08T12:00:00Z") diff --git a/control-plane/internal/handlers/ui/coverage_low_funcs_additional_test.go b/control-plane/internal/handlers/ui/coverage_low_funcs_additional_test.go index a2d89d4ac..a18d0be93 100644 --- a/control-plane/internal/handlers/ui/coverage_low_funcs_additional_test.go +++ b/control-plane/internal/handlers/ui/coverage_low_funcs_additional_test.go @@ -26,9 +26,9 @@ import ( type workflowRunOverrideStorage struct { storage.StorageProvider - queryRunSummariesFn func(context.Context, types.ExecutionFilter) ([]*storage.RunSummaryAggregation, int, error) + queryRunSummariesFn func(context.Context, types.ExecutionFilter) ([]*storage.RunSummaryAggregation, int, error) queryExecutionRecordsFn func(context.Context, types.ExecutionFilter) ([]*types.Execution, error) - getWorkflowExecutionFn func(context.Context, string) (*types.WorkflowExecution, error) + 
getWorkflowExecutionFn func(context.Context, string) (*types.WorkflowExecution, error) } func (s *workflowRunOverrideStorage) QueryRunSummaries(ctx context.Context, filter types.ExecutionFilter) ([]*storage.RunSummaryAggregation, int, error) { @@ -52,6 +52,18 @@ func (s *workflowRunOverrideStorage) GetWorkflowExecution(ctx context.Context, e return s.StorageProvider.GetWorkflowExecution(ctx, executionID) } +func (s *workflowRunOverrideStorage) StoreWorkflowRun(ctx context.Context, run *types.WorkflowRun) error { + return s.StorageProvider.(interface { + StoreWorkflowRun(context.Context, *types.WorkflowRun) error + }).StoreWorkflowRun(ctx, run) +} + +func (s *workflowRunOverrideStorage) GetWorkflowRun(ctx context.Context, runID string) (*types.WorkflowRun, error) { + return s.StorageProvider.(interface { + GetWorkflowRun(context.Context, string) (*types.WorkflowRun, error) + }).GetWorkflowRun(ctx, runID) +} + type executionRecordOverrideStore struct { queryFn func(context.Context, types.ExecutionFilter) ([]*types.Execution, error) getFn func(context.Context, string) (*types.Execution, error) @@ -630,10 +642,10 @@ func TestReasonersAndObservabilityLowCoveragePaths(t *testing.T) { storage.listAgentsFn = func(context.Context, types.AgentFilters) ([]*types.AgentNode, error) { return []*types.AgentNode{{ - ID: "agent-offline", - Version: "v1", - HealthStatus: types.HealthStatusInactive, - Reasoners: []types.ReasonerDefinition{{ID: "plan"}}, + ID: "agent-offline", + Version: "v1", + HealthStatus: types.HealthStatusInactive, + Reasoners: []types.ReasonerDefinition{{ID: "plan"}}, LastHeartbeat: time.Now().UTC(), }}, nil } diff --git a/control-plane/internal/handlers/ui/workflow_runs.go b/control-plane/internal/handlers/ui/workflow_runs.go index 56aad5bbf..ad5e98342 100644 --- a/control-plane/internal/handlers/ui/workflow_runs.go +++ b/control-plane/internal/handlers/ui/workflow_runs.go @@ -2,6 +2,8 @@ package ui import ( "context" + "encoding/json" + "fmt" "net/http" "sort" 
"strconv" @@ -25,39 +27,57 @@ func NewWorkflowRunHandler(storage storage.StorageProvider) *WorkflowRunHandler } type WorkflowRunSummary struct { - WorkflowID string `json:"workflow_id"` - RunID string `json:"run_id"` - RootExecutionID string `json:"root_execution_id"` + WorkflowID string `json:"workflow_id"` + RunID string `json:"run_id"` + RootExecutionID string `json:"root_execution_id"` // RootExecutionStatus is the status of the root execution row, which is // the unit the user actually controls via Pause/Resume/Cancel. The // aggregate Status field above can drift from this when in-flight // children are still running after the user pauses or cancels the // root — see execute.go's dispatch-time guard for the full story. - RootExecutionStatus string `json:"root_execution_status,omitempty"` - RootErrorCategory string `json:"root_error_category,omitempty"` - RootErrorMessage string `json:"root_error_message,omitempty"` - Status string `json:"status"` - DisplayName string `json:"display_name"` - CurrentTask string `json:"current_task"` - RootReasoner string `json:"root_reasoner"` - AgentID *string `json:"agent_id,omitempty"` - SessionID *string `json:"session_id,omitempty"` - ActorID *string `json:"actor_id,omitempty"` - TotalExecutions int `json:"total_executions"` - MaxDepth int `json:"max_depth"` - ActiveExecutions int `json:"active_executions"` - StatusCounts map[string]int `json:"status_counts"` - StartedAt time.Time `json:"started_at"` - UpdatedAt time.Time `json:"updated_at"` - CompletedAt *time.Time `json:"completed_at,omitempty"` - DurationMs *int64 `json:"duration_ms,omitempty"` - LatestActivity time.Time `json:"latest_activity"` - Terminal bool `json:"terminal"` + RootExecutionStatus string `json:"root_execution_status,omitempty"` + RootErrorCategory string `json:"root_error_category,omitempty"` + RootErrorMessage string `json:"root_error_message,omitempty"` + Status string `json:"status"` + DisplayName string `json:"display_name"` + CurrentTask string 
`json:"current_task"` + RootReasoner string `json:"root_reasoner"` + AgentID *string `json:"agent_id,omitempty"` + SessionID *string `json:"session_id,omitempty"` + ActorID *string `json:"actor_id,omitempty"` + TotalExecutions int `json:"total_executions"` + MaxDepth int `json:"max_depth"` + ActiveExecutions int `json:"active_executions"` + StatusCounts map[string]int `json:"status_counts"` + StartedAt time.Time `json:"started_at"` + UpdatedAt time.Time `json:"updated_at"` + CompletedAt *time.Time `json:"completed_at,omitempty"` + DurationMs *int64 `json:"duration_ms,omitempty"` + LatestActivity time.Time `json:"latest_activity"` + Terminal bool `json:"terminal"` // Trigger describes the inbound webhook (or schedule) that originated // this run, when one exists. Populated by walking the root execution's // VC chain back to the parent trigger_event VC. Nil for runs invoked // directly or by another reasoner. Trigger *types.TriggerEventMetadata `json:"trigger,omitempty"` + Lineage *RunLineageMetadata `json:"lineage,omitempty"` + Golden *GoldenRunMetadata `json:"golden,omitempty"` +} + +type RunLineageMetadata struct { + Kind string `json:"kind,omitempty"` + SourceRunID string `json:"source_run_id,omitempty"` + SourceExecutionID string `json:"source_execution_id,omitempty"` + RestartedExecutionID string `json:"restarted_execution_id,omitempty"` + Reuse string `json:"reuse,omitempty"` + Scope string `json:"scope,omitempty"` +} + +type GoldenRunMetadata struct { + Name string `json:"name,omitempty"` + Tags []string `json:"tags,omitempty"` + SavedBy string `json:"saved_by,omitempty"` + SavedAt string `json:"saved_at,omitempty"` } type WorkflowRunListResponse struct { @@ -70,22 +90,38 @@ type WorkflowRunListResponse struct { type WorkflowRunDetailResponse struct { Run struct { - RunID string `json:"run_id"` - RootWorkflowID string `json:"root_workflow_id"` - RootExecutionID string `json:"root_execution_id,omitempty"` - Status string `json:"status"` - TotalSteps int 
`json:"total_steps"` - CompletedSteps int `json:"completed_steps"` - FailedSteps int `json:"failed_steps"` - ReturnedSteps int `json:"returned_steps"` - StatusCounts map[string]int `json:"status_counts,omitempty"` - CreatedAt string `json:"created_at"` - UpdatedAt string `json:"updated_at"` - CompletedAt *string `json:"completed_at,omitempty"` + RunID string `json:"run_id"` + RootWorkflowID string `json:"root_workflow_id"` + RootExecutionID string `json:"root_execution_id,omitempty"` + Status string `json:"status"` + TotalSteps int `json:"total_steps"` + CompletedSteps int `json:"completed_steps"` + FailedSteps int `json:"failed_steps"` + ReturnedSteps int `json:"returned_steps"` + StatusCounts map[string]int `json:"status_counts,omitempty"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` + CompletedAt *string `json:"completed_at,omitempty"` + Lineage *RunLineageMetadata `json:"lineage,omitempty"` + Golden *GoldenRunMetadata `json:"golden,omitempty"` } `json:"run"` Executions []apiWorkflowExecution `json:"executions"` } +type workflowRunMetadataReader interface { + GetWorkflowRun(ctx context.Context, runID string) (*types.WorkflowRun, error) +} + +type workflowRunMetadataWriter interface { + StoreWorkflowRun(ctx context.Context, run *types.WorkflowRun) error + GetWorkflowRun(ctx context.Context, runID string) (*types.WorkflowRun, error) +} + +type saveGoldenRunRequest struct { + Name string `json:"name,omitempty"` + Tags []string `json:"tags,omitempty"` +} + type apiWorkflowExecution struct { WorkflowID string `json:"workflow_id"` ExecutionID string `json:"execution_id"` @@ -163,6 +199,7 @@ func (h *WorkflowRunHandler) ListWorkflowRunsHandler(c *gin.Context) { summaries := make([]WorkflowRunSummary, 0, len(runAggregations)) for _, agg := range runAggregations { summary := convertAggregationToSummary(agg) + h.enrichRunMetadata(ctx, &summary) summary.Trigger = handlers.TriggerForRun( ctx, h.storage, @@ -189,6 +226,89 @@ func (h 
*WorkflowRunHandler) ListWorkflowRunsHandler(c *gin.Context) { c.JSON(http.StatusOK, response) } +func (h *WorkflowRunHandler) SaveGoldenRunHandler(c *gin.Context) { + ctx := c.Request.Context() + runID := strings.TrimSpace(c.Param("run_id")) + if runID == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "run_id is required"}) + return + } + store, ok := h.storage.(workflowRunMetadataWriter) + if !ok { + c.JSON(http.StatusNotImplemented, gin.H{"error": "workflow run metadata is not available for this storage backend"}) + return + } + + var req saveGoldenRunRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("invalid request body: %v", err)}) + return + } + + filter := types.ExecutionFilter{RunID: &runID, SortBy: "started_at", SortDescending: false, Limit: 10000} + executions, err := h.storage.QueryExecutionRecords(ctx, filter) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query run executions"}) + return + } + if len(executions) == 0 { + c.JSON(http.StatusNotFound, gin.H{"error": "workflow run not found"}) + return + } + if deriveOverallStatusForUI(executions) != string(types.ExecutionStatusSucceeded) { + c.JSON(http.StatusConflict, gin.H{"error": "only succeeded runs can be saved as golden"}) + return + } + + now := time.Now().UTC() + name := strings.TrimSpace(req.Name) + if name == "" { + name = runID + } + metadata := map[string]interface{}{} + if existing, err := store.GetWorkflowRun(ctx, runID); err == nil && existing != nil { + metadata = decodeWorkflowRunMetadata(existing.Metadata) + } + metadata["golden"] = GoldenRunMetadata{ + Name: name, + Tags: sanitizeStringList(req.Tags), + SavedBy: "user", + SavedAt: now.Format(time.RFC3339), + } + encoded, err := json.Marshal(metadata) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encode golden metadata"}) + return + } + + rootExecutionID := executions[0].ExecutionID + for 
_, exec := range executions { + if exec.ParentExecutionID == nil || *exec.ParentExecutionID == "" { + rootExecutionID = exec.ExecutionID + break + } + } + run := &types.WorkflowRun{ + RunID: runID, + RootWorkflowID: runID, + RootExecutionID: &rootExecutionID, + Status: string(types.ExecutionStatusSucceeded), + TotalSteps: len(executions), + CompletedSteps: len(executions), + Metadata: json.RawMessage(encoded), + CreatedAt: executions[0].StartedAt, + UpdatedAt: now, + } + if err := store.StoreWorkflowRun(ctx, run); err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save golden run"}) + return + } + + summary := summarizeRun(runID, executions) + h.enrichRunMetadata(ctx, &summary) + c.JSON(http.StatusOK, summary) +} + // convertAggregationToSummary converts a storage.RunSummaryAggregation to WorkflowRunSummary func convertAggregationToSummary(agg *storage.RunSummaryAggregation) WorkflowRunSummary { summary := WorkflowRunSummary{ @@ -337,6 +457,10 @@ func (h *WorkflowRunHandler) GetWorkflowRunDetailHandler(c *gin.Context) { detail.Run.CompletedAt = dag.CompletedAt } _ = name + if metadata := h.loadRunMetadata(ctx, runID); metadata != nil { + detail.Run.Lineage = metadata.Lineage + detail.Run.Golden = metadata.Golden + } if agg := h.loadRunSummary(ctx, runID); agg != nil { detail.Run.TotalSteps = agg.TotalExecutions @@ -456,6 +580,105 @@ func summarizeRun(runID string, executions []*types.Execution) WorkflowRunSummar return summary } +type parsedRunMetadata struct { + Lineage *RunLineageMetadata + Golden *GoldenRunMetadata +} + +func (h *WorkflowRunHandler) enrichRunMetadata(ctx context.Context, summary *WorkflowRunSummary) { + if summary == nil { + return + } + metadata := h.loadRunMetadata(ctx, summary.RunID) + if metadata == nil { + return + } + summary.Lineage = metadata.Lineage + summary.Golden = metadata.Golden +} + +func (h *WorkflowRunHandler) loadRunMetadata(ctx context.Context, runID string) *parsedRunMetadata { + reader, ok := 
// decodeWorkflowRunMetadata decodes a run's raw metadata JSON into a generic
// map. It always returns a non-nil, writable map: empty input, invalid JSON,
// non-object JSON and a JSON `null` all yield an empty map, so callers can add
// keys to the result without checking it first.
func decodeWorkflowRunMetadata(raw json.RawMessage) map[string]interface{} {
	if len(raw) == 0 {
		return map[string]interface{}{}
	}
	var metadata map[string]interface{}
	// json.Unmarshal reports no error for `null` but leaves the map nil, so
	// the nil check is needed in addition to the error check.
	if err := json.Unmarshal(raw, &metadata); err != nil || metadata == nil {
		return map[string]interface{}{}
	}
	return metadata
}
// ExecutionReuseInfo is the reuse marker attached to a DAG node whose status
// reason records that it was replayed from another execution. Hit is set when
// such a marker was recognised, SourceExecutionID is the execution id taken
// from the status reason, and SourceRunID is the run that source execution
// belongs to; it is back-filled from the run lineage and may be empty.
type ExecutionReuseInfo struct {
	Hit               bool   `json:"hit"`
	SourceExecutionID string `json:"source_execution_id"`
	SourceRunID       string `json:"source_run_id,omitempty"`
}
// RunLineageMetadata is the "lineage" entry of a workflow run's stored
// metadata, as decoded by loadRunMetadata and returned on DAG responses.
// SourceRunID is also used to back-fill the source run id on reused nodes.
//
// NOTE(review): Kind, Reuse and Scope presumably mirror the restart request
// options (values such as "fork", "succeeded-before" and "workflow") — confirm
// against the restart handler that writes this metadata.
type RunLineageMetadata struct {
	Kind                 string `json:"kind,omitempty"`
	SourceRunID          string `json:"source_run_id,omitempty"`
	SourceExecutionID    string `json:"source_execution_id,omitempty"`
	RestartedExecutionID string `json:"restarted_execution_id,omitempty"`
	Reuse                string `json:"reuse,omitempty"`
	Scope                string `json:"scope,omitempty"`
}
Trigger *types.TriggerEventMetadata `json:"trigger,omitempty"` + Lineage *RunLineageMetadata `json:"lineage,omitempty"` + Golden *GoldenRunMetadata `json:"golden,omitempty"` +} + +type workflowRunMetadataGetter interface { + GetWorkflowRun(ctx context.Context, runID string) (*types.WorkflowRun, error) } func GetWorkflowDAGHandler(storageProvider storage.StorageProvider) gin.HandlerFunc { @@ -163,9 +196,15 @@ func (s *executionGraphService) handleGetWorkflowDAG(c *gin.Context) { } rootExecID := findRootExecutionID(executions) + lineage, golden := s.loadRunMetadata(ctx, runID) if isLightweightRequest(c) { timeline, workflowStatus, workflowName, sessionID, actorID, maxDepth := buildLightweightExecutionDAG(executions) + if lineage != nil && lineage.SourceRunID != "" { + for i := range timeline { + fillReuseSourceRunNode(timeline[i].Reuse, lineage.SourceRunID) + } + } wh := aggregateWebhookRunData(ctx, s.store, executions) response := WorkflowDAGLightweightResponse{ @@ -183,6 +222,8 @@ func (s *executionGraphService) handleGetWorkflowDAG(c *gin.Context) { WebhookSummary: wh.summary, WebhookFailures: wh.failures, Trigger: TriggerForRun(ctx, s.store, runID, rootExecID), + Lineage: lineage, + Golden: golden, } c.JSON(http.StatusOK, response) @@ -190,6 +231,12 @@ func (s *executionGraphService) handleGetWorkflowDAG(c *gin.Context) { } dag, timeline, workflowStatus, workflowName, sessionID, actorID, maxDepth := buildExecutionDAG(executions) + if lineage != nil && lineage.SourceRunID != "" { + fillReuseSourceRunDAG(&dag, lineage.SourceRunID) + for i := range timeline { + fillReuseSourceRunNode(timeline[i].Reuse, lineage.SourceRunID) + } + } response := WorkflowDAGResponse{ RootWorkflowID: runID, @@ -202,11 +249,47 @@ func (s *executionGraphService) handleGetWorkflowDAG(c *gin.Context) { DAG: dag, Timeline: timeline, Trigger: TriggerForRun(ctx, s.store, runID, rootExecID), + Lineage: lineage, + Golden: golden, } c.JSON(http.StatusOK, response) } +func (s 
*executionGraphService) loadRunMetadata(ctx context.Context, runID string) (*RunLineageMetadata, *GoldenRunMetadata) { + getter, ok := s.store.(workflowRunMetadataGetter) + if !ok { + return nil, nil + } + run, err := getter.GetWorkflowRun(ctx, runID) + if err != nil || run == nil || len(run.Metadata) == 0 { + return nil, nil + } + var raw map[string]interface{} + if err := json.Unmarshal(run.Metadata, &raw); err != nil { + return nil, nil + } + var lineage *RunLineageMetadata + if value, ok := raw["lineage"]; ok { + if encoded, err := json.Marshal(value); err == nil { + var parsed RunLineageMetadata + if err := json.Unmarshal(encoded, &parsed); err == nil { + lineage = &parsed + } + } + } + var golden *GoldenRunMetadata + if value, ok := raw["golden"]; ok { + if encoded, err := json.Marshal(value); err == nil { + var parsed GoldenRunMetadata + if err := json.Unmarshal(encoded, &parsed); err == nil { + golden = &parsed + } + } + } + return lineage, golden +} + // findRootExecutionID returns the execution_id of the root node — the // execution whose ParentExecutionID is nil/empty. Used to anchor trigger // enrichment to the run's originating step. 
Falls back to the first @@ -711,6 +794,7 @@ func executionToDAGNode(exec *types.Execution, depth int) WorkflowDAGNode { WorkflowDepth: depth, Notes: []types.ExecutionNote{}, NotesCount: 0, + Reuse: executionReuseInfo(exec), External: externalAnnotationFromExecution(exec), } } @@ -768,14 +852,55 @@ func executionToLightweightNode(exec *types.Execution, depth int) WorkflowDAGLig AgentNodeID: exec.AgentNodeID, ReasonerID: exec.ReasonerID, Status: types.NormalizeExecutionStatus(exec.Status), + StatusReason: exec.StatusReason, StartedAt: started, CompletedAt: completed, DurationMS: exec.DurationMS, WorkflowDepth: depth, + Reuse: executionReuseInfo(exec), External: externalAnnotationFromExecution(exec), } } +func executionReuseInfo(exec *types.Execution) *ExecutionReuseInfo { + if exec == nil || exec.StatusReason == nil { + return nil + } + const prefix = "replayed_from_execution:" + sourceExecutionID := strings.TrimSpace(strings.TrimPrefix(*exec.StatusReason, prefix)) + if sourceExecutionID == "" || sourceExecutionID == *exec.StatusReason { + return nil + } + return &ExecutionReuseInfo{ + Hit: true, + SourceExecutionID: sourceExecutionID, + } +} + +// fillReuseSourceRunDAG back-fills the source run id on a node's reuse marker and +// its descendants. The per-node reuse info is derived from the execution status +// reason, which only records the source execution id; every reused node in a +// restarted run shares the run's single replay source, so the run id is taken +// from the run lineage rather than re-queried per node. +func fillReuseSourceRunDAG(node *WorkflowDAGNode, sourceRunID string) { + if node == nil { + return + } + if node.Reuse != nil && node.Reuse.Hit && node.Reuse.SourceRunID == "" { + node.Reuse.SourceRunID = sourceRunID + } + for i := range node.Children { + fillReuseSourceRunDAG(&node.Children[i], sourceRunID) + } +} + +// fillReuseSourceRunNode back-fills the source run id on a single reuse marker. 
+func fillReuseSourceRunNode(reuse *ExecutionReuseInfo, sourceRunID string) { + if reuse != nil && reuse.Hit && reuse.SourceRunID == "" { + reuse.SourceRunID = sourceRunID + } +} + func externalAnnotationFromExecution(exec *types.Execution) *WorkflowDAGExternal { if exec == nil || len(exec.ResultPayload) == 0 { return nil diff --git a/control-plane/internal/handlers/workflow_dag_metadata_test.go b/control-plane/internal/handlers/workflow_dag_metadata_test.go new file mode 100644 index 000000000..b9ba4e210 --- /dev/null +++ b/control-plane/internal/handlers/workflow_dag_metadata_test.go @@ -0,0 +1,107 @@ +package handlers + +import ( + "context" + "encoding/json" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/Agent-Field/agentfield/control-plane/internal/storage" + "github.com/Agent-Field/agentfield/control-plane/pkg/types" + "github.com/stretchr/testify/require" +) + +func TestExecutionGraphServiceLoadRunMetadata(t *testing.T) { + ctx := context.Background() + store := storage.NewLocalStorage(storage.LocalStorageConfig{}) + err := store.Initialize(ctx, storage.StorageConfig{ + Mode: "local", + Local: storage.LocalStorageConfig{ + DatabasePath: filepath.Join(t.TempDir(), "agentfield.db"), + KVStorePath: filepath.Join(t.TempDir(), "agentfield.bolt"), + }, + }) + if err != nil && strings.Contains(strings.ToLower(err.Error()), "fts5") { + t.Skip("sqlite3 compiled without FTS5") + } + require.NoError(t, err) + t.Cleanup(func() { + _ = store.Close(ctx) + }) + + svc := newExecutionGraphService(store) + + require.NoError(t, store.StoreWorkflowRun(ctx, &types.WorkflowRun{ + RunID: "run-restart", + Metadata: json.RawMessage(`{ + "lineage": { + "kind": "fork", + "source_run_id": "old-run", + "source_execution_id": "old-child", + "restarted_execution_id": "old-root", + "reuse": "succeeded-before", + "scope": "workflow" + }, + "golden": { + "name": "Known good retry", + "tags": ["smoke", "restart"], + "saved_by": "user", + "saved_at": "2026-04-08T12:00:00Z" 
+ } + }`), + CreatedAt: time.Now().UTC(), + UpdatedAt: time.Now().UTC(), + })) + + lineage, golden := svc.loadRunMetadata(ctx, "run-restart") + require.NotNil(t, lineage) + require.Equal(t, "fork", lineage.Kind) + require.Equal(t, "old-run", lineage.SourceRunID) + require.Equal(t, "old-child", lineage.SourceExecutionID) + require.Equal(t, "old-root", lineage.RestartedExecutionID) + require.Equal(t, "succeeded-before", lineage.Reuse) + require.Equal(t, "workflow", lineage.Scope) + require.NotNil(t, golden) + require.Equal(t, "Known good retry", golden.Name) + require.Equal(t, []string{"smoke", "restart"}, golden.Tags) + + require.NoError(t, store.StoreWorkflowRun(ctx, &types.WorkflowRun{ + RunID: "run-invalid", + Metadata: json.RawMessage(`{"lineage":`), + CreatedAt: time.Now().UTC(), + UpdatedAt: time.Now().UTC(), + })) + lineage, golden = svc.loadRunMetadata(ctx, "run-invalid") + require.Nil(t, lineage) + require.Nil(t, golden) + + lineage, golden = svc.loadRunMetadata(ctx, "run-missing") + require.Nil(t, lineage) + require.Nil(t, golden) +} + +func TestFillReuseSourceRun(t *testing.T) { + reused := "replayed_from_execution:src-exec" + child := &types.Execution{ExecutionID: "child", StatusReason: &reused} + root := WorkflowDAGNode{ + ExecutionID: "root", + Children: []WorkflowDAGNode{executionToDAGNode(child, 1)}, + } + + // Per-node reuse info only carries the source execution id until back-filled. + require.NotNil(t, root.Children[0].Reuse) + require.Equal(t, "src-exec", root.Children[0].Reuse.SourceExecutionID) + require.Empty(t, root.Children[0].Reuse.SourceRunID) + + fillReuseSourceRunDAG(&root, "old-run") + require.Equal(t, "old-run", root.Children[0].Reuse.SourceRunID) + require.Nil(t, root.Reuse, "non-reused nodes must not gain a reuse marker") + + // Existing source run ids are preserved, and nil markers are a no-op. 
+ preset := &ExecutionReuseInfo{Hit: true, SourceExecutionID: "e", SourceRunID: "keep"} + fillReuseSourceRunNode(preset, "other-run") + require.Equal(t, "keep", preset.SourceRunID) + fillReuseSourceRunNode(nil, "old-run") +} diff --git a/control-plane/internal/server/routes_core.go b/control-plane/internal/server/routes_core.go index e4a828baf..1e9988382 100644 --- a/control-plane/internal/server/routes_core.go +++ b/control-plane/internal/server/routes_core.go @@ -124,6 +124,7 @@ func (s *AgentFieldServer) registerCoreRoutes(agentAPI *gin.RouterGroup) { agentAPI.POST("/executions/:execution_id/cancel", handlers.CancelExecutionHandler(s.storage)) agentAPI.POST("/executions/:execution_id/pause", handlers.PauseExecutionHandler(s.storage)) agentAPI.POST("/executions/:execution_id/resume", handlers.ResumeExecutionHandler(s.storage)) + agentAPI.POST("/executions/:execution_id/restart", handlers.RestartExecutionHandler(s.storage, s.payloadStore, s.webhookDispatcher, s.config.AgentField.ExecutionQueue.AgentCallTimeout, s.config.Features.DID.Authorization.InternalToken)) agentAPI.POST("/workflows/:workflowId/cancel-tree", handlers.CancelWorkflowTreeHandler(s.storage)) // Approval workflow endpoints — CP manages execution state only; diff --git a/control-plane/internal/server/routes_ui.go b/control-plane/internal/server/routes_ui.go index e95374db3..7fef38988 100644 --- a/control-plane/internal/server/routes_ui.go +++ b/control-plane/internal/server/routes_ui.go @@ -165,6 +165,7 @@ func (s *AgentFieldServer) registerUIAPI() { executions.POST("/:execution_id/cancel", handlers.CancelExecutionHandler(s.storage)) executions.POST("/:execution_id/pause", handlers.PauseExecutionHandler(s.storage)) executions.POST("/:execution_id/resume", handlers.ResumeExecutionHandler(s.storage)) + executions.POST("/:execution_id/restart", handlers.RestartExecutionHandler(s.storage, s.payloadStore, s.webhookDispatcher, s.config.AgentField.ExecutionQueue.AgentCallTimeout, 
s.config.Features.DID.Authorization.InternalToken)) // Execution notes endpoints for UI executions.POST("/note", handlers.AddExecutionNoteHandler(s.storage, s.noteOwnershipEnforced())) @@ -278,6 +279,7 @@ func (s *AgentFieldServer) registerUIAPI() { workflowRunsHandler := ui.NewWorkflowRunHandler(s.storage) uiAPIV2.GET("/workflow-runs", workflowRunsHandler.ListWorkflowRunsHandler) uiAPIV2.GET("/workflow-runs/:run_id", workflowRunsHandler.GetWorkflowRunDetailHandler) + uiAPIV2.POST("/workflow-runs/:run_id/golden", workflowRunsHandler.SaveGoldenRunHandler) } } diff --git a/control-plane/web/client/src/components/RunTrace.tsx b/control-plane/web/client/src/components/RunTrace.tsx index 20f92797a..230452ff1 100644 --- a/control-plane/web/client/src/components/RunTrace.tsx +++ b/control-plane/web/client/src/components/RunTrace.tsx @@ -1,9 +1,10 @@ import { useMemo, useRef } from "react"; import { useVirtualizer } from "@tanstack/react-virtual"; import { cn } from "@/lib/utils"; +import { statusTone } from "@/lib/theme"; import { getStatusTheme, normalizeExecutionStatus } from "@/utils/status"; import { StatusDot } from "@/components/ui/status-pill"; -import { Network } from "@/components/ui/icon-bridge"; +import { GitBranch, Network } from "@/components/ui/icon-bridge"; import type { WorkflowDAGLightweightNode } from "@/types/workflows"; // ─── Tree node type (runtime-constructed) ──────────────────────────────────── @@ -291,6 +292,20 @@ function TraceRow({ )} + {node.reuse?.hit ? ( + + + + ) : null} + {/* Duration bar */}
diff --git a/control-plane/web/client/src/components/WorkflowDAG/NodeDetailSidebar.tsx b/control-plane/web/client/src/components/WorkflowDAG/NodeDetailSidebar.tsx index 4b996018c..bd9d65ebc 100644 --- a/control-plane/web/client/src/components/WorkflowDAG/NodeDetailSidebar.tsx +++ b/control-plane/web/client/src/components/WorkflowDAG/NodeDetailSidebar.tsx @@ -1,4 +1,5 @@ import { Close } from "@/components/ui/icon-bridge"; +import { GitBranch, RotateCcw } from "lucide-react"; import { useEffect, useState } from "react"; import { createPortal } from "react-dom"; import { statusTone } from "../../lib/theme"; @@ -26,22 +27,33 @@ interface WorkflowNodeData { workflow_depth: number; task_name?: string; agent_name?: string; + reuse?: { + hit: boolean; + source_execution_id: string; + source_run_id?: string; + }; } interface NodeDetailSidebarProps { node: WorkflowNodeData | null; isOpen: boolean; onClose: () => void; + onRestartWorkflowFromNode?: (node: WorkflowNodeData) => void; + onRerunNodeOnly?: (node: WorkflowNodeData) => void; + onForkFromNode?: (node: WorkflowNodeData) => void; } export function NodeDetailSidebar({ node, isOpen, onClose, + onRestartWorkflowFromNode, + onRerunNodeOnly, + onForkFromNode, }: NodeDetailSidebarProps) { const [copySuccess, setCopySuccess] = useState(null); const { nodeDetails, loading, error, refetch } = useNodeDetails( - node?.execution_id + node?.execution_id, ); // Handle copy to clipboard @@ -72,7 +84,7 @@ export function NodeDetailSidebar({ if (isOpen) { // Focus the close button when sidebar opens const closeButton = document.querySelector( - "[data-sidebar-close]" + "[data-sidebar-close]", ) as HTMLElement; closeButton?.focus(); } @@ -95,7 +107,7 @@ export function NodeDetailSidebar({
@@ -106,7 +118,7 @@ export function NodeDetailSidebar({ "fixed top-0 right-0 z-[80] flex h-full w-full max-w-full flex-col transition-transform duration-300 ease-out", "border-l border-border bg-card/95 backdrop-blur-xl", "shadow-[0px_24px_60px_-28px_color-mix(in_srgb,_var(--foreground)_18%,_transparent)]", - isOpen ? "translate-x-0" : "translate-x-full" + isOpen ? "translate-x-0" : "translate-x-full", )} role="dialog" aria-modal="true" @@ -141,6 +153,44 @@ export function NodeDetailSidebar({ {/* Content - Scrollable */}
+ {onRestartWorkflowFromNode || onRerunNodeOnly || onForkFromNode ? ( +
+ {onRestartWorkflowFromNode ? ( + + ) : null} + {onRerunNodeOnly ? ( + + ) : null} + {onForkFromNode ? ( + + ) : null} +
+ ) : null} + {node.reuse?.hit ? : null} {loading ? ( ) : error ? ( @@ -196,15 +246,45 @@ export function NodeDetailSidebar({ return createPortal(sidebarContent, document.body); } +function ReuseNotice({ node }: { node: WorkflowNodeData }) { + return ( +
+
+ + Reused output +
+

+ Output came from{" "} + + {node.reuse?.source_execution_id} + + {node.reuse?.source_run_id ? ( + <> + {" "} + in{" "} + + {node.reuse.source_run_id} + + + ) : null} + . +

+
+ ); +} + // Loading skeleton function SidebarSkeleton() { return (
{[...Array(5)].map((_, i) => ( - + @@ -232,7 +312,12 @@ function ErrorState({ return ( -
+

@@ -252,7 +337,6 @@ function ErrorState({ ); } - // ─── Triggers Section ────────────────────────────────────────────────────── interface BoundTrigger { @@ -262,7 +346,6 @@ interface BoundTrigger { public_url: string; } - function TriggersSection({ nodeId }: { nodeId: string }) { const { data: triggers, isLoading } = useQuery({ queryKey: ["node-triggers", nodeId], @@ -273,7 +356,7 @@ function TriggersSection({ nodeId }: { nodeId: string }) { headers: { "X-API-Key": sessionStorage.getItem("apiKey") || "", }, - } + }, ); if (!response.ok) throw new Error("Failed to fetch triggers"); return response.json(); @@ -308,8 +391,8 @@ function TriggersSection({ nodeId }: { nodeId: string }) {
- No triggers bound to this node yet. Triggers route inbound - webhook events into this node's reasoners. + No triggers bound to this node yet. Triggers route inbound webhook + events into this node's reasoners.
diff --git a/control-plane/web/client/src/components/WorkflowDAG/WorkflowNode.tsx b/control-plane/web/client/src/components/WorkflowDAG/WorkflowNode.tsx index e02bad7ce..84bbd4f5d 100644 --- a/control-plane/web/client/src/components/WorkflowDAG/WorkflowNode.tsx +++ b/control-plane/web/client/src/components/WorkflowDAG/WorkflowNode.tsx @@ -3,6 +3,7 @@ import { Handle, Position, useStore } from "@xyflow/react"; import { Calendar, CheckmarkFilled, + GitBranch, Network, Time, User, @@ -38,7 +39,12 @@ interface WorkflowNodeData { isFocusRelated?: boolean; focusDistance?: number; parent_execution_id?: string; - viewMode?: 'standard' | 'performance' | 'debug'; + reuse?: { + hit: boolean; + source_execution_id: string; + source_run_id?: string; + }; + viewMode?: "standard" | "performance" | "debug"; performanceIntensity?: number; minPerformance?: number; maxPerformance?: number; @@ -84,549 +90,620 @@ const StatusPlaceholder = memo( StatusPlaceholder.displayName = "StatusPlaceholder"; -export const WorkflowNode = memo(({ data, selected }: WorkflowNodeProps) => { - const showFullContent = useStore(zoomSelector); - const normalizedStatus = normalizeExecutionStatus(data.status); - const { - isSearchMatch = false, - isDimmed = false, - isFocusPrimary = false, - isFocusRelated = false, - } = data; - const viewMode = data.viewMode ?? 'standard'; - const performanceIntensity = Math.min(Math.max(data.performanceIntensity ?? 
0, 0), 1); - const external = data.external; - - const formatDuration = (durationMs?: number) => { - if (!durationMs) return "-"; - if (durationMs < 1000) return `${durationMs}ms`; - if (durationMs < 60000) return `${(durationMs / 1000).toFixed(1)}s`; - const minutes = Math.floor(durationMs / 60000); - const seconds = Math.floor((durationMs % 60000) / 1000); - return `${minutes}m ${seconds}s`; - }; - - const formatTimestamp = (timestamp: string) => { - return new Date(timestamp).toLocaleTimeString("en-US", { - hour12: false, - hour: "2-digit", - minute: "2-digit", - second: "2-digit", - }); - }; - - // Icon reflects this node's OWN canonical status only. A running child - // under a cancelled parent must still spin/pulse — do not propagate parent - // state into child node visuals. - const getStatusIcon = (status: CanonicalStatus) => { - const theme = getStatusTheme(status); - const Icon = theme.icon; - const iconClass = cn("h-4 w-4", theme.iconClass); - return ( - +export const WorkflowNode = memo( + ({ data, selected }: WorkflowNodeProps) => { + const showFullContent = useStore(zoomSelector); + const normalizedStatus = normalizeExecutionStatus(data.status); + const { + isSearchMatch = false, + isDimmed = false, + isFocusPrimary = false, + isFocusRelated = false, + } = data; + const viewMode = data.viewMode ?? "standard"; + const performanceIntensity = Math.min( + Math.max(data.performanceIntensity ?? 
0, 0), + 1, ); - }; - - const getStatusText = (status: string) => { - return getStatusLabel(status); - }; - - // Convert Python function names to human-readable format - const humanizeText = (text: string): string => { - return text - // Replace underscores with spaces - .replace(/_/g, ' ') - // Replace hyphens with spaces - .replace(/-/g, ' ') - // Capitalize first letter of each word - .replace(/\b\w/g, l => l.toUpperCase()) - // Clean up multiple spaces - .replace(/\s+/g, ' ') - .trim(); - }; - - // Calculate optimal node width with generous sizing for better UX - const calculateOptimalWidth = (taskText: string, agentText: string): number => { - const minWidth = 200; // Increased minimum width - const maxWidth = 360; // Increased maximum width for better readability - const charWidth = 7.5; // More accurate character width for the font - - const taskHuman = humanizeText(taskText); - const agentHuman = humanizeText(agentText); + const external = data.external; + + const formatDuration = (durationMs?: number) => { + if (!durationMs) return "-"; + if (durationMs < 1000) return `${durationMs}ms`; + if (durationMs < 60000) return `${(durationMs / 1000).toFixed(1)}s`; + const minutes = Math.floor(durationMs / 60000); + const seconds = Math.floor((durationMs % 60000) / 1000); + return `${minutes}m ${seconds}s`; + }; - // Calculate width needed to fit text comfortably in two lines - const taskWordsLength = taskHuman.split(' ').reduce((max, word) => Math.max(max, word.length), 0); - const agentWordsLength = agentHuman.split(' ').reduce((max, word) => Math.max(max, word.length), 0); + const formatTimestamp = (timestamp: string) => { + return new Date(timestamp).toLocaleTimeString("en-US", { + hour12: false, + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + }); + }; - // Base width on longest single word plus some buffer for multi-word lines - const longestWord = Math.max(taskWordsLength, agentWordsLength); - const estimatedWidth = Math.max( - longestWord * 
charWidth * 1.8, // 1.8x for comfortable two-line display - (taskHuman.length / 2.2) * charWidth, // Divide by 2.2 instead of 2 for more generous spacing - (agentHuman.length / 2.2) * charWidth - ) + 80; // Increased padding for icons and spacing + // Icon reflects this node's OWN canonical status only. A running child + // under a cancelled parent must still spin/pulse — do not propagate parent + // state into child node visuals. + const getStatusIcon = (status: CanonicalStatus) => { + const theme = getStatusTheme(status); + const Icon = theme.icon; + const iconClass = cn("h-4 w-4", theme.iconClass); + return ( + + ); + }; - return Math.min(maxWidth, Math.max(minWidth, estimatedWidth)); - }; + const getStatusText = (status: string) => { + return getStatusLabel(status); + }; - // Smart text formatting that prefers single line when possible - const formatTextForDisplay = (text: string, nodeWidth: number, isAgentName: boolean = false) => { - const humanText = humanizeText(text); - const words = humanText.split(' '); + // Convert Python function names to human-readable format + const humanizeText = (text: string): string => { + return ( + text + // Replace underscores with spaces + .replace(/_/g, " ") + // Replace hyphens with spaces + .replace(/-/g, " ") + // Capitalize first letter of each word + .replace(/\b\w/g, (l) => l.toUpperCase()) + // Clean up multiple spaces + .replace(/\s+/g, " ") + .trim() + ); + }; - // Calculate available character space based on node width - const availableWidth = nodeWidth - (isAgentName ? 
100 : 80); // More space for agent names (account for icon) - const charWidth = 7.5; - const maxCharsForSingleLine = Math.floor(availableWidth / charWidth); - const maxCharsPerLine = Math.floor(maxCharsForSingleLine * 0.9); // 90% for comfortable reading + // Calculate optimal node width with generous sizing for better UX + const calculateOptimalWidth = ( + taskText: string, + agentText: string, + ): number => { + const minWidth = 200; // Increased minimum width + const maxWidth = 360; // Increased maximum width for better readability + const charWidth = 7.5; // More accurate character width for the font + + const taskHuman = humanizeText(taskText); + const agentHuman = humanizeText(agentText); + + // Calculate width needed to fit text comfortably in two lines + const taskWordsLength = taskHuman + .split(" ") + .reduce((max, word) => Math.max(max, word.length), 0); + const agentWordsLength = agentHuman + .split(" ") + .reduce((max, word) => Math.max(max, word.length), 0); + + // Base width on longest single word plus some buffer for multi-word lines + const longestWord = Math.max(taskWordsLength, agentWordsLength); + const estimatedWidth = + Math.max( + longestWord * charWidth * 1.8, // 1.8x for comfortable two-line display + (taskHuman.length / 2.2) * charWidth, // Divide by 2.2 instead of 2 for more generous spacing + (agentHuman.length / 2.2) * charWidth, + ) + 80; // Increased padding for icons and spacing + + return Math.min(maxWidth, Math.max(minWidth, estimatedWidth)); + }; - // PRIORITY 1: Try to fit in single line (especially for agent names) - if (humanText.length <= maxCharsForSingleLine || (isAgentName && humanText.length <= maxCharsForSingleLine * 1.1)) { - return { line1: humanText, line2: "", isSingleLine: true }; - } + // Smart text formatting that prefers single line when possible + const formatTextForDisplay = ( + text: string, + nodeWidth: number, + isAgentName: boolean = false, + ) => { + const humanText = humanizeText(text); + const words = 
humanText.split(" "); + + // Calculate available character space based on node width + const availableWidth = nodeWidth - (isAgentName ? 100 : 80); // More space for agent names (account for icon) + const charWidth = 7.5; + const maxCharsForSingleLine = Math.floor(availableWidth / charWidth); + const maxCharsPerLine = Math.floor(maxCharsForSingleLine * 0.9); // 90% for comfortable reading + + // PRIORITY 1: Try to fit in single line (especially for agent names) + if ( + humanText.length <= maxCharsForSingleLine || + (isAgentName && humanText.length <= maxCharsForSingleLine * 1.1) + ) { + return { line1: humanText, line2: "", isSingleLine: true }; + } - // PRIORITY 2: For agent names, be more aggressive about single line - if (isAgentName && humanText.length <= maxCharsForSingleLine * 1.2) { - return { line1: humanText, line2: "", isSingleLine: true }; - } + // PRIORITY 2: For agent names, be more aggressive about single line + if (isAgentName && humanText.length <= maxCharsForSingleLine * 1.2) { + return { line1: humanText, line2: "", isSingleLine: true }; + } - // PRIORITY 3: Only use two lines when absolutely necessary - if (words.length === 1) { - // Single long word - break intelligently at natural points - const breakPoint = Math.ceil(humanText.length / 2); - return { - line1: humanText.substring(0, breakPoint), - line2: humanText.substring(breakPoint), - isSingleLine: false - }; - } + // PRIORITY 3: Only use two lines when absolutely necessary + if (words.length === 1) { + // Single long word - break intelligently at natural points + const breakPoint = Math.ceil(humanText.length / 2); + return { + line1: humanText.substring(0, breakPoint), + line2: humanText.substring(breakPoint), + isSingleLine: false, + }; + } - // Multiple words - smart distribution - let line1 = ""; - let line2 = ""; - - // Try to fit as many complete words as possible on first line - for (let i = 0; i < words.length; i++) { - const word = words[i]; - const testLine1 = line1 + (line1 ? 
" " : "") + word; - - if (testLine1.length <= maxCharsPerLine || line1 === "") { - line1 = testLine1; - } else { - // Add remaining words to line2 - line2 = words.slice(i).join(" "); - break; + // Multiple words - smart distribution + let line1 = ""; + let line2 = ""; + + // Try to fit as many complete words as possible on first line + for (let i = 0; i < words.length; i++) { + const word = words[i]; + const testLine1 = line1 + (line1 ? " " : "") + word; + + if (testLine1.length <= maxCharsPerLine || line1 === "") { + line1 = testLine1; + } else { + // Add remaining words to line2 + line2 = words.slice(i).join(" "); + break; + } } - } - // Ensure line2 isn't too long - if (line2.length > maxCharsPerLine) { - // Rebalance by moving some words back to line1 if possible - const allWords = words; - const midPoint = Math.ceil(allWords.length / 2); - line1 = allWords.slice(0, midPoint).join(" "); - line2 = allWords.slice(midPoint).join(" "); - } + // Ensure line2 isn't too long + if (line2.length > maxCharsPerLine) { + // Rebalance by moving some words back to line1 if possible + const allWords = words; + const midPoint = Math.ceil(allWords.length / 2); + line1 = allWords.slice(0, midPoint).join(" "); + line2 = allWords.slice(midPoint).join(" "); + } - return { line1, line2, isSingleLine: false }; - }; + return { line1, line2, isSingleLine: false }; + }; - const statusTheme = getStatusTheme(normalizedStatus); - const statusColorVar = statusTheme.hexColor; - const statusBorderVar = `color-mix(in srgb, ${statusTheme.hexColor} 60%, transparent)`; - const statusGlowVar = `color-mix(in srgb, ${statusTheme.hexColor} 38%, transparent)`; + const statusTheme = getStatusTheme(normalizedStatus); + const statusColorVar = statusTheme.hexColor; + const statusBorderVar = `color-mix(in srgb, ${statusTheme.hexColor} 60%, transparent)`; + const statusGlowVar = `color-mix(in srgb, ${statusTheme.hexColor} 38%, transparent)`; - const agentColor = agentColorManager.getAgentColor( - 
data.agent_name || data.agent_node_id, - data.agent_node_id - ); + const agentColor = agentColorManager.getAgentColor( + data.agent_name || data.agent_node_id, + data.agent_node_id, + ); - const tokenFor = (token: StatusToneKey | "primary") => { - if (token === "primary") { + const tokenFor = (token: StatusToneKey | "primary") => { + if (token === "primary") { + return { + border: `color-mix(in srgb, var(--primary) 55%, transparent)`, + glow: `color-mix(in srgb, var(--primary) 40%, transparent)`, + }; + } return { - border: `color-mix(in srgb, var(--primary) 55%, transparent)`, - glow: `color-mix(in srgb, var(--primary) 40%, transparent)`, + border: `var(--status-${token}-border)`, + glow: `color-mix(in srgb, var(--status-${token}) 40%, transparent)`, }; - } - return { - border: `var(--status-${token}-border)`, - glow: `color-mix(in srgb, var(--status-${token}) 40%, transparent)`, }; - }; - let borderColor = statusBorderVar; - let glowColor = statusGlowVar; - const hasHighlight = isFocusPrimary || isFocusRelated || isSearchMatch || selected; - - if (isFocusPrimary) { - const highlight = tokenFor("success"); - borderColor = highlight.border; - glowColor = highlight.glow; - } else if (isFocusRelated || isSearchMatch) { - const highlight = tokenFor("info"); - borderColor = highlight.border; - glowColor = highlight.glow; - } else if (selected) { - const highlight = tokenFor("primary"); - borderColor = highlight.border; - glowColor = highlight.glow; - } else if (viewMode === "performance") { - const heat = Math.min(70, 35 + performanceIntensity * 40); - borderColor = `color-mix(in srgb, var(--status-warning) ${heat}%, transparent)`; - glowColor = `color-mix(in srgb, var(--status-warning) ${Math.min(30 + performanceIntensity * 50, 85)}%, transparent)`; - } else if (viewMode === "debug") { - borderColor = "var(--border)"; - glowColor = "color-mix(in srgb, var(--muted-foreground) 45%, transparent)"; - } - - if (external && !hasHighlight && viewMode !== "performance") { - 
borderColor = "color-mix(in srgb, rgb(14 165 233) 68%, transparent)"; - glowColor = "color-mix(in srgb, rgb(14 165 233) 48%, transparent)"; - } - - const baseShadow = - "0 1px 2px color-mix(in srgb, var(--foreground) 6%, transparent), 0 1px 3px color-mix(in srgb, var(--foreground) 4%, transparent)"; - const accentShadow = `0 0 0 1px ${borderColor}`; - const glowShadow = isDimmed ? "" : `0 0 12px -2px ${glowColor}`; - const compositeShadow = [accentShadow, baseShadow, glowShadow].filter(Boolean).join(", "); - - const baseBackground = `linear-gradient(145deg, color-mix(in srgb, ${statusColorVar} 6%, var(--card)), var(--card))`; - let background = baseBackground; - - if (!hasHighlight) { - if (external && viewMode !== "performance") { - background = "linear-gradient(145deg, color-mix(in srgb, rgb(14 165 233) 10%, var(--card)), var(--card))"; + let borderColor = statusBorderVar; + let glowColor = statusGlowVar; + const hasHighlight = + isFocusPrimary || isFocusRelated || isSearchMatch || selected; + + if (isFocusPrimary) { + const highlight = tokenFor("success"); + borderColor = highlight.border; + glowColor = highlight.glow; + } else if (isFocusRelated || isSearchMatch) { + const highlight = tokenFor("info"); + borderColor = highlight.border; + glowColor = highlight.glow; + } else if (selected) { + const highlight = tokenFor("primary"); + borderColor = highlight.border; + glowColor = highlight.glow; } else if (viewMode === "performance") { - const heat = Math.min(65, 25 + performanceIntensity * 45); - background = `linear-gradient(135deg, color-mix(in srgb, var(--status-warning) ${heat}%, transparent), var(--card))`; + const heat = Math.min(70, 35 + performanceIntensity * 40); + borderColor = `color-mix(in srgb, var(--status-warning) ${heat}%, transparent)`; + glowColor = `color-mix(in srgb, var(--status-warning) ${Math.min(30 + performanceIntensity * 50, 85)}%, transparent)`; } else if (viewMode === "debug") { - background = `linear-gradient(135deg, color-mix(in 
srgb, var(--muted) 18%, transparent), var(--card))`; + borderColor = "var(--border)"; + glowColor = + "color-mix(in srgb, var(--muted-foreground) 45%, transparent)"; } - } - // Calculate optimal node width based on content - const taskText = data.task_name || data.reasoner_id; - const agentText = data.agent_name || data.agent_node_id; - const nodeWidth = calculateOptimalWidth(taskText, agentText); + if (external && !hasHighlight && viewMode !== "performance") { + borderColor = "color-mix(in srgb, rgb(14 165 233) 68%, transparent)"; + glowColor = "color-mix(in srgb, rgb(14 165 233) 48%, transparent)"; + } + + const baseShadow = + "0 1px 2px color-mix(in srgb, var(--foreground) 6%, transparent), 0 1px 3px color-mix(in srgb, var(--foreground) 4%, transparent)"; + const accentShadow = `0 0 0 1px ${borderColor}`; + const glowShadow = isDimmed ? "" : `0 0 12px -2px ${glowColor}`; + const compositeShadow = [accentShadow, baseShadow, glowShadow] + .filter(Boolean) + .join(", "); + + const baseBackground = `linear-gradient(145deg, color-mix(in srgb, ${statusColorVar} 6%, var(--card)), var(--card))`; + let background = baseBackground; + + if (!hasHighlight) { + if (external && viewMode !== "performance") { + background = "linear-gradient(145deg, color-mix(in srgb, rgb(14 165 233) 10%, var(--card)), var(--card))"; + } else if (viewMode === "performance") { + const heat = Math.min(65, 25 + performanceIntensity * 45); + background = `linear-gradient(135deg, color-mix(in srgb, var(--status-warning) ${heat}%, transparent), var(--card))`; + } else if (viewMode === "debug") { + background = `linear-gradient(135deg, color-mix(in srgb, var(--muted) 18%, transparent), var(--card))`; + } + } + + // Calculate optimal node width based on content + const taskText = data.task_name || data.reasoner_id; + const agentText = data.agent_name || data.agent_node_id; + const nodeWidth = calculateOptimalWidth(taskText, agentText); + + // Early return for simplified view when zoomed out + if 
(!showFullContent) { + return ( +
+ + + +
+ ); + } - // Early return for simplified view when zoomed out - if (!showFullContent) { return ( -
+
+ {/* Agent color left border accent */} +
+ + {external && ( +
+ + + {external.provider ? `External · ${external.provider}` : "External"} + +
+ )} + + {/* Agent Badge - positioned in top-left */} +
+ +
+ {/* Invisible connection handles - Required for ReactFlow edges but hidden from user */} - -
- ); - } - - return ( -
- {/* Agent color left border accent */} -
- - {external && ( -
- - - {external.provider ? `External · ${external.provider}` : "External"} - -
- )} - - {/* Agent Badge - positioned in top-left */} -
- -
- {/* Invisible connection handles - Required for ReactFlow edges but hidden from user */} - - - -
-
- {getStatusIcon(normalizedStatus)} -
-
- {(() => { - const taskFormatted = formatTextForDisplay(taskText, nodeWidth, false); - return ( -
+
+
+ {getStatusIcon(normalizedStatus)} +
+ {data.reuse?.hit ? ( +
+ +
+ ) : null} + +
+ {(() => { + const taskFormatted = formatTextForDisplay( + taskText, + nodeWidth, + false, + ); + return (
- {taskFormatted.line1} -
- {taskFormatted.line2 && (
- {taskFormatted.line2} + {taskFormatted.line1}
- )} -
- ); - })()} -
+ {taskFormatted.line2 && ( +
+ {taskFormatted.line2} +
+ )} +
+ ); + })()} +
-
- - {(() => { - const agentFormatted = formatTextForDisplay(agentText, nodeWidth, true); - return ( -
+
+ + {(() => { + const agentFormatted = formatTextForDisplay( + agentText, + nodeWidth, + true, + ); + return (
- {agentFormatted.line1} -
- {agentFormatted.line2 && (
- {agentFormatted.line2} + {agentFormatted.line1}
- )} -
- ); - })()} -
- -
- {/* Duration with Time icon */} -
-
+ ); + })()}
- {/* Timestamp with Calendar icon */} -
- - - {formatTimestamp(data.started_at)} - -
-
- - {viewMode === 'performance' && ( -
-
-
-
-
- Load {(performanceIntensity * 100).toFixed(0)}% - {data.duration_ms ? {formatDuration(data.duration_ms)} : null} +
+ {/* Duration with Time icon */} +
+
-
- )} - {viewMode === 'debug' && ( -
-
ID: {data.execution_id}
- {data.parent_execution_id &&
Parent: {data.parent_execution_id.slice(0, 8)}…
} -
- Status:{" "} - - {getStatusText(normalizedStatus)} + {/* Timestamp with Calendar icon */} +
+ + + {formatTimestamp(data.started_at)}
- )} -
- - {/* Hover Tooltip */} -
-
- {/* Header */} -
- {humanizeText(data.task_name || data.reasoner_id)} -
- {/* Main Info */} -
-
- - - Agent: - - - {humanizeText(data.agent_name || data.agent_node_id)} - + {viewMode === "performance" && ( +
+
+
+
+
+ Load {(performanceIntensity * 100).toFixed(0)}% + {data.duration_ms ? ( + {formatDuration(data.duration_ms)} + ) : null} +
- -
- - {getStatusIcon(normalizedStatus)} - Status: - - - {getStatusText(normalizedStatus)} - + )} + + {viewMode === "debug" && ( +
+
ID: {data.execution_id}
+ {data.parent_execution_id && ( +
Parent: {data.parent_execution_id.slice(0, 8)}…
+ )} + {data.reuse?.hit && ( +
Reused: {data.reuse.source_execution_id.slice(0, 8)}…
+ )} +
+ Status:{" "} + + {getStatusText(normalizedStatus)} + +
+ )} +
-
- - - - {formatDuration(data.duration_ms)} - + {/* Hover Tooltip */} +
+
+ {/* Header */} +
+ {humanizeText(data.task_name || data.reasoner_id)}
-
- - - Started: - - - {formatTimestamp(data.started_at)} - -
+ {/* Main Info */} +
+
+ + + Agent: + + + {humanizeText(data.agent_name || data.agent_node_id)} + +
- {data.completed_at && (
- - Completed: + {getStatusIcon(normalizedStatus)} + Status: + + + {getStatusText(normalizedStatus)} + +
+ +
+ + + + {formatDuration(data.duration_ms)} + +
+ +
+ + + Started: - {formatTimestamp(data.completed_at)} + {formatTimestamp(data.started_at)}
- )} -
- {/* Technical Details */} -
-
- Execution ID: - - {data.execution_id.slice(0, 8)}... - + {data.completed_at && ( +
+ + + Completed: + + + {formatTimestamp(data.completed_at)} + +
+ )}
-
- Workflow ID: - - {data.workflow_id.slice(0, 8)}... - + + {/* Technical Details */} +
+
+ Execution ID: + + {data.execution_id.slice(0, 8)}... + +
+
+ Workflow ID: + + {data.workflow_id.slice(0, 8)}... + +
+ {data.reuse?.hit ? ( +
+ Reused from: + + {data.reuse.source_execution_id.slice(0, 8)}... + +
+ ) : null}
-
- {/* Tooltip Arrow */} -
+ {/* Tooltip Arrow */} +
+
-
- ); -}, (prevProps, nextProps) => { - // Custom comparison function for React.memo - // Only re-render if essential data has changed - return ( - prevProps.data.execution_id === nextProps.data.execution_id && - prevProps.data.status === nextProps.data.status && - prevProps.data.duration_ms === nextProps.data.duration_ms && - prevProps.data.task_name === nextProps.data.task_name && - prevProps.data.agent_name === nextProps.data.agent_name && - prevProps.data.started_at === nextProps.data.started_at && - prevProps.data.completed_at === nextProps.data.completed_at && - prevProps.data.external === nextProps.data.external && - prevProps.data.isSearchMatch === nextProps.data.isSearchMatch && - prevProps.data.isDimmed === nextProps.data.isDimmed && - prevProps.data.isFocusPrimary === nextProps.data.isFocusPrimary && - prevProps.data.isFocusRelated === nextProps.data.isFocusRelated && - prevProps.selected === nextProps.selected - ); -}); - -WorkflowNode.displayName = 'WorkflowNode'; + ); + }, + (prevProps, nextProps) => { + // Custom comparison function for React.memo + // Only re-render if essential data has changed + return ( + prevProps.data.execution_id === nextProps.data.execution_id && + prevProps.data.status === nextProps.data.status && + prevProps.data.duration_ms === nextProps.data.duration_ms && + prevProps.data.task_name === nextProps.data.task_name && + prevProps.data.agent_name === nextProps.data.agent_name && + prevProps.data.started_at === nextProps.data.started_at && + prevProps.data.completed_at === nextProps.data.completed_at && + prevProps.data.external === nextProps.data.external && + prevProps.data.reuse?.source_execution_id === + nextProps.data.reuse?.source_execution_id && + prevProps.data.isSearchMatch === nextProps.data.isSearchMatch && + prevProps.data.isDimmed === nextProps.data.isDimmed && + prevProps.data.isFocusPrimary === nextProps.data.isFocusPrimary && + prevProps.data.isFocusRelated === nextProps.data.isFocusRelated && + 
prevProps.selected === nextProps.selected + ); + }, +); + +WorkflowNode.displayName = "WorkflowNode"; diff --git a/control-plane/web/client/src/components/WorkflowDAG/index.tsx b/control-plane/web/client/src/components/WorkflowDAG/index.tsx index 8309575a0..cfafd5145 100644 --- a/control-plane/web/client/src/components/WorkflowDAG/index.tsx +++ b/control-plane/web/client/src/components/WorkflowDAG/index.tsx @@ -14,7 +14,13 @@ import { useReactFlow, } from "@xyflow/react"; import "@xyflow/react/dist/style.css"; -import React, { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import React, { + useCallback, + useEffect, + useMemo, + useRef, + useState, +} from "react"; import type { CSSProperties, ReactNode } from "react"; import { AgentLegend } from "./AgentLegend"; @@ -44,9 +50,7 @@ import { import { buildDeckGraph, type DeckGraphData } from "./DeckGLGraph"; import { getWorkflowDAG } from "../../services/workflowsApi"; -import type { - WorkflowDAGLightweightResponse, -} from "../../types/workflows"; +import type { WorkflowDAGLightweightResponse } from "../../types/workflows"; import { X } from "@/components/ui/icon-bridge"; import { Button } from "../ui/button"; import { Card, CardContent } from "../ui/card"; @@ -69,7 +73,6 @@ export interface WorkflowDAGControls { changeLayout: (layout: AllLayoutType) => void; } - interface WorkflowDAGViewerProps { workflowId: string; dagData?: WorkflowDAGResponse | WorkflowDAGLightweightResponse | null; @@ -77,6 +80,9 @@ interface WorkflowDAGViewerProps { error?: string | null; onClose?: () => void; onExecutionClick?: (execution: WorkflowDAGNode) => void; + onRestartWorkflowFromNode?: (execution: WorkflowDAGNode) => void; + onRerunNodeOnly?: (execution: WorkflowDAGNode) => void; + onForkFromNode?: (execution: WorkflowDAGNode) => void; className?: string; searchQuery?: string; focusMode?: boolean; @@ -182,12 +188,15 @@ function WorkflowDAGViewerInner({ viewMode = "standard", onLayoutInfoChange, onExecutionClick, 
+ onRestartWorkflowFromNode, + onRerunNodeOnly, + onForkFromNode, }: WorkflowDAGViewerProps) { const [nodes, setNodes, onNodesChange] = useNodesState([] as Node[]); const [edges, setEdges, onEdgesChange] = useEdgesState([] as Edge[]); const [currentLayout, setCurrentLayout] = useState("tree"); const [selectedNode, setSelectedNode] = useState( - null + null, ); const [sidebarOpen, setSidebarOpen] = useState(false); const [selectedAgent, setSelectedAgent] = useState(null); @@ -202,8 +211,12 @@ function WorkflowDAGViewerInner({ const [internalDagData, setInternalDagData] = useState(null); const largeGraphRef = useRef(false); - const [deckGraphData, setDeckGraphData] = useState(null); - const handleLayoutChangeRef = useRef<(layout: AllLayoutType) => void>(() => {}); + const [deckGraphData, setDeckGraphData] = useState( + null, + ); + const handleLayoutChangeRef = useRef<(layout: AllLayoutType) => void>( + () => {}, + ); const externalDagData = useMemo(() => { if (dagData === undefined || dagData === null) { @@ -248,10 +261,8 @@ function WorkflowDAGViewerInner({ const max = Math.max(...durations); const min = Math.min(...durations); const avg = - durations.reduce( - (sum: number, value: number) => sum + value, - 0 - ) / durations.length; + durations.reduce((sum: number, value: number) => sum + value, 0) / + durations.length; return { max, min, avg }; }, [effectiveDagData]); @@ -261,7 +272,7 @@ function WorkflowDAGViewerInner({ new LayoutManager({ enableWorker: import.meta.env?.VITE_ENABLE_LAYOUT_WORKER === "true", }), - [] + [], ); // Memoized objects to prevent unnecessary re-renders @@ -269,14 +280,14 @@ function WorkflowDAGViewerInner({ () => ({ workflow: WorkflowNode, }), - [] + [], ); const edgeTypes = useMemo( () => ({ floating: FloatingEdge, }), - [] + [], ); const fitViewOptions = useMemo( @@ -286,7 +297,7 @@ function WorkflowDAGViewerInner({ minZoom: 0, // Allow unlimited zoom out for large graphs maxZoom: 2, }), - [] + [], ); const defaultViewport = useMemo( 
@@ -295,7 +306,7 @@ function WorkflowDAGViewerInner({ y: 0, zoom: 0.8, }), - [] + [], ); // Use external loading/error states if provided, otherwise fall back to internal fetching @@ -322,10 +333,12 @@ function WorkflowDAGViewerInner({ const prevWorkflowIdForResetRef = useRef(undefined); const viewportStorageKey = useMemo( () => `workflowDAGViewport:${workflowId}`, - [workflowId] + [workflowId], ); - function isValidSavedViewport(v: unknown): v is { x: number; y: number; zoom: number } { + function isValidSavedViewport( + v: unknown, + ): v is { x: number; y: number; zoom: number } { if (!v || typeof v !== "object") return false; const o = v as Record; return ( @@ -345,7 +358,7 @@ function WorkflowDAGViewerInner({ const MAX_FOCUS_DEPTH = 2; const [debouncedSearchQuery, setDebouncedSearchQuery] = useState( - searchQuery ?? "" + searchQuery ?? "", ); useEffect(() => { @@ -416,7 +429,7 @@ function WorkflowDAGViewerInner({ } const nodesToFocus = getNodes().filter((node) => - nodeIds.includes(node.id) + nodeIds.includes(node.id), ); if (nodesToFocus.length === 0) { return; @@ -444,7 +457,7 @@ function WorkflowDAGViewerInner({ minY: Number.POSITIVE_INFINITY, maxX: Number.NEGATIVE_INFINITY, maxY: Number.NEGATIVE_INFINITY, - } + }, ); if ( @@ -484,9 +497,7 @@ function WorkflowDAGViewerInner({ } const edgesSnapshot = edgesRef.current; - const normalizedSearch = (debouncedSearchQuery || "") - .trim() - .toLowerCase(); + const normalizedSearch = (debouncedSearchQuery || "").trim().toLowerCase(); const focusIds = focusMode ? new Set(focusedNodeIds ?? []) : new Set(); @@ -635,8 +646,8 @@ function WorkflowDAGViewerInner({ ? Boolean( (sourceInfo?.focusDistance !== undefined && sourceInfo.focusDistance <= 1) || - (targetInfo?.focusDistance !== undefined && - targetInfo.focusDistance <= 1) + (targetInfo?.focusDistance !== undefined && + targetInfo.focusDistance <= 1), ) : false; @@ -648,7 +659,7 @@ function WorkflowDAGViewerInner({ const shouldDimByAgent = selectedAgent ? 
Boolean( (sourceInfo && sourceInfo.agentLabel !== selectedAgent) || - (targetInfo && targetInfo.agentLabel !== selectedAgent) + (targetInfo && targetInfo.agentLabel !== selectedAgent), ) : false; @@ -704,7 +715,7 @@ function WorkflowDAGViewerInner({ if (!isDimmed && viewMode === "performance") { updatedStyle.strokeWidth = Math.max( Number(updatedStyle.strokeWidth ?? 2.5), - 2.4 + targetIntensity * 2.2 + 2.4 + targetIntensity * 2.2, ); const heat = Math.min(80, 35 + targetIntensity * 45); updatedStyle.stroke = `color-mix(in srgb, var(--status-info) ${heat}%, transparent)`; @@ -752,24 +763,24 @@ function WorkflowDAGViewerInner({ durationStats, ]); - // Handle node click — delegate to parent via onExecutionClick when provided, - // otherwise fall back to the internal NodeDetailSidebar (legacy usage). + // Handle node click — keep parent selection in sync and open the debugger sidebar. const handleNodeClick = useCallback( (_event: React.MouseEvent, node: Node) => { const nodeData = node.data as unknown as WorkflowDAGNode; if (onExecutionClick && nodeData) { onExecutionClick(nodeData); - } else { + } + if (nodeData) { setSelectedNode(nodeData); setSidebarOpen(true); } }, - [onExecutionClick] + [onExecutionClick], ); // Handle sidebar close const closeSidebarTimeoutRef = useRef | null>( - null + null, ); const handleCloseSidebar = useCallback(() => { setSidebarOpen(false); @@ -803,7 +814,8 @@ function WorkflowDAGViewerInner({ }; if (onExecutionClick && localNode) { onExecutionClick(localNode); - } else { + } + if (localNode) { setSelectedNode(localNode); setSidebarOpen(true); } @@ -852,7 +864,7 @@ function WorkflowDAGViewerInner({ return { nodesForLayout, edgesForLayout, executionMap }; }, - [viewMode] + [viewMode], ); // Handle layout change @@ -875,7 +887,7 @@ function WorkflowDAGViewerInner({ nodes, edges, newLayout, - (progress) => setLayoutProgress(progress) + (progress) => setLayoutProgress(progress), ); setNodes(layoutedNodes); @@ -906,7 +918,7 @@ function 
WorkflowDAGViewerInner({ setEdges, layoutManager, setViewport, - ] + ], ); // Keep the ref in sync so controls.changeLayout() always uses latest @@ -918,11 +930,18 @@ function WorkflowDAGViewerInner({ onLayoutInfoChange({ currentLayout, availableLayouts: layoutManager.getAvailableLayouts(nodes.length), - isSlowLayout: (layout: AllLayoutType) => layoutManager.isSlowLayout(layout), + isSlowLayout: (layout: AllLayoutType) => + layoutManager.isSlowLayout(layout), isLargeGraph: layoutManager.isLargeGraph(nodes.length), isApplyingLayout, }); - }, [currentLayout, isApplyingLayout, nodes.length, layoutManager, onLayoutInfoChange]); + }, [ + currentLayout, + isApplyingLayout, + nodes.length, + layoutManager, + onLayoutInfoChange, + ]); // Utility: merge new DAG data incrementally without resetting positions const mergeIncrementalUpdate = useCallback( @@ -932,14 +951,11 @@ function WorkflowDAGViewerInner({ buildGraphElements(timeline); if (largeGraphRef.current) { - const flowNodes = applySimpleGridLayout( - nodesForLayout, - executionMap - ); + const flowNodes = applySimpleGridLayout(nodesForLayout, executionMap); const nodesWithMode = decorateNodesWithViewMode(flowNodes, viewMode); const edgesWithStatus = decorateEdgesWithStatus( edgesForLayout, - executionMap + executionMap, ); nodesRef.current = nodesWithMode; edgesRef.current = edgesWithStatus; @@ -953,10 +969,10 @@ function WorkflowDAGViewerInner({ const timelineIds = new Set(timeline.map((node) => node.execution_id)); const hasNewNodes = nodesForLayout.some( - (node) => !existingIds.has(node.id) + (node) => !existingIds.has(node.id), ); const hasRemovedNodes = nodesRef.current.some( - (node) => !timelineIds.has(node.id) + (node) => !timelineIds.has(node.id), ); if (hasNewNodes || hasRemovedNodes) { @@ -965,7 +981,7 @@ function WorkflowDAGViewerInner({ await layoutManager.applyLayout( nodesForLayout, edgesForLayout, - currentLayout + currentLayout, ); const nodesWithMode = layoutedNodes.map((node) => ({ @@ -1053,7 
+1069,7 @@ function WorkflowDAGViewerInner({ setNodes, setVisualEpoch, viewMode, - ] + ], ); // Process DAG data (either from props or internal fetch) @@ -1090,7 +1106,6 @@ function WorkflowDAGViewerInner({ // Process the data if we have it if (data) { - const timeline = data.timeline ?? []; // Determine the appropriate default layout based on graph size @@ -1103,14 +1118,11 @@ function WorkflowDAGViewerInner({ // For large graphs, build DeckGL data instead of React Flow layout if (useSimpleLayout) { - const flowNodes = applySimpleGridLayout( - nodesForLayout, - executionMap - ); + const flowNodes = applySimpleGridLayout(nodesForLayout, executionMap); const nodesWithMode = decorateNodesWithViewMode(flowNodes, viewMode); const edgesWithStatus = decorateEdgesWithStatus( edgesForLayout, - executionMap + executionMap, ); setNodes(nodesWithMode); setEdges(edgesWithStatus); @@ -1124,7 +1136,11 @@ function WorkflowDAGViewerInner({ } // Update current layout if it's still the initial "tree" value - if (!useSimpleLayout && currentLayout === "tree" && defaultLayout !== "tree") { + if ( + !useSimpleLayout && + currentLayout === "tree" && + defaultLayout !== "tree" + ) { setCurrentLayout(defaultLayout); } @@ -1140,7 +1156,7 @@ function WorkflowDAGViewerInner({ await layoutManager.applyLayout( nodesForLayout, edgesForLayout, - layoutToUse + layoutToUse, ); flowNodes = layoutedNodes; flowEdges = layoutedEdges; @@ -1148,7 +1164,7 @@ function WorkflowDAGViewerInner({ const nodesWithMode = decorateNodesWithViewMode(flowNodes, viewMode); const edgesWithStatus = decorateEdgesWithStatus( flowEdges, - executionMap + executionMap, ); setNodes(nodesWithMode); @@ -1251,14 +1267,7 @@ function WorkflowDAGViewerInner({ cancelAnimationFrame(rafOuter); cancelAnimationFrame(rafInner); }; - }, [ - loading, - error, - nodes.length, - viewportStorageKey, - fitView, - setViewport, - ]); + }, [loading, error, nodes.length, viewportStorageKey, fitView, setViewport]); const flowContainerRef = 
useRef(null); const deckGlRef = useRef(null); @@ -1315,7 +1324,12 @@ function WorkflowDAGViewerInner({ onCollapse={() => setGraphExpanded(false)} workflowTitle={effectiveDagData?.workflow_name} > -
+
void} + onNodeClick={ + handleDeckNodeClick as unknown as ( + node: import("./DeckGLGraph").WorkflowDAGNode, + ) => void + } /> {graphExpanded ? ( @@ -1375,13 +1393,14 @@ function WorkflowDAGViewerInner({
- {!onExecutionClick && ( - - )} +
); @@ -1394,7 +1413,12 @@ function WorkflowDAGViewerInner({ onCollapse={() => setGraphExpanded(false)} workflowTitle={effectiveDagData?.workflow_name} > -
+
- {shouldUseVirtualizedDAG ? ( - >} - edgeTypes={edgeTypes as Record>} - className="min-h-[280px] w-full flex-1" - style={{ width: "100%", height: "100%", minHeight: 280 }} - threshold={PERFORMANCE_THRESHOLD} - workflowId={workflowId} - graphLayout={graphLayout} - onAgentFilter={handleAgentFilter} - selectedAgent={selectedAgent} - onExpandGraph={() => setGraphExpanded(true)} - /> - ) : ( - { - viewportRef.current = viewport; - try { - localStorage.setItem( - viewportStorageKey, - JSON.stringify(viewport) - ); - } catch (storageError) { - console.warn( - "Failed to persist workflow DAG viewport", - storageError - ); + {shouldUseVirtualizedDAG ? ( + > } - }} - nodeTypes={nodeTypes} - edgeTypes={edgeTypes} - connectionLineComponent={FloatingConnectionLine} - connectionMode={ConnectionMode.Strict} - // Allow node dragging but disable edge creation - nodesDraggable={true} - nodesConnectable={false} - elementsSelectable={true} - fitViewOptions={fitViewOptions} - defaultViewport={defaultViewport} - minZoom={0} - maxZoom={2} - proOptions={{ hideAttribution: true }} - > - > + } + className="min-h-[280px] w-full flex-1" + style={{ width: "100%", height: "100%", minHeight: 280 }} + threshold={PERFORMANCE_THRESHOLD} + workflowId={workflowId} + graphLayout={graphLayout} + onAgentFilter={handleAgentFilter} + selectedAgent={selectedAgent} + onExpandGraph={() => setGraphExpanded(true)} /> - - {/* Agent Legend */} - - setGraphExpanded(true)} + ) : ( + { + viewportRef.current = viewport; + try { + localStorage.setItem( + viewportStorageKey, + JSON.stringify(viewport), + ); + } catch (storageError) { + console.warn( + "Failed to persist workflow DAG viewport", + storageError, + ); + } + }} + nodeTypes={nodeTypes} + edgeTypes={edgeTypes} + connectionLineComponent={FloatingConnectionLine} + connectionMode={ConnectionMode.Strict} + // Allow node dragging but disable edge creation + nodesDraggable={true} + nodesConnectable={false} + elementsSelectable={true} + 
fitViewOptions={fitViewOptions} + defaultViewport={defaultViewport} + minZoom={0} + maxZoom={2} + proOptions={{ hideAttribution: true }} + > + - - - - )} + + {/* Agent Legend */} + + setGraphExpanded(true)} + /> + + + + )}
- {!onExecutionClick && ( - - )} +
); diff --git a/control-plane/web/client/src/components/WorkflowDAG/workflowDagUtils.ts b/control-plane/web/client/src/components/WorkflowDAG/workflowDagUtils.ts index 17692e2f5..b19fdfa93 100644 --- a/control-plane/web/client/src/components/WorkflowDAG/workflowDagUtils.ts +++ b/control-plane/web/client/src/components/WorkflowDAG/workflowDagUtils.ts @@ -20,6 +20,11 @@ export interface WorkflowDAGNode { workflow_depth: number; agent_name?: string; task_name?: string; + reuse?: { + hit: boolean; + source_execution_id: string; + source_run_id?: string; + }; children?: WorkflowDAGNode[]; external?: WorkflowDAGExternal; } @@ -46,7 +51,7 @@ export const SIMPLE_LAYOUT_X_SPACING = 240; export const SIMPLE_LAYOUT_Y_SPACING = 120; export function isLightweightDAGResponse( - data: WorkflowDAGResponse | WorkflowDAGLightweightResponse | null + data: WorkflowDAGResponse | WorkflowDAGLightweightResponse | null, ): data is WorkflowDAGLightweightResponse { if (!data) { return false; @@ -57,7 +62,7 @@ export function isLightweightDAGResponse( export function mapLightweightNode( node: WorkflowDAGLightweightNode, - workflowId: string + workflowId: string, ): WorkflowDAGNode { return { workflow_id: workflowId, @@ -70,15 +75,16 @@ export function mapLightweightNode( duration_ms: node.duration_ms, parent_execution_id: node.parent_execution_id, workflow_depth: node.workflow_depth, + reuse: node.reuse, external: node.external, }; } export function adaptLightweightResponse( - response: WorkflowDAGLightweightResponse + response: WorkflowDAGLightweightResponse, ): WorkflowDAGResponse { const timeline = response.timeline.map((node) => - mapLightweightNode(node, response.root_workflow_id) + mapLightweightNode(node, response.root_workflow_id), ); return { @@ -98,7 +104,7 @@ export function adaptLightweightResponse( export function applySimpleGridLayout( nodes: Node[], - executionMap: Map + executionMap: Map, ): Node[] { const sortedNodes = [...nodes].sort((a, b) => { const depthA = @@ -135,7 
+141,10 @@ export function applySimpleGridLayout( }); } -export function decorateNodesWithViewMode(nodes: Node[], viewMode: string): Node[] { +export function decorateNodesWithViewMode( + nodes: Node[], + viewMode: string, +): Node[] { return nodes.map((node) => ({ ...node, data: { @@ -147,7 +156,7 @@ export function decorateNodesWithViewMode(nodes: Node[], viewMode: string): Node export function decorateEdgesWithStatus( edges: Edge[], - executionMap: Map + executionMap: Map, ): Edge[] { return edges.map((edge) => { const targetExecution = executionMap.get(edge.target); diff --git a/control-plane/web/client/src/components/execution/CompactExecutionHeader.tsx b/control-plane/web/client/src/components/execution/CompactExecutionHeader.tsx index 1823d2dbe..ed8b3be24 100644 --- a/control-plane/web/client/src/components/execution/CompactExecutionHeader.tsx +++ b/control-plane/web/client/src/components/execution/CompactExecutionHeader.tsx @@ -31,11 +31,7 @@ import { AlertDialogHeader, AlertDialogTitle, } from "../ui/alert-dialog"; -import { - Tooltip, - TooltipContent, - TooltipTrigger, -} from "../ui/tooltip"; +import { Tooltip, TooltipContent, TooltipTrigger } from "../ui/tooltip"; import { DropdownMenu, DropdownMenuContent, @@ -59,12 +55,14 @@ import { import { cancelExecution, pauseExecution, + restartExecution, resumeExecution, } from "../../services/executionsApi"; import { useErrorNotification, useSuccessNotification, } from "../ui/notification"; +import { statusTone } from "../../lib/theme"; /* ═══════════════════════════════════════════════════════════════ Types @@ -102,8 +100,7 @@ const formatDuration = formatDurationHumanReadable; function useLiveElapsed(startedAt?: string, status?: string): number | null { const normalized = normalizeExecutionStatus(status); const isActive = normalized === "running"; - const isNonTerminal = - !isTerminalStatus(status) && normalized !== "unknown"; + const isNonTerminal = !isTerminalStatus(status) && normalized !== "unknown"; 
const [elapsed, setElapsed] = useState(() => { if (!startedAt) return null; @@ -155,13 +152,15 @@ export function CompactExecutionHeader({ const isTerminal = isTerminalStatus(execution.status); const showLifecycleControls = isRunning || isPaused; const hasError = !!execution.error_message; + const canRestart = isTerminal || hasError; /* ── Mutation state ── */ const [cancelDialogOpen, setCancelDialogOpen] = useState(false); const [isCancelling, setIsCancelling] = useState(false); const [isPausing, setIsPausing] = useState(false); const [isResuming, setIsResuming] = useState(false); - const isMutating = isCancelling || isPausing || isResuming; + const [isRestarting, setIsRestarting] = useState(false); + const isMutating = isCancelling || isPausing || isResuming || isRestarting; const showSuccess = useSuccessNotification(); const showError = useErrorNotification(); @@ -214,9 +213,7 @@ export function CompactExecutionHeader({ } catch (error) { showError( "Resume failed", - error instanceof Error - ? error.message - : "Unable to resume execution.", + error instanceof Error ? error.message : "Unable to resume execution.", ); } finally { setIsResuming(false); @@ -237,15 +234,36 @@ export function CompactExecutionHeader({ } catch (error) { showError( "Stop failed", - error instanceof Error - ? error.message - : "Unable to stop execution.", + error instanceof Error ? error.message : "Unable to stop execution.", ); } finally { setIsCancelling(false); } }; + const handleRestart = async () => { + if (isMutating) return; + try { + setIsRestarting(true); + const restarted = await restartExecution(execution.execution_id, { + scope: "workflow", + reuse: "succeeded-before", + }); + showSuccess( + "Workflow restarted", + `New run ${restarted.run_id.slice(0, 8)} started from this point.`, + ); + navigate(`/executions/${restarted.execution_id}`); + } catch (error) { + showError( + "Restart failed", + error instanceof Error ? 
error.message : "Unable to restart workflow.", + ); + } finally { + setIsRestarting(false); + } + }; + const handleCopyRunId = async () => { try { await navigator.clipboard.writeText(execution.execution_id); @@ -380,8 +398,7 @@ export function CompactExecutionHeader({ - Started{" "} - {new Date(execution.started_at).toLocaleString()} + Started {new Date(execution.started_at).toLocaleString()} )} @@ -472,6 +489,31 @@ export function CompactExecutionHeader({ )} + {canRestart && ( + + + + + + Restart workflow from this point + + + )} + {/* Refresh with live indicator */} {onRefresh && ( @@ -482,9 +524,7 @@ export function CompactExecutionHeader({ onClick={onRefresh} disabled={isRefreshing} className="relative" - aria-label={ - isRunning ? "Live \u00B7 Refresh" : "Refresh" - } + aria-label={isRunning ? "Live \u00B7 Refresh" : "Refresh"} > {isRunning && ( - + Pause execution @@ -580,11 +617,17 @@ export function CompactExecutionHeader({ )} {showLifecycleControls && } - {onRefresh && ( + {canRestart && ( + + Restart workflow + + )} + {onRefresh && ( + Refresh @@ -707,10 +750,7 @@ export function CompactExecutionHeader({ Keep running - + {isCancelling ? 
"Stopping\u2026" : "Stop execution"} diff --git a/control-plane/web/client/src/components/runs/RunLifecycleMenu.tsx b/control-plane/web/client/src/components/runs/RunLifecycleMenu.tsx index 327255fac..5163baf44 100644 --- a/control-plane/web/client/src/components/runs/RunLifecycleMenu.tsx +++ b/control-plane/web/client/src/components/runs/RunLifecycleMenu.tsx @@ -5,9 +5,11 @@ import { Play, XCircle, Activity, + GitBranch, } from "lucide-react"; import { cn } from "@/lib/utils"; +import { statusTone } from "@/lib/theme"; import { Button } from "@/components/ui/button"; import { DropdownMenu, @@ -59,6 +61,7 @@ interface RunLifecycleMenuProps { onPause: (run: WorkflowSummary) => void; onResume: (run: WorkflowSummary) => void; onCancel: (run: WorkflowSummary) => void; + onRestart?: (run: WorkflowSummary) => void; } /** @@ -74,6 +77,7 @@ export function RunLifecycleMenu({ onPause, onResume, onCancel, + onRestart, }: RunLifecycleMenuProps) { const [menuOpen, setMenuOpen] = useState(false); const [confirmOpen, setConfirmOpen] = useState(false); @@ -91,7 +95,10 @@ export function RunLifecycleMenu({ const canPause = isRunning && Boolean(run.root_execution_id); const canResume = isPaused && Boolean(run.root_execution_id); const canCancel = !isTerminalStatus(run.status); - const hasAnyAction = canPause || canResume || canCancel; + const canRestart = Boolean( + onRestart && run.root_execution_id && isTerminalStatus(run.status), + ); + const hasAnyAction = canPause || canResume || canCancel || canRestart; // Render an inert placeholder with the same footprint so the column // stays aligned across rows even when no action is available. @@ -160,18 +167,44 @@ export function RunLifecycleMenu({ Resume run ) : null} - {canCancel ? ( + {canRestart ? ( + <> + + + Recovery + + + ) : null} + {canRestart ? ( { setMenuOpen(false); - setConfirmOpen(true); + onRestart?.(run); }} > - - Cancel run + + Restart run ) : null} + {canCancel ? 
( + <> + + { + setMenuOpen(false); + setConfirmOpen(true); + }} + > + + Cancel run + + + ) : null} diff --git a/control-plane/web/client/src/components/ui/dropdown-menu.tsx b/control-plane/web/client/src/components/ui/dropdown-menu.tsx index 257298b12..76f0a4085 100644 --- a/control-plane/web/client/src/components/ui/dropdown-menu.tsx +++ b/control-plane/web/client/src/components/ui/dropdown-menu.tsx @@ -47,7 +47,7 @@ const DropdownMenuSubContent = React.forwardRef< ({ - mutationFn: ({ workflowId, reason }) => cancelWorkflowTree(workflowId, reason), + mutationFn: ({ workflowId, reason }) => + cancelWorkflowTree(workflowId, reason), onSuccess: () => { queryClient.invalidateQueries({ queryKey: ["runs"] }); queryClient.invalidateQueries({ queryKey: ["run-dag"] }); @@ -59,3 +64,42 @@ export function useResumeExecution() { }, }); } + +export function useRestartExecution() { + const queryClient = useQueryClient(); + return useMutation< + RestartExecutionResponse, + Error, + string | { executionId: string; request?: RestartExecutionRequest } + >({ + mutationFn: (value) => { + if (typeof value === "string") { + return restartExecution(value); + } + return restartExecution(value.executionId, value.request); + }, + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ["runs"] }); + queryClient.invalidateQueries({ queryKey: ["run-dag"] }); + }, + }); +} + +export function useSaveGoldenRun() { + const queryClient = useQueryClient(); + return useMutation({ + mutationFn: ({ + runId, + name, + tags, + }: { + runId: string; + name?: string; + tags?: string[]; + }) => saveGoldenRun(runId, { name, tags }), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ["runs"] }); + queryClient.invalidateQueries({ queryKey: ["run-dag"] }); + }, + }); +} diff --git a/control-plane/web/client/src/pages/RunDetailPage.tsx b/control-plane/web/client/src/pages/RunDetailPage.tsx index 17abfc1fe..9cc7fd662 100644 --- a/control-plane/web/client/src/pages/RunDetailPage.tsx +++ 
b/control-plane/web/client/src/pages/RunDetailPage.tsx @@ -5,12 +5,11 @@ import { useRunDAG, useCancelWorkflowTree, usePauseExecution, + useRestartExecution, useResumeExecution, + useSaveGoldenRun, } from "@/hooks/queries"; -import { - Card, - CardContent, -} from "@/components/ui/card"; +import { Card, CardContent } from "@/components/ui/card"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; @@ -20,12 +19,14 @@ import { ChevronDown, FileJson, FileCheck2, + GitBranch, Info, Link2, PauseCircle, Play, RefreshCw, RotateCcw, + Star, XCircle, } from "lucide-react"; import { @@ -48,6 +49,21 @@ import { DropdownMenuSeparator, DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; +import { + Dialog, + DialogContent, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; import { Skeleton } from "@/components/ui/skeleton"; import { CopyIdentifierChip } from "@/components/ui/copy-identifier-chip"; import { @@ -57,12 +73,17 @@ import { TooltipTrigger, } from "@/components/ui/tooltip"; import { cn } from "@/lib/utils"; -import { RunTrace, buildTraceTree, formatDuration } from "@/components/RunTrace"; +import { statusTone } from "@/lib/theme"; +import { + RunTrace, + buildTraceTree, + formatDuration, +} from "@/components/RunTrace"; import { SourceIcon } from "@/components/triggers/SourceIcon"; import { ArrowUpRight, RadioTower } from "@/components/ui/icon-bridge"; import { StepDetail } from "@/components/StepDetail"; import { WorkflowDAGViewer } from "@/components/WorkflowDAG"; -import { ErrorBoundary } from '@/components/ErrorBoundary'; +import { ErrorBoundary } from "@/components/ErrorBoundary"; import { ExecutionObservabilityPanel } from 
"@/components/execution"; import { normalizeExecutionStatus, isTerminalStatus } from "@/utils/status"; import { StatusPill } from "@/components/ui/status-pill"; @@ -74,7 +95,10 @@ import type { WorkflowDAGLightweightResponse, } from "@/types/workflows"; import type { WorkflowExecution } from "@/types/executions"; -import { retryExecutionWebhook, getExecutionDetails } from "@/services/executionsApi"; +import { + retryExecutionWebhook, + getExecutionDetails, +} from "@/services/executionsApi"; import { downloadWorkflowVCAuditFile, getWorkflowVCChain, @@ -82,9 +106,7 @@ import { // ─── Helpers ────────────────────────────────────────────────────────────────── -function computeMaxDuration( - timeline: WorkflowDAGLightweightNode[], -): number { +function computeMaxDuration(timeline: WorkflowDAGLightweightNode[]): number { if (!timeline || timeline.length === 0) return 1; const max = Math.max(...timeline.map((n) => n.duration_ms ?? 0)); return Math.max(max, 1); @@ -104,6 +126,21 @@ const ZERO_WEBHOOK_SUMMARY: WebhookRunSummary = { failed_deliveries: 0, }; +function pickRestartNode( + timeline: WorkflowDAGLightweightNode[] | undefined, +): WorkflowDAGLightweightNode | undefined { + return ( + timeline?.find((node) => { + const status = normalizeExecutionStatus(node.status); + return ( + status === "failed" || status === "timeout" || status === "cancelled" + ); + }) ?? + timeline?.find((node) => node.workflow_depth === 0) ?? + timeline?.[0] + ); +} + function RunContextHint({ label, children, @@ -139,7 +176,9 @@ function deriveRunParticipants(dag: WorkflowDAGLightweightResponse): { ids: string[]; source: RunParticipantsSource; } { - const api = (dag.unique_agent_node_ids ?? []).map((id) => id.trim()).filter(Boolean); + const api = (dag.unique_agent_node_ids ?? 
[]) + .map((id) => id.trim()) + .filter(Boolean); if (api.length > 0) { return { ids: [...new Set(api)].sort(), source: "api_agent" }; } @@ -159,8 +198,6 @@ function deriveRunParticipants(dag: WorkflowDAGLightweightResponse): { return { ids: [...reasoners].sort(), source: "reasoner" }; } - - function RunContextNodesCard({ participantIds, source, @@ -293,15 +330,17 @@ function RunContextWebhooksCard({ )} > -
+

Webhooks

Inbound: the trigger that dispatched this run, if any. Outbound: - HTTP callbacks registered on steps in this run and delivery - attempts recorded by the control plane. Failed deliveries listed - below can be retried here. + HTTP callbacks registered on steps in this run and delivery attempts + recorded by the control plane. Failed deliveries listed below can be + retried here.
@@ -340,13 +379,19 @@ function RunContextWebhooksCard({ ) : (

- Not triggered by a webhook — invoked directly or by another reasoner. + Not triggered by a webhook — invoked directly or by another + reasoner.

)}
{/* OUTBOUND */} -
+

Outbound

@@ -362,7 +407,8 @@ function RunContextWebhooksCard({

) : (

- {steps} step{steps === 1 ? "" : "s"} with callbacks · {total} delivery + {steps} step{steps === 1 ? "" : "s"} with callbacks · {total}{" "} + delivery {total === 1 ? "" : "ies"} {succeeded > 0 ? ` · ${succeeded} succeeded` : ""} {failed > 0 ? ` · ${failed} failed` : ""} @@ -407,12 +453,17 @@ function RunContextWebhooksCard({ className="flex flex-wrap items-center justify-between gap-2 rounded-md bg-muted/40 px-2 py-1.5 text-micro-plus" >

-

+

{label}

{f.event_type} - {f.http_status != null ? ` · HTTP ${f.http_status}` : ""} + {f.http_status != null + ? ` · HTTP ${f.http_status}` + : ""}

@@ -478,16 +529,27 @@ export function RunDetailPage() { const { data: dag, isLoading, isError, error } = useRunDAG(runId); const cancelTreeMutation = useCancelWorkflowTree(); const pauseMutation = usePauseExecution(); + const restartMutation = useRestartExecution(); const resumeMutation = useResumeExecution(); + const saveGoldenMutation = useSaveGoldenRun(); const showRunNotification = useRunNotification(); const [cancelDialogOpen, setCancelDialogOpen] = useState(false); + const [forkDialogOpen, setForkDialogOpen] = useState(false); + const [forkExecutionId, setForkExecutionId] = useState(null); + const [forkReuse, setForkReuse] = useState< + "succeeded-before" | "all-succeeded" | "none" + >("succeeded-before"); + const [forkModel, setForkModel] = useState(""); + const [forkReason, setForkReason] = useState(""); const [lifecycleBusy, setLifecycleBusy] = useState< - null | "pause" | "resume" | "cancel" + null | "pause" | "resume" | "cancel" | "restart" | "fork" | "golden" >(null); const [selectedStepId, setSelectedStepId] = useState(null); const [viewMode, setViewMode] = useState<"trace" | "graph">("trace"); - const [surfaceTab, setSurfaceTab] = useState<"execution" | "logs">("execution"); + const [surfaceTab, setSurfaceTab] = useState<"execution" | "logs">( + "execution", + ); const participants = useMemo(() => { if (!dag) { @@ -527,6 +589,205 @@ export function RunDetailPage() { const isSingleStep = (dag?.total_nodes ?? 0) <= 1; const shortId = runId ? runId.substring(0, 12) : "—"; + const rootNodeForActions = + dag?.timeline.find((n) => n.workflow_depth === 0) ?? dag?.timeline[0]; + const restartNodeForActions = pickRestartNode(dag?.timeline); + const actionRunLabel = + dag?.workflow_name?.trim() || + (rootNodeForActions?.agent_node_id && rootNodeForActions?.reasoner_id + ? `${rootNodeForActions.agent_node_id}.${rootNodeForActions.reasoner_id}` + : (rootNodeForActions?.reasoner_id ?? 
"run")); + const actionRootExecutionId = rootNodeForActions?.execution_id; + const actionRestartExecutionId = + restartNodeForActions?.execution_id ?? actionRootExecutionId; + const lineage = dag?.lineage; + const golden = dag?.golden; + + const handleRestartFromRoot = async ( + reuse: "succeeded-before" | "all-succeeded" | "none" = "succeeded-before", + ) => { + if (!actionRestartExecutionId || !runId) return; + setLifecycleBusy(reuse === "none" ? "fork" : "restart"); + try { + const targetExecutionId = forkExecutionId ?? actionRestartExecutionId; + const restarted = await restartMutation.mutateAsync({ + executionId: targetExecutionId, + request: { + scope: "workflow", + reuse, + fork: reuse === "none", + }, + }); + showRunNotification({ + type: "success", + eventKind: "resume", + title: reuse === "none" ? "Fresh run started" : "Restarted", + message: `${actionRunLabel} started as ${restarted.run_id.slice(0, 8)}.`, + runId: restarted.run_id, + runLabel: actionRunLabel, + }); + navigate(`/runs/${restarted.run_id}`); + } catch (err) { + showRunNotification({ + type: "error", + eventKind: "error", + title: "Restart failed", + message: err instanceof Error ? err.message : "Unable to restart run.", + runId, + runLabel: actionRunLabel, + }); + } finally { + setLifecycleBusy(null); + } + }; + + const handleStartFork = async () => { + if (!actionRestartExecutionId || !runId) return; + setLifecycleBusy("fork"); + try { + const context = forkModel.trim() + ? { model: forkModel.trim() } + : undefined; + const restarted = await restartMutation.mutateAsync({ + executionId: forkExecutionId ?? 
actionRestartExecutionId, + request: { + scope: "workflow", + reuse: forkReuse, + fork: true, + reason: forkReason.trim() || undefined, + context, + }, + }); + setForkDialogOpen(false); + setForkExecutionId(null); + showRunNotification({ + type: "success", + eventKind: "resume", + title: "Fork started", + message: `${actionRunLabel} forked as ${restarted.run_id.slice(0, 8)}.`, + runId: restarted.run_id, + runLabel: actionRunLabel, + }); + navigate(`/runs/${restarted.run_id}`); + } catch (err) { + showRunNotification({ + type: "error", + eventKind: "error", + title: "Fork failed", + message: err instanceof Error ? err.message : "Unable to start fork.", + runId, + runLabel: actionRunLabel, + }); + } finally { + setLifecycleBusy(null); + } + }; + + const handleRestartWorkflowFromNode = async (node: { + execution_id: string; + reasoner_id: string; + }) => { + if (!runId) return; + setLifecycleBusy("restart"); + try { + const restarted = await restartMutation.mutateAsync({ + executionId: node.execution_id, + request: { scope: "workflow", reuse: "succeeded-before" }, + }); + showRunNotification({ + type: "success", + eventKind: "resume", + title: "Restarted", + message: `${node.reasoner_id} started as ${restarted.run_id.slice(0, 8)}.`, + runId: restarted.run_id, + runLabel: node.reasoner_id, + }); + navigate(`/runs/${restarted.run_id}`); + } catch (err) { + showRunNotification({ + type: "error", + eventKind: "error", + title: "Restart failed", + message: + err instanceof Error + ? 
err.message + : "Unable to restart from this node.", + runId, + runLabel: node.reasoner_id, + }); + } finally { + setLifecycleBusy(null); + } + }; + + const handleRerunNodeOnly = async (node: { + execution_id: string; + reasoner_id: string; + }) => { + if (!runId) return; + setLifecycleBusy("restart"); + try { + const restarted = await restartMutation.mutateAsync({ + executionId: node.execution_id, + request: { scope: "execution", reuse: "succeeded-before" }, + }); + showRunNotification({ + type: "success", + eventKind: "resume", + title: "Node rerun started", + message: `${node.reasoner_id} started as ${restarted.run_id.slice(0, 8)}.`, + runId: restarted.run_id, + runLabel: node.reasoner_id, + }); + navigate(`/runs/${restarted.run_id}`); + } catch (err) { + showRunNotification({ + type: "error", + eventKind: "error", + title: "Rerun failed", + message: + err instanceof Error ? err.message : "Unable to rerun this node.", + runId, + runLabel: node.reasoner_id, + }); + } finally { + setLifecycleBusy(null); + } + }; + + const handleSaveGolden = async () => { + if (!runId) return; + setLifecycleBusy("golden"); + try { + await saveGoldenMutation.mutateAsync({ + runId, + name: dag?.workflow_name || actionRunLabel, + tags: ["regression"], + }); + showRunNotification({ + type: "success", + eventKind: "resume", + title: "Golden run saved", + message: `${actionRunLabel} is available for future forks.`, + runId, + runLabel: actionRunLabel, + }); + void queryClient.invalidateQueries({ queryKey: ["run-dag", runId] }); + } catch (err) { + showRunNotification({ + type: "error", + eventKind: "error", + title: "Save failed", + message: + err instanceof Error ? err.message : "Unable to save golden run.", + runId, + runLabel: actionRunLabel, + }); + } finally { + setLifecycleBusy(null); + } + }; + // ─── Loading state ────────────────────────────────────────────────────────── if (isLoading) { return ( @@ -557,9 +818,7 @@ export function RunDetailPage() { if (isError) { return (
-

- Run {shortId} -

+

Run {shortId}

{error instanceof Error ? error.message : "Failed to load run"}
@@ -571,15 +830,16 @@ export function RunDetailPage() { if (!dag) { return (
-

- Run {shortId} -

-

No data available for this run.

+

Run {shortId}

+

+ No data available for this run. +

); } - const rootNode = dag.timeline.find((n) => n.workflow_depth === 0) ?? dag.timeline[0]; + const rootNode = + dag.timeline.find((n) => n.workflow_depth === 0) ?? dag.timeline[0]; const rootExecution: WorkflowExecution = { id: 0, workflow_id: workflowIdForVc, @@ -605,26 +865,36 @@ export function RunDetailPage() { error_message: undefined, retry_count: 0, created_at: rootNode?.started_at ?? dag.timeline[0]?.started_at ?? "", - updated_at: rootNode?.completed_at ?? rootNode?.started_at ?? dag.timeline[0]?.started_at ?? "", + updated_at: + rootNode?.completed_at ?? + rootNode?.started_at ?? + dag.timeline[0]?.started_at ?? + "", notes: [], webhook_registered: false, webhook_events: [], }; const selectedNode = - dag.timeline.find((node) => node.execution_id === selectedStepId) ?? rootNode; + dag.timeline.find((node) => node.execution_id === selectedStepId) ?? + rootNode; const selectedExecution: WorkflowExecution = { ...rootExecution, execution_id: selectedNode?.execution_id ?? rootExecution.execution_id, agent_node_id: selectedNode?.agent_node_id ?? rootExecution.agent_node_id, - workflow_depth: selectedNode?.workflow_depth ?? rootExecution.workflow_depth, + workflow_depth: + selectedNode?.workflow_depth ?? rootExecution.workflow_depth, reasoner_id: selectedNode?.reasoner_id ?? rootExecution.reasoner_id, - status: normalizeExecutionStatus(selectedNode?.status ?? dag.workflow_status), + status: normalizeExecutionStatus( + selectedNode?.status ?? dag.workflow_status, + ), started_at: selectedNode?.started_at ?? rootExecution.started_at, completed_at: selectedNode?.completed_at ?? rootExecution.completed_at, duration_ms: selectedNode?.duration_ms ?? rootExecution.duration_ms, created_at: selectedNode?.started_at ?? rootExecution.created_at, updated_at: - selectedNode?.completed_at ?? selectedNode?.started_at ?? rootExecution.updated_at, + selectedNode?.completed_at ?? + selectedNode?.started_at ?? 
+ rootExecution.updated_at, }; const workflowId = dag.root_workflow_id || runId || ""; @@ -634,10 +904,7 @@ export function RunDetailPage() { vcChain?.workflow_vc?.issuer_did?.trim() || ""; - const runTitle = - dag.workflow_name?.trim() || - rootNode?.reasoner_id || - "Run"; + const runTitle = dag.workflow_name?.trim() || rootNode?.reasoner_id || "Run"; const runTitleDisplay = truncateEnd(runTitle, RUN_DETAIL_TITLE_MAX_CHARS); const metaParts: string[] = []; @@ -664,7 +931,7 @@ export function RunDetailPage() {

{runTitleDisplay} @@ -693,9 +960,49 @@ export function RunDetailPage() { rootNodeForBadge?.status ?? dag.workflow_status, ); return ( - + ); })()} + {golden ? ( + + + Golden + + ) : null} + {lineage?.source_run_id ? ( + + + {lineage.kind === "fork" ? "Forked" : "Restarted"} + + ) : null}

{sessionTrim ? ( @@ -759,6 +1066,90 @@ export function RunDetailPage() {
+ {actionRootExecutionId && isTerminalStatus(dag.workflow_status) ? ( + <> + + + + + + + + Recovery + + + setForkDialogOpen(true)} + > + + Fork with changes + + void handleRestartFromRoot("none")} + > + + Fresh rerun + + {lineage?.source_run_id ? ( + + navigate( + `/runs/compare?a=${lineage.source_run_id}&b=${runId}`, + ) + } + > + + Compare with source + + ) : null} + + void handleSaveGolden()} + > + + {golden ? "Golden run saved" : "Save as golden run"} + + + + + ) : null} + {/* Replay */}
+ { + setForkDialogOpen(open); + if (!open) setForkExecutionId(null); + }} + > + + + Fork with changes + +
+
+ + +
+
+ + setForkModel(event.target.value)} + placeholder="openrouter/openai/gpt-oss-120b" + /> +
+
+ + setForkReason(event.target.value)} + placeholder="Compare model behavior" + /> +
+
+ + + + +
+
+ {/* Cancellation / pause registered strip — shown when the root execution is cancelled or paused by the user but at least one child is still reporting 'running'. This is the honest depiction @@ -1103,8 +1583,7 @@ export function RunDetailPage() { cannot be killed mid-dispatch and will finish naturally. */} {(() => { const rootNodeForStrip = - dag.timeline.find((n) => n.workflow_depth === 0) ?? - dag.timeline[0]; + dag.timeline.find((n) => n.workflow_depth === 0) ?? dag.timeline[0]; const rootStatus = normalizeExecutionStatus(rootNodeForStrip?.status); if (rootStatus !== "cancelled" && rootStatus !== "paused") return null; const stillRunning = dag.timeline.filter( @@ -1148,7 +1627,9 @@ export function RunDetailPage() { failures={dag.webhook_failures ?? []} onSelectStep={setSelectedStepId} onRefetchDag={() => { - void queryClient.invalidateQueries({ queryKey: ["run-dag", runId] }); + void queryClient.invalidateQueries({ + queryKey: ["run-dag", runId], + }); void queryClient.invalidateQueries({ queryKey: ["step-detail"] }); }} /> @@ -1245,10 +1726,20 @@ export function RunDetailPage() { className="h-full min-h-0 flex-1" workflowId={dag.root_workflow_id || runId || ""} dagData={dag} - selectedNodeIds={selectedStepId ? [selectedStepId] : undefined} + selectedNodeIds={ + selectedStepId ? [selectedStepId] : undefined + } onExecutionClick={(execution) => setSelectedStepId(execution.execution_id) } + onRestartWorkflowFromNode={ + handleRestartWorkflowFromNode + } + onRerunNodeOnly={handleRerunNodeOnly} + onForkFromNode={(execution) => { + setForkExecutionId(execution.execution_id); + setForkDialogOpen(true); + }} />
diff --git a/control-plane/web/client/src/pages/RunsPage.tsx b/control-plane/web/client/src/pages/RunsPage.tsx index df417b6ed..18ea14561 100644 --- a/control-plane/web/client/src/pages/RunsPage.tsx +++ b/control-plane/web/client/src/pages/RunsPage.tsx @@ -6,16 +6,22 @@ import { ArrowUp, Check, Copy, + GitBranch, Play, + Star, } from "lucide-react"; import { useQuery } from "@tanstack/react-query"; import { useRuns, useCancelWorkflowTree, usePauseExecution, + useRestartExecution, useResumeExecution, } from "@/hooks/queries"; -import type { WorkflowSummary } from "@/types/workflows"; +import type { + WorkflowDAGLightweightNode, + WorkflowSummary, +} from "@/types/workflows"; import { getStatusLabel, isTerminalStatus, @@ -44,6 +50,7 @@ import { } from "@/components/runs/RunLifecycleMenu"; import { StatusDot } from "@/components/ui/status-pill"; import { cn } from "@/lib/utils"; +import { statusTone } from "@/lib/theme"; import { Table, TableBody, @@ -92,9 +99,8 @@ import { useSidebar } from "@/components/ui/sidebar"; import { SortableHeaderCell } from "@/components/ui/CompactTable"; import { SourceIcon } from "@/components/triggers/SourceIcon"; import { getExecutionDetails } from "@/services/executionsApi"; -import { - JsonHighlightedPre, -} from "@/components/ui/json-syntax-highlight"; +import { getWorkflowDAGLightweight } from "@/services/workflowsApi"; +import { JsonHighlightedPre } from "@/components/ui/json-syntax-highlight"; import { formatAbsoluteStarted, formatDuration, @@ -181,7 +187,9 @@ function StartedAtCell({ run }: { run: WorkflowSummary }) {

Started

-

{absolute}

+

+ {absolute} +

{liveGranular ? "Live elapsed time (updates every second)." @@ -248,7 +256,9 @@ function DurationCell({ run }: { run: WorkflowSummary }) { // yet (e.g. queued and never dispatched), fall back to the dash. const startedMs = run.started_at ? new Date(run.started_at).getTime() : NaN; if (Number.isNaN(startedMs)) { - return ; + return ( + + ); } const elapsed = Math.max(0, now - startedMs); return ( @@ -428,6 +438,19 @@ function StatusMenuDot({ canonical }: { canonical: CanonicalStatus }) { const PAGE_SIZE_OPTIONS = [10, 25, 50, 100] as const; const DEFAULT_PAGE_SIZE = 25; +function pickRestartExecutionIdFromTimeline( + timeline: WorkflowDAGLightweightNode[] | undefined, + fallbackExecutionId: string, +): string { + const failed = timeline?.find((node) => { + const status = normalizeExecutionStatus(node.status); + return ( + status === "failed" || status === "timeout" || status === "cancelled" + ); + }); + return failed?.execution_id || fallbackExecutionId; +} + interface RunsPaginationBarProps { placement: "top" | "bottom"; totalCount: number; @@ -479,8 +502,8 @@ function RunsPaginationBar({ {totalCount === 0 ? 0 : (page - 1) * pageSize + pageRowCount} {" "} - of {totalCount}{" "} - run{totalCount === 1 ? "" : "s"} + of {totalCount} run + {totalCount === 1 ? "" : "s"}

@@ -556,6 +579,7 @@ export function RunsPage() { const [searchParams] = useSearchParams(); const cancelTreeMutation = useCancelWorkflowTree(); const pauseMutation = usePauseExecution(); + const restartMutation = useRestartExecution(); const resumeMutation = useResumeExecution(); const showSuccess = useSuccessNotification(); const showError = useErrorNotification(); @@ -620,7 +644,13 @@ export function RunsPage() { clearPending(execId); } }, - [pauseMutation, showRunNotification, markPending, clearPending, runDisplayLabel], + [ + pauseMutation, + showRunNotification, + markPending, + clearPending, + runDisplayLabel, + ], ); const handleResumeRun = useCallback( @@ -651,7 +681,13 @@ export function RunsPage() { clearPending(execId); } }, - [resumeMutation, showRunNotification, markPending, clearPending, runDisplayLabel], + [ + resumeMutation, + showRunNotification, + markPending, + clearPending, + runDisplayLabel, + ], ); const handleCancelRun = useCallback( @@ -693,7 +729,56 @@ export function RunsPage() { clearPending(pendingKey); } }, - [cancelTreeMutation, showRunNotification, markPending, clearPending, runDisplayLabel], + [ + cancelTreeMutation, + showRunNotification, + markPending, + clearPending, + runDisplayLabel, + ], + ); + + const handleRestartRun = useCallback( + async (run: WorkflowSummary) => { + const rootExecId = run.root_execution_id; + if (!rootExecId) return; + markPending(rootExecId); + try { + const dag = await getWorkflowDAGLightweight(run.run_id); + const restartExecutionId = pickRestartExecutionIdFromTimeline( + dag.timeline, + rootExecId, + ); + const restarted = await restartMutation.mutateAsync(restartExecutionId); + showRunNotification({ + type: "success", + eventKind: "resume", + title: "Restarted", + message: `${runDisplayLabel(run)} started as ${restarted.run_id.slice(0, 8)} with prior successful calls available for replay.`, + runId: restarted.run_id, + runLabel: runDisplayLabel(run), + }); + } catch (err) { + showRunNotification({ + 
type: "error", + eventKind: "error", + title: "Restart failed", + message: + err instanceof Error ? err.message : "Unable to restart run.", + runId: run.run_id, + runLabel: runDisplayLabel(run), + }); + } finally { + clearPending(rootExecId); + } + }, + [ + restartMutation, + showRunNotification, + markPending, + clearPending, + runDisplayLabel, + ], ); // Bulk confirmation dialog state — a single shared AlertDialog for the @@ -708,14 +793,19 @@ export function RunsPage() { return { left: pad, right: pad } as const; } const w = - sidebarState === "collapsed" ? "var(--sidebar-width-icon)" : "var(--sidebar-width)"; + sidebarState === "collapsed" + ? "var(--sidebar-width-icon)" + : "var(--sidebar-width)"; return { left: `calc(${w} + ${pad})`, right: pad } as const; }, [isMobile, sidebarState]); // filter state const [timeRange, setTimeRange] = useState("all"); /** Empty set = all statuses (no restriction). */ - const [selectedStatuses, setSelectedStatuses] = useState>(() => new Set()); + const [selectedStatuses, setSelectedStatuses] = useState>( + () => new Set(), + ); + const [showGoldenOnly, setShowGoldenOnly] = useState(false); // Seed search from ?search= URL param so deep links from the trigger sheet's // Dispatch target chip (e.g. /runs?search=summarize_issue) land pre-filtered. const initialSearch = searchParams.get("search") ?? 
""; @@ -791,13 +881,7 @@ export function RunsPage() { sortOrder, }; } - }, [ - timeRange, - statusFilterKey, - debouncedSearch, - sortBy, - sortOrder, - ]); + }, [timeRange, statusFilterKey, debouncedSearch, sortBy, sortOrder]); const filters = useMemo( () => ({ @@ -841,6 +925,7 @@ export function RunsPage() { const hasActiveFilters = timeRange !== "all" || selectedStatuses.size > 0 || + showGoldenOnly || search.trim() !== "" || debouncedSearch.trim() !== ""; @@ -851,6 +936,7 @@ export function RunsPage() { } setTimeRange("all"); setSelectedStatuses(new Set()); + setShowGoldenOnly(false); setSearch(""); setDebouncedSearch(""); setSelected(new Set()); @@ -867,13 +953,18 @@ export function RunsPage() { /** Server applies status when exactly one is selected; otherwise narrow here (multi-status OR). */ const filteredRuns = useMemo(() => { let rows = pageRows; + if (showGoldenOnly) { + rows = rows.filter((r) => Boolean(r.golden)); + } if (selectedStatuses.size > 1) { rows = rows.filter((r) => - selectedStatuses.has(normalizeExecutionStatus(r.root_execution_status ?? r.status)), + selectedStatuses.has( + normalizeExecutionStatus(r.root_execution_status ?? 
r.status), + ), ); } return rows; - }, [pageRows, selectedStatuses]); + }, [pageRows, selectedStatuses, showGoldenOnly]); // row click const handleRowClick = useCallback( @@ -884,21 +975,18 @@ export function RunsPage() { ); // checkbox selection - const toggleSelect = useCallback( - (runId: string, e: React.MouseEvent) => { - e.stopPropagation(); - setSelected((prev) => { - const next = new Set(prev); - if (next.has(runId)) { - next.delete(runId); - } else { - next.add(runId); - } - return next; - }); - }, - [], - ); + const toggleSelect = useCallback((runId: string, e: React.MouseEvent) => { + e.stopPropagation(); + setSelected((prev) => { + const next = new Set(prev); + if (next.has(runId)) { + next.delete(runId); + } else { + next.add(runId); + } + return next; + }); + }, []); const toggleSelectAll = useCallback(() => { if (selected.size === filteredRuns.length && filteredRuns.length > 0) { @@ -911,6 +999,14 @@ export function RunsPage() { const allSelected = filteredRuns.length > 0 && selected.size === filteredRuns.length; const someSelected = selected.size > 0 && !allSelected; + const hasClientSideRowFilter = showGoldenOnly || selectedStatuses.size > 1; + const visibleTotalCount = hasClientSideRowFilter + ? filteredRuns.length + : totalCount; + const visibleTotalPages = hasClientSideRowFilter ? 1 : totalPages; + const visiblePageRowCount = hasClientSideRowFilter + ? filteredRuns.length + : pageRows.length; const handleFilterChange = useCallback( (setter: (v: string) => void) => (value: string) => { @@ -967,6 +1063,22 @@ export function RunsPage() { onSelectedChange={handleStatusesFilterChange} pluralLabel={(n) => `${n} statuses`} /> +
-
- - - - {/* Checkbox */} - - - - {/* Status first — most scannable */} - - {/* Target + short run id (full id via copy) */} - - - Target - - - - {/* Steps — complexity */} - - {/* Duration — performance */} - - {/* Started — when (relative) */} - - {/* Lifecycle actions (kebab) — right-anchored, no header label */} - - - - - {loadingInitial ? ( - - - Loading runs… - - - ) : isError ? ( - - - {error instanceof Error ? error.message : "Failed to load runs"} - - - ) : filteredRuns.length === 0 ? ( +
+
+ - -
- -

No runs found

-

- {pageRows.length > 0 && selectedStatuses.size > 0 - ? "No rows match the current status filters on this page. Try clearing filters or another page." - : timeRange !== "all" - ? "Try expanding the time range" - : "Execute a reasoner to create your first run"} -

-
-
-
- ) : ( - filteredRuns.map((run) => ( - + + + {/* Status first — most scannable */} + + + + {/* Target + short run id (full id via copy) */} + + + Target + + + + {/* Steps — complexity */} + + + + {/* Duration — performance */} + + + + {/* Started — when (relative) */} + + + + {/* Lifecycle actions (kebab) — right-anchored, no header label */} + - )) - )} - -
-
+ + + + {loadingInitial ? ( + + + Loading runs… + + + ) : isError ? ( + + + {error instanceof Error + ? error.message + : "Failed to load runs"} + + + ) : filteredRuns.length === 0 ? ( + + +
+ +

+ No runs found +

+

+ {pageRows.length > 0 && selectedStatuses.size > 0 + ? "No rows match the current status filters on this page. Try clearing filters or another page." + : timeRange !== "all" + ? "Try expanding the time range" + : "Execute a reasoner to create your first run"} +

+
+
+
+ ) : ( + filteredRuns.map((run) => ( + + )) + )} +
+ +
void; onResumeRun: (run: WorkflowSummary) => void; onCancelRun: (run: WorkflowSummary) => void; + onRestartRun: (run: WorkflowSummary) => void; } function RunRow({ @@ -1585,6 +1739,7 @@ function RunRow({ onPauseRun, onResumeRun, onCancelRun, + onRestartRun, }: RunRowProps) { const agentLabel = run.agent_id || run.agent_name || ""; const reasonerLabel = run.root_reasoner || run.display_name || "—"; @@ -1601,14 +1756,17 @@ function RunRow({ tabIndex={0} onClick={() => onRowClick(run)} onKeyDown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { + if (e.key === "Enter" || e.key === " ") { e.preventDefault(); onRowClick(run); } }} > {/* Checkbox */} - onToggleSelect(run.run_id, e)}> + onToggleSelect(run.run_id, e)} + > {/* Target name, then inline copy-chip for run id (no sub-column) */} - +
- + {agentLabel ? ( <> {agentLabel}. @@ -1677,7 +1831,11 @@ function RunRow({ aria-label="Preview run input and output" onClick={(e) => e.stopPropagation()} > - + ) : null} + {run.golden ? ( + + + Golden + + ) : null} + {run.lineage?.source_run_id ? ( + e.stopPropagation()} + > + + {run.lineage.kind === "fork" ? "Forked" : "Restarted"} + + ) : null} + + +
), })); @@ -204,6 +223,29 @@ vi.mock("@/components/ui/alert-dialog", () => ({ AlertDialogTitle: ({ children }: React.PropsWithChildren) =>
{children}
, })); +vi.mock("@/components/ui/dialog", () => ({ + Dialog: ({ children, open }: React.PropsWithChildren<{ open?: boolean }>) => + open ?
{children}
: null, + DialogContent: ({ children }: React.PropsWithChildren) =>
{children}
, + DialogFooter: ({ children }: React.PropsWithChildren) =>
{children}
, + DialogHeader: ({ children }: React.PropsWithChildren) =>
{children}
, + DialogTitle: ({ children }: React.PropsWithChildren) =>
{children}
, +})); + +vi.mock("@/components/ui/input", () => ({ + Input: (props: React.InputHTMLAttributes) => , +})); + +vi.mock("@/components/ui/select", () => ({ + Select: ({ children }: React.PropsWithChildren<{ value?: string; onValueChange?: (value: string) => void }>) => ( +
{children}
+ ), + SelectContent: ({ children }: React.PropsWithChildren) =>
{children}
, + SelectItem: ({ children }: React.PropsWithChildren<{ value: string }>) =>
{children}
, + SelectTrigger: ({ children }: React.PropsWithChildren) =>
{children}
, + SelectValue: () => select value, +})); + vi.mock("@/components/ui/dropdown-menu", () => ({ DropdownMenu: ({ children }: React.PropsWithChildren) =>
{children}
, DropdownMenuTrigger: ({ children }: React.PropsWithChildren) =>
{children}
, @@ -342,6 +384,8 @@ describe("RunDetailPage", () => { }); state.pauseMutateAsync.mockReset(); state.resumeMutateAsync.mockReset(); + state.restartMutateAsync.mockReset(); + state.saveGoldenMutateAsync.mockReset(); state.showRunNotification.mockReset(); state.getExecutionDetails.mockReset(); state.retryExecutionWebhook.mockReset(); @@ -594,6 +638,134 @@ describe("RunDetailPage", () => { ); }); + it("runs restart, fresh rerun, and fork lifecycle actions", async () => { + state.runDag = { + data: { + ...buildDag(), + workflow_status: "failed", + timeline: [ + { ...buildDag().timeline[0], status: "succeeded" }, + { ...buildDag().timeline[1], status: "failed" }, + ], + }, + isLoading: false, + isError: false, + error: null, + }; + state.restartMutateAsync.mockResolvedValue({ run_id: "run-new-123456", execution_id: "exec-new" }); + + renderPage(); + expect(await screen.findByText("Run Alpha")).toBeInTheDocument(); + + fireEvent.click(screen.getByText("Restart run")); + await waitFor(() => { + expect(state.restartMutateAsync).toHaveBeenCalledWith({ + executionId: "exec-2", + request: { + scope: "workflow", + reuse: "succeeded-before", + fork: false, + }, + }); + }); + expect(state.showRunNotification).toHaveBeenCalledWith( + expect.objectContaining({ title: "Restarted", runId: "run-new-123456" }), + ); + expect(state.navigateSpy).toHaveBeenCalledWith("/runs/run-new-123456"); + + fireEvent.click(screen.getByText("Fresh rerun")); + await waitFor(() => { + expect(state.restartMutateAsync).toHaveBeenCalledWith({ + executionId: "exec-2", + request: { + scope: "workflow", + reuse: "none", + fork: true, + }, + }); + }); + + fireEvent.click(screen.getByText("Fork with changes")); + fireEvent.change(screen.getByPlaceholderText("openrouter/openai/gpt-oss-120b"), { + target: { value: "google/gemini-3.1-flash-lite" }, + }); + fireEvent.change(screen.getByPlaceholderText("Compare model behavior"), { + target: { value: "compare flash lite" }, + }); + 
fireEvent.click(screen.getByText("Start fork")); + await waitFor(() => { + expect(state.restartMutateAsync).toHaveBeenCalledWith({ + executionId: "exec-2", + request: { + scope: "workflow", + reuse: "succeeded-before", + fork: true, + reason: "compare flash lite", + context: { model: "google/gemini-3.1-flash-lite" }, + }, + }); + }); + expect(state.showRunNotification).toHaveBeenCalledWith( + expect.objectContaining({ title: "Fork started", runId: "run-new-123456" }), + ); + }); + + it("runs restart actions from graph node callbacks", async () => { + state.runDag = { + data: { + ...buildDag(), + workflow_status: "failed", + timeline: [ + { ...buildDag().timeline[0], status: "succeeded" }, + { ...buildDag().timeline[1], status: "failed" }, + ], + }, + isLoading: false, + isError: false, + error: null, + }; + state.restartMutateAsync + .mockResolvedValueOnce({ run_id: "run-node-restart", execution_id: "exec-new" }) + .mockResolvedValueOnce({ run_id: "run-node-rerun", execution_id: "exec-new-2" }) + .mockRejectedValueOnce(new Error("node restart failed")); + + renderPage(); + expect(await screen.findByText("Run Alpha")).toBeInTheDocument(); + + fireEvent.click(screen.getByText("Graph")); + fireEvent.click(await screen.findByText("Graph restart worker")); + await waitFor(() => { + expect(state.restartMutateAsync).toHaveBeenCalledWith({ + executionId: "exec-2", + request: { scope: "workflow", reuse: "succeeded-before" }, + }); + }); + expect(state.showRunNotification).toHaveBeenCalledWith( + expect.objectContaining({ title: "Restarted", runId: "run-node-restart" }), + ); + + fireEvent.click(screen.getByText("Graph rerun worker")); + await waitFor(() => { + expect(state.restartMutateAsync).toHaveBeenCalledWith({ + executionId: "exec-2", + request: { scope: "execution", reuse: "succeeded-before" }, + }); + }); + expect(state.showRunNotification).toHaveBeenCalledWith( + expect.objectContaining({ title: "Node rerun started", runId: "run-node-rerun" }), + ); + + 
fireEvent.click(screen.getByText("Graph restart worker")); + await waitFor(() => { + expect(state.showRunNotification).toHaveBeenCalledWith( + expect.objectContaining({ title: "Restart failed", message: "node restart failed" }), + ); + }); + + fireEvent.click(screen.getByText("Graph fork worker")); + expect(screen.getAllByText("Fork with changes").length).toBeGreaterThan(0); + }); + it("notifies with steps-cancelled message when cancel-tree succeeds with cancelled_count > 0", async () => { state.runDag = { data: buildDag(), diff --git a/control-plane/web/client/src/test/pages/RunsPage.test.tsx b/control-plane/web/client/src/test/pages/RunsPage.test.tsx index eb8f3bb27..7b366d752 100644 --- a/control-plane/web/client/src/test/pages/RunsPage.test.tsx +++ b/control-plane/web/client/src/test/pages/RunsPage.test.tsx @@ -1,6 +1,12 @@ // @ts-nocheck import * as React from "react"; -import { act, fireEvent, render, screen, waitFor } from "@testing-library/react"; +import { + act, + fireEvent, + render, + screen, + waitFor, +} from "@testing-library/react"; import userEvent from "@testing-library/user-event"; import { beforeEach, describe, expect, it, vi } from "vitest"; @@ -10,6 +16,7 @@ const { useRunsMock, cancelTreeMutationMock, pauseMutationMock, + restartMutationMock, resumeMutationMock, useQueryMock, showSuccessMock, @@ -17,12 +24,14 @@ const { showWarningMock, showRunNotificationMock, clipboardWriteTextMock, + getWorkflowDAGLightweightMock, } = vi.hoisted(() => ({ navigateMock: vi.fn(), searchParamsState: { value: new URLSearchParams() }, useRunsMock: vi.fn(), cancelTreeMutationMock: vi.fn(), pauseMutationMock: vi.fn(), + restartMutationMock: vi.fn(), resumeMutationMock: vi.fn(), useQueryMock: vi.fn(), showSuccessMock: vi.fn(), @@ -30,6 +39,7 @@ const { showWarningMock: vi.fn(), showRunNotificationMock: vi.fn(), clipboardWriteTextMock: vi.fn(), + getWorkflowDAGLightweightMock: vi.fn(), })); vi.mock("react-router-dom", () => ({ @@ -39,7 +49,9 @@ 
vi.mock("react-router-dom", () => ({ to, children, ...props - }: React.PropsWithChildren<{ to: string } & React.AnchorHTMLAttributes>) => ( + }: React.PropsWithChildren< + { to: string } & React.AnchorHTMLAttributes + >) => ( {children} @@ -54,10 +66,15 @@ vi.mock("@/services/executionsApi", () => ({ getExecutionDetails: vi.fn(), })); +vi.mock("@/services/workflowsApi", () => ({ + getWorkflowDAGLightweight: getWorkflowDAGLightweightMock, +})); + vi.mock("@/hooks/queries", () => ({ useRuns: useRunsMock, useCancelWorkflowTree: () => ({ mutateAsync: cancelTreeMutationMock }), usePauseExecution: () => ({ mutateAsync: pauseMutationMock }), + useRestartExecution: () => ({ mutateAsync: restartMutationMock }), useResumeExecution: () => ({ mutateAsync: resumeMutationMock }), })); @@ -76,9 +93,9 @@ vi.mock("lucide-react", async (importOriginal) => { const actual = await importOriginal(); const ReactModule = await vi.importActual("react"); const makeIcon = (name: string) => - ReactModule.forwardRef((props, ref) => ( - - )); + ReactModule.forwardRef( + (props, ref) => , + ); return { ...actual, @@ -92,10 +109,20 @@ vi.mock("lucide-react", async (importOriginal) => { }); vi.mock("@/components/ui/alert-dialog", () => ({ - AlertDialog: ({ open, children }: { open?: boolean; children: React.ReactNode }) => - open ?
{children}
: null, + AlertDialog: ({ + open, + children, + }: { + open?: boolean; + children: React.ReactNode; + }) => (open ?
{children}
: null), AlertDialogAction: ({ children, onClick, disabled, className }: any) => ( - ), @@ -104,19 +131,31 @@ vi.mock("@/components/ui/alert-dialog", () => ({ {children} ), - AlertDialogContent: ({ children }: { children: React.ReactNode }) =>
{children}
, - AlertDialogDescription: ({ children }: { children: React.ReactNode }) =>
{children}
, - AlertDialogFooter: ({ children }: { children: React.ReactNode }) =>
{children}
, - AlertDialogHeader: ({ children }: { children: React.ReactNode }) =>
{children}
, - AlertDialogTitle: ({ children }: { children: React.ReactNode }) =>
{children}
, + AlertDialogContent: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), + AlertDialogDescription: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), + AlertDialogFooter: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), + AlertDialogHeader: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), + AlertDialogTitle: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), })); vi.mock("@/components/runs/RunLifecycleMenu", () => ({ CANCEL_RUN_COPY: { - title: (count: number) => (count > 1 ? `Cancel ${count} runs?` : "Cancel this run?"), + title: (count: number) => + count > 1 ? `Cancel ${count} runs?` : "Cancel this run?", description: "Nodes currently executing will finish their current step — only pending nodes will be stopped. Any in-flight work will be discarded. This cannot be undone.", - confirmLabel: (count: number) => (count > 1 ? `Cancel ${count} runs` : "Cancel run"), + confirmLabel: (count: number) => + count > 1 ? `Cancel ${count} runs` : "Cancel run", keepLabel: "Keep running", }, RunLifecycleMenu: ({ @@ -124,11 +163,13 @@ vi.mock("@/components/runs/RunLifecycleMenu", () => ({ onPause, onResume, onCancel, + onRestart, }: { run: (typeof baseRuns)[number]; onPause: (run: (typeof baseRuns)[number]) => void; onResume: (run: (typeof baseRuns)[number]) => void; onCancel: (run: (typeof baseRuns)[number]) => void; + onRestart?: (run: (typeof baseRuns)[number]) => void; }) => (
+ {onRestart ? ( + + ) : null}
), })); @@ -149,11 +195,17 @@ vi.mock("@/components/ui/status-pill", () => ({ })); vi.mock("@/components/ui/table", () => ({ - Table: ({ children }: { children: React.ReactNode }) => {children}
, - TableBody: ({ children }: { children: React.ReactNode }) => {children}, + Table: ({ children }: { children: React.ReactNode }) => ( + {children}
+ ), + TableBody: ({ children }: { children: React.ReactNode }) => ( + {children} + ), TableCell: ({ children, ...props }: any) => {children}, TableHead: ({ children, ...props }: any) => {children}, - TableHeader: ({ children }: { children: React.ReactNode }) => {children}, + TableHeader: ({ children }: { children: React.ReactNode }) => ( + {children} + ), TableRow: ({ children, ...props }: any) => {children}, })); @@ -181,9 +233,12 @@ vi.mock("@/components/ui/checkbox", () => ({ })); vi.mock("@/components/ui/card", () => ({ - Card: ({ children, variant: _variant, interactive: _interactive, ...props }: any) => ( -
{children}
- ), + Card: ({ + children, + variant: _variant, + interactive: _interactive, + ...props + }: any) =>
{children}
, })); vi.mock("@/components/ui/filter-combobox", () => ({ @@ -206,7 +261,12 @@ vi.mock("@/components/ui/filter-combobox", () => ({ })); vi.mock("@/components/ui/filter-multi-combobox", () => ({ - FilterMultiCombobox: ({ label, options, selected, onSelectedChange }: any) => ( + FilterMultiCombobox: ({ + label, + options, + selected, + onSelectedChange, + }: any) => (
{options.map((option: { value: string; label: string }) => ( ), @@ -287,9 +371,15 @@ vi.mock("@/components/ui/pagination", () => ({ })); vi.mock("@/components/ui/select", () => ({ - Select: ({ children }: { children: React.ReactNode }) =>
{children}
, - SelectContent: ({ children }: { children: React.ReactNode }) =>
{children}
, - SelectItem: ({ children }: { children: React.ReactNode }) =>
{children}
, + Select: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), + SelectContent: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), + SelectItem: ({ children }: { children: React.ReactNode }) => ( +
{children}
+ ), SelectTrigger: ({ children, className, "aria-label": ariaLabel }: any) => (