diff --git a/CHANGELOG.md b/CHANGELOG.md index d0ee1edda5..74da8e63fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,12 @@ and this project adheres to ### Fixed +- `step:complete` no longer crashes with an unhandled `Postgrex.Error` (code + `22P05`) when an `output_dataclip` body contains a NUL byte. The dataclip + insert is rejected with a changeset error instead of raising, since + PostgreSQL cannot store the NUL byte (`\u0000`) in `text`/`jsonb`. + [#4893](https://github.com/OpenFn/lightning/issues/4893) + ## [2.16.8-pre] - 2026-06-18 ### Added diff --git a/lib/lightning/runs/handlers.ex b/lib/lightning/runs/handlers.ex index 2362eb16b5..5df29968b1 100644 --- a/lib/lightning/runs/handlers.ex +++ b/lib/lightning/runs/handlers.ex @@ -472,16 +472,45 @@ defmodule Lightning.Runs.Handlers do }, _run_options ) do - Dataclip.new(%{ - id: dataclip_id, - project_id: project_id, - body: output_dataclip |> Jason.decode!() |> ensure_map(), - type: :step_result - }) - |> Repo.insert() + changeset = + Dataclip.new(%{ + id: dataclip_id, + project_id: project_id, + body: output_dataclip |> Jason.decode!() |> ensure_map(), + type: :step_result + }) + + if contains_null_byte?(Ecto.Changeset.get_field(changeset, :body)) do + {:error, + Dataclip.new(%{ + id: dataclip_id, + project_id: project_id, + type: :step_result + }) + |> Ecto.Changeset.add_error( + :body, + "contains a null byte (\\u0000) which PostgreSQL cannot store" + )} + else + Repo.insert(changeset) + end end defp ensure_map(%{} = map), do: map defp ensure_map(value), do: %{"value" => value} + + defp contains_null_byte?(value) when is_binary(value), + do: String.contains?(value, <<0>>) + + defp contains_null_byte?(value) when is_map(value), + do: + Enum.any?(value, fn {k, v} -> + contains_null_byte?(k) or contains_null_byte?(v) + end) + + defp contains_null_byte?(value) when is_list(value), + do: Enum.any?(value, &contains_null_byte?/1) + + defp contains_null_byte?(_value), do: false end end diff --git a/test/lightning/runs_test.exs b/test/lightning/runs_test.exs index cf5c6b0ad6..c57b1058f0 100644 --- a/test/lightning/runs_test.exs +++ b/test/lightning/runs_test.exs @@ -385,6 +385,73 @@ defmodule Lightning.RunsTest do assert Jason.decode!(step.output_dataclip.body) == %{"foo" => "bar"} end + test "returns an error changeset when output dataclip contains a null byte" do + dataclip = insert(:dataclip) + %{triggers: [trigger], jobs: [job]} = workflow = insert(:simple_workflow) + + %{runs: [run]} = + work_order_for(trigger, workflow: workflow, dataclip: dataclip) + |> insert() + + step = + insert(:step, runs: [run], job: job, input_dataclip: dataclip) + + assert {:error, %Ecto.Changeset{}} = + Runs.complete_step(%{ + step_id: step.id, + reason: "success", + output_dataclip: ~s({"foo": "bar\\u0000baz"}), + output_dataclip_id: Ecto.UUID.generate(), + run_id: run.id, + project_id: workflow.project_id + }) + end + + test "returns an error changeset when output dataclip contains a nested null byte" do + dataclip = insert(:dataclip) + %{triggers: [trigger], jobs: [job]} = workflow = insert(:simple_workflow) + + %{runs: [run]} = + work_order_for(trigger, workflow: workflow, dataclip: dataclip) + |> insert() + + step = + insert(:step, runs: [run], job: job, input_dataclip: dataclip) + + assert {:error, %Ecto.Changeset{}} = + Runs.complete_step(%{ + step_id: step.id, + reason: "success", + output_dataclip: ~s({"items": ["ok", "bad\\u0000value"]}), + output_dataclip_id: Ecto.UUID.generate(), + run_id: run.id, + project_id: workflow.project_id + }) + end + + test "completes when a null byte is only in top-level configuration" do + dataclip = insert(:dataclip) + %{triggers: [trigger], jobs: [job]} = workflow = insert(:simple_workflow) + + %{runs: [run]} = + work_order_for(trigger, workflow: workflow, dataclip: dataclip) + |> insert() + + step = + insert(:step, runs: [run], job: job, input_dataclip: dataclip) + + assert {:ok, _step} = + Runs.complete_step(%{ + step_id: step.id, + reason: "success", + output_dataclip: + ~s({"configuration": {"token": "abc\\u0000def"}, "data": "ok"}), + output_dataclip_id: Ecto.UUID.generate(), + run_id: run.id, + project_id: workflow.project_id + }) + end + # Regression for #4800: dataclip inserts no longer build the search_vector # synchronously (the AFTER INSERT trigger was dropped). Saving an output # dataclip via the handler must succeed and the row must be retrievable with