PostHog · tatoalo · Apr 9, 2026 · Apr 2, 2026 · Apr 3, 2026 · Apr 9, 2026
diff --git a/products/tasks/backend/api.py b/products/tasks/backend/api.py
@@ -63,6 +63,7 @@
 from .services.connection_token import create_sandbox_connection_token
 from .stream.redis_stream import TaskRunRedisStream, TaskRunStreamError, get_task_run_stream_key
 from .temporal.client import execute_posthog_code_agent_relay_workflow, execute_task_processing_workflow
+from .temporal.process_task.utils import PrAuthorshipMode, cache_github_user_token, parse_run_state
 
 logger = logging.getLogger(__name__)
 
@@ -247,6 +248,10 @@ def run(self, request, pk=None, **kwargs):
         resume_from_run_id = request.validated_data.get("resume_from_run_id")
         pending_user_message = request.validated_data.get("pending_user_message")
         sandbox_environment_id = request.validated_data.get("sandbox_environment_id")
+        pr_authorship_mode = request.validated_data.get("pr_authorship_mode")
+        run_source = request.validated_data.get("run_source")
+        signal_report_id = request.validated_data.get("signal_report_id")
+        github_user_token = request.validated_data.get("github_user_token")
 
         extra_state = None
         if pending_user_message is not None:
@@ -259,15 +264,37 @@ def run(self, request, pk=None, **kwargs):
                 return Response({"detail": "Invalid resume_from_run_id"}, status=400)
 
             # Derive snapshot_external_id from the validated previous run
-            snapshot_ext_id = (previous_run.state or {}).get("snapshot_external_id")
+            prev_state = parse_run_state(previous_run.state)
             extra_state = extra_state or {}
             extra_state["resume_from_run_id"] = str(resume_from_run_id)
-            if snapshot_ext_id:
-                extra_state["snapshot_external_id"] = snapshot_ext_id
-
-            prev_sandbox_env_id = (previous_run.state or {}).get("sandbox_environment_id")
-            if prev_sandbox_env_id and sandbox_environment_id is None:
-                sandbox_environment_id = prev_sandbox_env_id
+            if prev_state.snapshot_external_id:
+                extra_state["snapshot_external_id"] = prev_state.snapshot_external_id
+
+            if prev_state.sandbox_environment_id and sandbox_environment_id is None:
+                sandbox_environment_id = prev_state.sandbox_environment_id
+
+            if pr_authorship_mode is None:
+                pr_authorship_mode = prev_state.pr_authorship_mode
+            if run_source is None:
+                run_source = prev_state.run_source
+            if signal_report_id is None:
+                signal_report_id = prev_state.signal_report_id
+            if branch is None and prev_state.pr_base_branch is not None:
+                branch = prev_state.pr_base_branch
+
+        for key, value in {
+            "pr_base_branch": branch,
+            "pr_authorship_mode": pr_authorship_mode,
+            "run_source": run_source,
+            "signal_report_id": signal_report_id,
+        }.items():
+            if value is not None:
+                extra_state = extra_state or {}
+                extra_state[key] = value
+
+        # Only require a user token when the task has a repo (no-repo cloud runs skip GitHub operations)
+        if pr_authorship_mode == PrAuthorshipMode.USER and task.repository and not github_user_token:
+            return Response({"detail": "github_user_token is required for user-authored cloud runs"}, status=400)
 
         if sandbox_environment_id is not None:
             sandbox_environment = SandboxEnvironment.objects.filter(id=sandbox_environment_id, team=task.team).first()
@@ -291,6 +318,9 @@ def run(self, request, pk=None, **kwargs):
 
         task_run = task.create_run(mode=mode, branch=branch, extra_state=extra_state)
 
+        if github_user_token and pr_authorship_mode == PrAuthorshipMode.USER:
+            cache_github_user_token(str(task_run.id), github_user_token)
+
         logger.info(f"Triggering workflow for task {task.id}, run {task_run.id}")
 
         self._trigger_workflow(task, task_run)
@@ -379,34 +409,50 @@ def update(self, request, *args, **kwargs):
     )
     def partial_update(self, request, *args, **kwargs):
         task_run = cast(TaskRun, self.get_object())
-        old_status = task_run.status
-
-        # Update fields from validated data
-        for key, value in request.validated_data.items():
-            setattr(task_run, key, value)
-
-        new_status = request.validated_data.get("status")
-        terminal_statuses = [
-            TaskRun.Status.COMPLETED,
-            TaskRun.Status.FAILED,
-            TaskRun.Status.CANCELLED,
-        ]
-
-        # Auto-set completed_at if status is completed or failed
-        if new_status in terminal_statuses:
-            if not task_run.completed_at:
-                task_run.completed_at = timezone.now()
-
-            # Signal Temporal workflow if status changed to terminal state
-            if old_status != new_status:
-                self._signal_workflow_completion(
-                    task_run,
-                    new_status,
-                    request.validated_data.get("error_message"),
-                )
+        has_output_merge = "output" in request.validated_data and isinstance(request.validated_data["output"], dict)
 
-        task_run.save()
-        self._post_slack_update_for_pr(task_run)
+        with transaction.atomic():
+            # Re-fetch with row lock when merging output to prevent concurrent
+            # PATCHes (e.g. branch sync + PR URL) from clobbering each other.
+            if has_output_merge:
+                task_run = TaskRun.objects.select_for_update().get(pk=task_run.pk)
+
+            old_status = task_run.status
+            old_pr_url = (task_run.output or {}).get("pr_url") if isinstance(task_run.output, dict) else None
+
+            # Update fields from validated data
+            for key, value in request.validated_data.items():
+                if key == "output" and isinstance(value, dict):
+                    existing_output = task_run.output if isinstance(task_run.output, dict) else {}
+                    setattr(task_run, key, {**existing_output, **value})
+                    continue
+                setattr(task_run, key, value)
+
+            new_status = request.validated_data.get("status")
+            terminal_statuses = [
+                TaskRun.Status.COMPLETED,
+                TaskRun.Status.FAILED,
+                TaskRun.Status.CANCELLED,
+            ]
+
+            # Auto-set completed_at if status is completed or failed
+            if new_status in terminal_statuses:
+                if not task_run.completed_at:
+                    task_run.completed_at = timezone.now()
+
+            task_run.save()
+
+        # Signal Temporal and post Slack updates after commit to avoid
+        # holding the row lock during external calls.
+        if new_status in terminal_statuses and old_status != new_status:
+            self._signal_workflow_completion(
+                task_run,
+                new_status,
+                request.validated_data.get("error_message"),
+            )
+        new_pr_url = (task_run.output or {}).get("pr_url") if isinstance(task_run.output, dict) else None
+        if new_pr_url and new_pr_url != old_pr_url:
+            self._post_slack_update_for_pr(task_run)
 
         return Response(TaskRunDetailSerializer(task_run, context=self.get_serializer_context()).data)
 
@@ -812,16 +858,15 @@ def connection_token(self, request, pk=None, **kwargs):
     )
     def command(self, request, pk=None, **kwargs):
         task_run = cast(TaskRun, self.get_object())
-        state = task_run.state or {}
+        run_state = parse_run_state(task_run.state)
 
-        sandbox_url = state.get("sandbox_url")
-        if not sandbox_url:
+        if not run_state.sandbox_url:
             return Response(
                 ErrorResponseSerializer({"error": "No active sandbox for this task run"}).data,
                 status=status.HTTP_400_BAD_REQUEST,
             )
 
-        if not self._is_valid_sandbox_url(sandbox_url):
+        if not self._is_valid_sandbox_url(run_state.sandbox_url):
             logger.warning(f"Blocked request to disallowed sandbox URL for task run {task_run.id}")
             return Response(
                 ErrorResponseSerializer({"error": "Invalid sandbox URL"}).data,
@@ -834,8 +879,6 @@ def command(self, request, pk=None, **kwargs):
             distinct_id=request.user.distinct_id,
         )
 
-        sandbox_connect_token = state.get("sandbox_connect_token")
-
         command_payload: dict = {
             "jsonrpc": request.validated_data["jsonrpc"],
             "method": request.validated_data["method"],
@@ -847,9 +890,9 @@ def command(self, request, pk=None, **kwargs):
 
         try:
             agent_response = self._proxy_command_to_agent_server(
-                sandbox_url=sandbox_url,
+                sandbox_url=run_state.sandbox_url,
                 connection_token=connection_token,
-                sandbox_connect_token=sandbox_connect_token,
+                sandbox_connect_token=run_state.sandbox_connect_token,
                 payload=command_payload,
             )
 

diff --git a/products/tasks/backend/sandbox/images/Dockerfile.sandbox-base b/products/tasks/backend/sandbox/images/Dockerfile.sandbox-base
@@ -7,7 +7,7 @@
 ENV TZ=UTC

 # Install system packages (expanded for coding environments)
 RUN apt-get update && \
    apt-get install -y \
    # Core tools
    curl \
@@ -51,15 +51,15 @@
    && rm -rf /var/lib/apt/lists/*

 # Install Node.js 24.x
 RUN curl -fsSL https://deb.nodesource.com/setup_24.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*

 # Install additional language package managers
 RUN npm install -g yarn pnpm typescript ts-node nodemon

 # Install GitHub CLI
 RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \
    chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg && \
    echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && \
    apt-get update && \
@@ -88,7 +88,7 @@
 # Set up /scripts as a Node.js project and install @posthog/agent
 # COMMIT_HASH changes every build, busting the cache so we always get @latest
 ARG COMMIT_HASH
 RUN mkdir -p /scripts && \
    cd /scripts && \
    npm init -y && \
    CACHE_BUST=${COMMIT_HASH} npm install @posthog/agent@latest
@@ -101,9 +101,11 @@
     /tmp/install-skills.sh /tmp/skills && \
     rm -rf /tmp/install-skills.sh /tmp/skills
 
-# Configure git for commits - TODO: Use user's email and name
-RUN git config --global user.email "array@posthog.com" && \
-    git config --global user.name "Array"
+# Default git identity for bot-authored commits.
+# Runs configured for user-authored pull requests receive
+# GIT_AUTHOR_*/GIT_COMMITTER_* env vars that override these defaults.
+RUN git config --global user.email "code@posthog.com" && \
+    git config --global user.name "PostHog Code"
 
 # This is required for the Claude Code SDK to allow --dangerously-skip-permissions as the root user
 ENV IS_SANDBOX=1

diff --git a/products/tasks/backend/sandbox/images/Dockerfile.sandbox-notebook b/products/tasks/backend/sandbox/images/Dockerfile.sandbox-notebook
@@ -7,7 +7,7 @@
 ENV TZ=UTC

 # Install system packages (expanded for notebook environments)
 RUN apt-get update && \
    apt-get install -y \
    # Core tools
    curl \
@@ -51,7 +51,7 @@
    && rm -rf /var/lib/apt/lists/*

 # Install Node.js 24.x
 RUN curl -fsSL https://deb.nodesource.com/setup_24.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*

@@ -59,7 +59,7 @@
 RUN npm install -g yarn pnpm typescript ts-node nodemon

 # Install GitHub CLI
 RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \
    chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg && \
    echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null && \
    apt-get update && \
@@ -82,9 +82,11 @@
     -o runAgent.mjs && \
     chmod +x runAgent.mjs
 
-# Configure git for commits - TODO: Use user's email and name
-RUN git config --global user.email "array@posthog.com" && \
-    git config --global user.name "Array"
+# Default git identity for bot-authored commits.
+# Runs configured for user-authored pull requests receive
+# GIT_AUTHOR_*/GIT_COMMITTER_* env vars that override these defaults.
+RUN git config --global user.email "code@posthog.com" && \
+    git config --global user.name "PostHog Code"
 
 # This is required for the Claude Code SDK to allow --dangerously-skip-permissions as the root user
 ENV IS_SANDBOX=1

diff --git a/products/tasks/backend/serializers.py b/products/tasks/backend/serializers.py
@@ -10,6 +10,7 @@
 
 from .models import SandboxEnvironment, Task, TaskRun
 from .services.title_generator import generate_task_title
+from .temporal.process_task.utils import PrAuthorshipMode, RunSource
 
 PRESIGNED_URL_CACHE_TTL = 55 * 60  # 55 minutes (less than 1 hour URL expiry)
 
@@ -357,6 +358,9 @@ class ConnectionTokenResponseSerializer(serializers.Serializer):
 class TaskRunCreateRequestSerializer(serializers.Serializer):
     """Request body for creating a new task run"""
 
+    PR_AUTHORSHIP_MODE_CHOICES = [mode.value for mode in PrAuthorshipMode]
+    RUN_SOURCE_CHOICES = [source.value for source in RunSource]
+
     mode = serializers.ChoiceField(
         choices=["interactive", "background"],
         required=False,
@@ -386,6 +390,31 @@ class TaskRunCreateRequestSerializer(serializers.Serializer):
         default=None,
         help_text="Optional sandbox environment to apply for this cloud run.",
     )
+    pr_authorship_mode = serializers.ChoiceField(
+        choices=PR_AUTHORSHIP_MODE_CHOICES,
+        required=False,
+        default=None,
+        help_text="Whether pull requests for this run should be authored by the user or the bot.",
+    )
+    run_source = serializers.ChoiceField(
+        choices=RUN_SOURCE_CHOICES,
+        required=False,
+        default=None,
+        help_text="High-level source that triggered this run, used to distinguish manual and signal-based cloud runs.",
+    )
+    signal_report_id = serializers.CharField(
+        required=False,
+        default=None,
+        allow_blank=False,
+        help_text="Optional signal report identifier when this run was started from Inbox.",
+    )
+    github_user_token = serializers.CharField(
+        required=False,
+        default=None,
+        allow_blank=False,
+        write_only=True,
+        help_text="Ephemeral GitHub user token from PostHog Code for user-authored cloud pull requests.",
+    )
 
 
 class TaskRunCommandRequestSerializer(serializers.Serializer):

diff --git a/products/tasks/backend/services/modal_sandbox.py b/products/tasks/backend/services/modal_sandbox.py
@@ -162,12 +162,12 @@ def _copy_directory_contents(source: Path, destination: Path) -> None:
 
 def _populate_local_skills_directory(destination: Path) -> None:
     built_skills_dir = Path(settings.BASE_DIR) / LOCAL_BUILT_SKILLS_PATH
-    if built_skills_dir.exists():
+    if built_skills_dir.exists() and any(built_skills_dir.iterdir()):
         logger.info(f"Using pre-built skills from {built_skills_dir} for local Modal sandbox builds")
         _copy_directory_contents(built_skills_dir, destination)
         return
 
-    logger.info("Built skills directory missing; falling back to local skill sources for Modal sandbox builds")
+    logger.info("Built skills directory empty or missing; falling back to local skill sources for Modal sandbox builds")
     for relative_path in LOCAL_SOURCE_SKILLS_PATHS:
         _copy_directory_contents(Path(settings.BASE_DIR) / relative_path, destination)
 

diff --git a/products/tasks/backend/temporal/process_task/activities/create_sandbox_from_snapshot.py b/products/tasks/backend/temporal/process_task/activities/create_sandbox_from_snapshot.py
@@ -17,7 +17,8 @@
 from products.tasks.backend.temporal.observability import emit_agent_log, log_activity_execution
 from products.tasks.backend.temporal.process_task.utils import (
     build_sandbox_environment_variables,
-    get_github_token,
+    get_git_identity_env_vars,
+    get_sandbox_github_token,
     get_sandbox_name_for_task,
 )
 
@@ -70,7 +71,14 @@ def create_sandbox_from_snapshot(input: CreateSandboxFromSnapshotInput) -> Creat
         github_token = ""
         if ctx.github_integration_id is not None:
             try:
-                github_token = get_github_token(ctx.github_integration_id) or ""
+                github_token = (
+                    get_sandbox_github_token(
+                        ctx.github_integration_id,
+                        run_id=ctx.run_id,
+                        state=ctx.state,
+                    )
+                    or ""
+                )
             except Exception as e:
                 raise GitHubAuthenticationError(
                     f"Failed to get GitHub token for integration {ctx.github_integration_id}",
@@ -99,6 +107,7 @@ def create_sandbox_from_snapshot(input: CreateSandboxFromSnapshotInput) -> Creat
             team_id=ctx.team_id,
             sandbox_environment=sandbox_env,
         )
+        environment_variables.update(get_git_identity_env_vars(task, ctx.state))
 
         config = SandboxConfig(
             name=get_sandbox_name_for_task(ctx.task_id),

diff --git a/products/tasks/backend/temporal/process_task/activities/get_sandbox_for_repository.py b/products/tasks/backend/temporal/process_task/activities/get_sandbox_for_repository.py
@@ -16,9 +16,11 @@
 from products.tasks.backend.temporal.oauth import create_oauth_access_token
 from products.tasks.backend.temporal.observability import emit_agent_log, log_activity_execution
 from products.tasks.backend.temporal.process_task.utils import (
-    get_github_token,
+    get_git_identity_env_vars,
     get_sandbox_api_url,
+    get_sandbox_github_token,
     get_sandbox_name_for_task,
+    parse_run_state,
 )
 
 from .get_task_processing_context import TaskProcessingContext
@@ -31,6 +33,7 @@
     "POSTHOG_PROJECT_ID",
     "JWT_PUBLIC_KEY",
     "GITHUB_TOKEN",
+    "GH_TOKEN",
     "LLM_GATEWAY_URL",
     "POSTHOG_RESUME_RUN_ID",
 }
@@ -89,7 +92,14 @@ def get_sandbox_for_repository(input: GetSandboxForRepositoryInput) -> GetSandbo
         if has_repo:
             assert github_integration_id is not None
             try:
-                github_token = get_github_token(github_integration_id) or ""
+                github_token = (
+                    get_sandbox_github_token(
+                        github_integration_id,
+                        run_id=ctx.run_id,
+                        state=ctx.state,
+                    )
+                    or ""
+                )
             except Exception as e:
                 raise GitHubAuthenticationError(
                     f"Failed to get GitHub token for integration {github_integration_id}",
@@ -142,18 +152,22 @@ def get_sandbox_for_repository(input: GetSandboxForRepositoryInput) -> GetSandbo
 
         if github_token:
             environment_variables["GITHUB_TOKEN"] = github_token
+            environment_variables["GH_TOKEN"] = github_token
 
         if settings.SANDBOX_LLM_GATEWAY_URL:
             environment_variables["LLM_GATEWAY_URL"] = settings.SANDBOX_LLM_GATEWAY_URL
 
+        environment_variables.update(get_git_identity_env_vars(task, ctx.state))
+
+        run_state = parse_run_state(ctx.state)
+
         # Set resume run ID independently of snapshot so conversation history
         # can be rebuilt from logs even when the filesystem snapshot has expired.
-        resume_from_run_id = (ctx.state or {}).get("resume_from_run_id", "")
-        if resume_from_run_id:
-            environment_variables["POSTHOG_RESUME_RUN_ID"] = resume_from_run_id
+        if run_state.resume_from_run_id:
+            environment_variables["POSTHOG_RESUME_RUN_ID"] = run_state.resume_from_run_id
 
         # Check for resume snapshot (takes priority over integration-level snapshots)
-        resume_snapshot_ext_id = (ctx.state or {}).get("snapshot_external_id")
+        resume_snapshot_ext_id = run_state.snapshot_external_id
         if resume_snapshot_ext_id:
             used_snapshot = True