diff --git a/.changeset/ai-chat-sandbox-and-ctx.md b/.changeset/ai-chat-sandbox-and-ctx.md new file mode 100644 index 00000000000..7f453392655 --- /dev/null +++ b/.changeset/ai-chat-sandbox-and-ctx.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `TaskRunContext` (`ctx`) to all `chat.task` lifecycle events, `CompactedEvent`, and `ChatTaskRunPayload`. Export `TaskRunContext` from `@trigger.dev/sdk`. + diff --git a/.changeset/ai-sdk-chat-transport.md b/.changeset/ai-sdk-chat-transport.md new file mode 100644 index 00000000000..f5cdb9187d4 --- /dev/null +++ b/.changeset/ai-sdk-chat-transport.md @@ -0,0 +1,42 @@ +--- +"@trigger.dev/sdk": minor +--- + +Add AI SDK chat transport integration via two new subpath exports: + +**`@trigger.dev/sdk/chat`** (frontend, browser-safe): +- `TriggerChatTransport` — custom `ChatTransport` for the AI SDK's `useChat` hook that runs chat completions as durable Trigger.dev tasks +- `createChatTransport()` — factory function + +```tsx +import { useChat } from "@ai-sdk/react"; +import { TriggerChatTransport } from "@trigger.dev/sdk/chat"; + +const { messages, sendMessage } = useChat({ + transport: new TriggerChatTransport({ + task: "my-chat-task", + accessToken, + }), +}); +``` + +**`@trigger.dev/sdk/ai`** (backend, extends existing `ai.tool`/`ai.currentToolOptions`): +- `chatTask()` — pre-typed task wrapper with auto-pipe support +- `pipeChat()` — pipe a `StreamTextResult` or stream to the frontend +- `CHAT_STREAM_KEY` — the default stream key constant +- `ChatTaskPayload` type + +```ts +import { chatTask } from "@trigger.dev/sdk/ai"; +import { streamText, convertToModelMessages } from "ai"; + +export const myChatTask = chatTask({ + id: "my-chat-task", + run: async ({ messages }) => { + return streamText({ + model: openai("gpt-4o"), + messages: convertToModelMessages(messages), + }); + }, +}); +``` diff --git a/.changeset/ai-tool-execute-helper.md b/.changeset/ai-tool-execute-helper.md new file mode 100644 index 
00000000000..6f7b8914504 --- /dev/null +++ b/.changeset/ai-tool-execute-helper.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `ai.toolExecute(task)` so you can pass Trigger's subtask/metadata wiring as the `execute` handler to AI SDK `tool()` while defining `description` and `inputSchema` yourself. Refactors `ai.tool()` to share the same internal handler. diff --git a/.changeset/ai-tool-toolset-typing.md b/.changeset/ai-tool-toolset-typing.md new file mode 100644 index 00000000000..de67be637f3 --- /dev/null +++ b/.changeset/ai-tool-toolset-typing.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +--- + +Align `ai.tool()` (`toolFromTask`) with the AI SDK `ToolSet` shape: Zod-backed tasks use static `tool()`; returns are asserted as `Tool & ToolSet[string]`. Raise the SDK's minimum `ai` devDependency to `^6.0.116` so emitted types resolve the same `ToolSet` as apps on AI SDK 6.0.x (avoids cross-version `ToolSet` mismatches in monorepos). + diff --git a/.changeset/chat-agent-action-trigger-type.md b/.changeset/chat-agent-action-trigger-type.md new file mode 100644 index 00000000000..050951c02b5 --- /dev/null +++ b/.changeset/chat-agent-action-trigger-type.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Include `"action"` in the `ChatTaskPayload.trigger` union. `run()` is invoked with `trigger: "action"` after `onAction` processes a typed action, but the type previously omitted it. Users can now cleanly short-circuit the LLM call for actions that don't need a response (e.g. user-initiated compaction): `if (trigger === "action") return;`. diff --git a/.changeset/chat-agent-end-run.md b/.changeset/chat-agent-end-run.md new file mode 100644 index 00000000000..e16a45cbac6 --- /dev/null +++ b/.changeset/chat-agent-end-run.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `chat.endRun()` — exits the run after the current turn completes, without the upgrade-required signal that `chat.requestUpgrade()` sends. 
Use when an agent finishes its work on its own terms (one-shot responses, goal achieved, budget exhausted) instead of waiting idle for the next user message. Call from `run()`, `chat.defer()`, `onBeforeTurnComplete`, or `onTurnComplete`. diff --git a/.changeset/chat-agent-finish-reason.md b/.changeset/chat-agent-finish-reason.md new file mode 100644 index 00000000000..bf6a3b2383f --- /dev/null +++ b/.changeset/chat-agent-finish-reason.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Expose `finishReason` on `TurnCompleteEvent` and `BeforeTurnCompleteEvent`. Surfaces the AI SDK's `FinishReason` (`"stop" | "tool-calls" | "length" | ...`) so hooks can distinguish a normal turn end from one paused on a pending tool call (HITL flows like `ask_user`). Undefined for manual `pipeChat()` or aborted streams. diff --git a/.changeset/chat-agent-skills-phase-1.md b/.changeset/chat-agent-skills-phase-1.md new file mode 100644 index 00000000000..4af73f20e6b --- /dev/null +++ b/.changeset/chat-agent-skills-phase-1.md @@ -0,0 +1,16 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +"@trigger.dev/build": patch +"trigger.dev": patch +--- + +Add agent skills — developer-authored folders (`SKILL.md` + scripts/references/assets) bundled into the deploy image automatically, discovered by the chat agent via progressive disclosure. Built on the [AI SDK cookbook pattern](https://ai-sdk.dev/cookbook/guides/agent-skills) — portable across providers. + +**New:** +- `skills.define({ id, path })` registers a skill with the resource catalog; the Trigger.dev CLI bundles the folder into `/app/.trigger/skills/{id}/` automatically — no `trigger.config.ts` changes, no build extension. +- `SkillHandle.local()` reads the bundled `SKILL.md` at runtime, parses frontmatter, returns a `ResolvedSkill`. +- `chat.skills.set([...])` stores resolved skills for the current run. 
+- `chat.toStreamTextOptions()` auto-injects the skills preamble into the system prompt and merges three tools — `loadSkill`, `readFile`, `bash` — scoped per-skill with path-traversal guards and output caps (64 KB stdout/stderr, 1 MB `readFile`). `bash` runs with `cwd` = skill directory; the turn's abort signal propagates. + +Phase 1 is SDK + CLI only — no backend, no dashboard overrides. Dashboard-editable `SKILL.md` text lands in Phase 2 (`skill.resolve()` currently throws). diff --git a/.changeset/chat-customagent-session-binding-and-stop-fixes.md b/.changeset/chat-customagent-session-binding-and-stop-fixes.md new file mode 100644 index 00000000000..2bfff208784 --- /dev/null +++ b/.changeset/chat-customagent-session-binding-and-stop-fixes.md @@ -0,0 +1,9 @@ +--- +"@trigger.dev/sdk": patch +--- + +Three chat.agent fixes surfaced by smoke-testing the Sessions migration: + +- **`chat.customAgent` now binds the session handle.** Previously only `chat.agent` set up the per-run `SessionHandle` in run-locals, so any custom agent that called `chat.messages.*`, `chat.stream.*`, `chat.createSession`, or `chat.createStopSignal` threw `chat.agent session handle is not initialized`. `chat.customAgent` now wraps the user's `run` function and opens the session via `payload.sessionId ?? payload.chatId` before invoking it, matching `chat.agent`'s behavior. +- **Stop mid-stream no longer hangs the turn loop.** When the user aborts a turn, the AI SDK's `runResult.totalUsage` promise can stay unresolved indefinitely on Anthropic streams, blocking `onTurnComplete` / `writeTurnComplete` / the next-message wait. The await is now raced against a 2s timeout (mirroring the existing `onFinishPromise` race), so a stuck `totalUsage` falls through to a non-fatal "usage unknown" path and the turn finalizes correctly. +- **New `chat.sessionId` getter.** Returns the friendlyId (`session_*`) of the run's backing Session. 
Useful in `onPreload` / `onChatStart` / `onTurnComplete` for persisting the session id alongside `runId` so reloads can resume the same conversation. Throws if called outside a chat.agent / chat.customAgent run. diff --git a/.changeset/chat-reconnect-isstreaming-optional.md b/.changeset/chat-reconnect-isstreaming-optional.md new file mode 100644 index 00000000000..302957c618c --- /dev/null +++ b/.changeset/chat-reconnect-isstreaming-optional.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +`TriggerChatTransport.reconnectToStream` no longer requires callers to persist an `isStreaming` flag in `ChatSession` state. Previously, any falsy `isStreaming` (including `undefined` when the flag was dropped from persistence) short-circuited reconnect to `null` and left the UI hanging on incomplete streams. Now the short-circuit only triggers on an explicit `isStreaming === false`, so callers can drop the flag entirely and let the server decide via the session's own `.out` tail. Existing callers that still persist `isStreaming` are unaffected. diff --git a/.changeset/chat-run-pat-renewal.md b/.changeset/chat-run-pat-renewal.md new file mode 100644 index 00000000000..8d4b6cb80ea --- /dev/null +++ b/.changeset/chat-run-pat-renewal.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +--- + +Add run-scoped PAT renewal for chat transport (`renewRunAccessToken`), fail fast on 401/403 for SSE without retry backoff, and export `isTriggerRealtimeAuthError` for auth-error detection. diff --git a/.changeset/chat-store-primitive.md b/.changeset/chat-store-primitive.md new file mode 100644 index 00000000000..3f63d865258 --- /dev/null +++ b/.changeset/chat-store-primitive.md @@ -0,0 +1,21 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Add `chat.store` — a typed, bidirectional shared data slot on `chat.agent`. Agent-side foundation for TRI-8602. 
Independent of AG-UI — the same primitive will back the AG-UI `STATE_SNAPSHOT` / `STATE_DELTA` translator later. + +**New on the agent:** +- `chat.store.set(value)` — replace, emits a `store-snapshot` chunk on the existing chat output stream. +- `chat.store.patch([...])` — RFC 6902 JSON Patch, emits a `store-delta` chunk. +- `chat.store.get()` — read the current value (scoped to the run). +- `chat.store.onChange((value, ops) => ...)` — subscribe to changes. +- `hydrateStore?: (event) => value` config on `chat.agent` — mirrors `hydrateMessages`; restore the store after a continuation from your own persistence layer. +- `ChatTaskWirePayload.incomingStore` — optional wire field applied at turn start before `run()` fires (last-write-wins over `hydrateStore`). + +**New in core:** +- `store-snapshot` / `store-delta` chunk types and `applyChatStorePatch` helper exported from `@trigger.dev/core/v3/chat-client`. + +The store lives in memory for the lifetime of the run and is persisted by the existing chat output stream plus the `hydrateStore` hook across continuations — no new infrastructure. + +Client-side pieces (transport `getStore` / `setStore` / `applyStorePatch` / listeners, `AgentChat` accessors, `useChatStore` React hook, reference demo, docs) land in a follow-up. diff --git a/.changeset/chat-transport-session-renew-plus-preload.md b/.changeset/chat-transport-session-renew-plus-preload.md new file mode 100644 index 00000000000..70ac843cee3 --- /dev/null +++ b/.changeset/chat-transport-session-renew-plus-preload.md @@ -0,0 +1,8 @@ +--- +"@trigger.dev/sdk": patch +--- + +`TriggerChatTransport` fixes for session-scoped auth and end-to-end UI smoke parity: + +- `RenewRunAccessTokenParams` now includes the durable `sessionId` alongside `chatId` + `runId`. 
Server-side renew handlers should mint the renewed PAT with `read:sessions:{sessionId}` + `write:sessions:{sessionId}` scopes (in addition to the existing run scopes) so it keeps authenticating against the session `.in` append + `.out` subscribe endpoints. Renewing without session scopes sends the transport into a 401 loop on the first append after expiry. +- `transport.preload(chatId)` on the `triggerTask` callback path no longer calls `apiClient.createSession` from the browser. The server action (e.g. `chat.createTriggerAction`) creates the session with its secret key and returns the `sessionId` in its result, matching how `sendMessages` already worked. Browser deployments that use the `triggerTask` callback path therefore no longer need `write:sessions` on any browser-side token. diff --git a/.changeset/drop-legacy-chat-stream-constants.md b/.changeset/drop-legacy-chat-stream-constants.md new file mode 100644 index 00000000000..caa51789249 --- /dev/null +++ b/.changeset/drop-legacy-chat-stream-constants.md @@ -0,0 +1,11 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Drop the pre-Sessions chat stream-ID constants from the public surface: + +- `CHAT_STREAM_KEY`, `CHAT_MESSAGES_STREAM_ID`, `CHAT_STOP_STREAM_ID` are no longer exported from `@trigger.dev/sdk/ai` or `@trigger.dev/core/v3/chat-client`. Deletes `packages/trigger-sdk/src/v3/chat-constants.ts`. +- The `chat.stream.id` / `chat.messages.id` / `chat.stopSignal.id` labels still contain the same string values (`"chat"` / `"chat-messages"` / `"chat-stop"`) — now inlined as opaque breadcrumbs rather than user-consumable constants. Behavior and telemetry attrs are unchanged. + +These constants only mattered before the chat.agent I/O moved onto the Session primitive — the SDK no longer writes to run-scoped `streams.writer(CHAT_STREAM_KEY, …)` / `streams.input(CHAT_*_STREAM_ID)` at all. 
Customers who still referenced them externally should migrate to `sessions.open(sessionId).out.writer(...)` / `sessions.open(sessionId).in.on(...)` — same primitives, now session-keyed. diff --git a/.changeset/dry-sloths-divide.md b/.changeset/dry-sloths-divide.md new file mode 100644 index 00000000000..31e7ec9b941 --- /dev/null +++ b/.changeset/dry-sloths-divide.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `chat.withUIMessage()` for typed AI SDK `UIMessage` in chat task hooks, optional factory `streamOptions` merged with `uiMessageStreamOptions`, and `InferChatUIMessage` helper. Generic `ChatUIMessageStreamOptions`, compaction, and pending-message event types. `usePendingMessages` accepts a UI message type parameter; re-export `InferChatUIMessage` from `@trigger.dev/sdk/chat/react`. diff --git a/.changeset/mcp-agent-chat-sessions.md b/.changeset/mcp-agent-chat-sessions.md new file mode 100644 index 00000000000..fe47c41cba2 --- /dev/null +++ b/.changeset/mcp-agent-chat-sessions.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +Migrate the MCP `start_agent_chat` / `send_agent_message` / `close_agent_chat` tools onto the Session primitive. The CLI MCP server now upserts a backing Session via `POST /api/v1/sessions` on chat start, threads `sessionId` through the run payload, sends messages to `session.in` as `ChatInputChunk { kind, payload }` JSON, and subscribes to `session.out` at `/realtime/v1/sessions/{sessionId}/out`. Scopes expanded from `write:inputStreams` to `read:sessions` + `write:sessions`. Upgrade-required re-trigger keeps the same session and swaps only `runId`. 
diff --git a/.changeset/mock-chat-agent-setup-locals.md b/.changeset/mock-chat-agent-setup-locals.md new file mode 100644 index 00000000000..b6bb587d314 --- /dev/null +++ b/.changeset/mock-chat-agent-setup-locals.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Add `setupLocals` option to `mockChatAgent` for dependency injection in tests. Pre-seed `locals` (database clients, service stubs) before the agent's `run()` starts, so hooks read the test instance via `locals.get()` without leaking through untrusted `clientData`. Also exposes `drivers.locals.set()` on `runInMockTaskContext`. diff --git a/.changeset/mock-chat-agent-test-harness.md b/.changeset/mock-chat-agent-test-harness.md new file mode 100644 index 00000000000..ce7cdd88f47 --- /dev/null +++ b/.changeset/mock-chat-agent-test-harness.md @@ -0,0 +1,8 @@ +--- +"@trigger.dev/sdk": patch +"@trigger.dev/core": patch +--- + +Add `mockChatAgent` test harness at `@trigger.dev/sdk/ai/test` for unit-testing `chat.agent` definitions offline. Drives a real agent's turn loop without network or task runtime: send messages, actions, and stop signals via driver methods, inspect captured output chunks, and verify hooks fire. Pairs with `MockLanguageModelV3` from `ai/test` for model mocking. + +Also adds `TestRunMetadataManager` to `@trigger.dev/core/v3/test` (in-memory metadata manager used by the harness), and exposes an `onWrite` hook on `TestRealtimeStreamsManager` so harnesses can react to stream writes without polling. diff --git a/.changeset/mock-task-context-test-infra.md b/.changeset/mock-task-context-test-infra.md new file mode 100644 index 00000000000..bae4ef8f9af --- /dev/null +++ b/.changeset/mock-task-context-test-infra.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/core": patch +--- + +Add `runInMockTaskContext` test harness at `@trigger.dev/core/v3/test` for unit-testing task code offline. 
Installs in-memory managers for `locals`, `lifecycleHooks`, `runtime`, `inputStreams`, and `realtimeStreams`, plus a mock `TaskContext`, so tasks can be driven end-to-end without hitting the Trigger.dev runtime. Provides drivers to send data into input streams and inspect chunks written to output streams. diff --git a/.changeset/session-sdk-toolkit.md b/.changeset/session-sdk-toolkit.md new file mode 100644 index 00000000000..877ad9f6b0c --- /dev/null +++ b/.changeset/session-sdk-toolkit.md @@ -0,0 +1,11 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +--- + +Extend `SessionHandle` with two asymmetric channels mirroring the run-scoped streams primitives: + +- `.in` (`SessionInputChannel`) mirrors `streams.input` — `on` / `once` / `peek` / `wait` / `waitWithIdleTimeout` for the task to consume, `send` for external clients to produce. `.wait` / `.waitWithIdleTimeout` suspend the run on a session-stream waitpoint; the run resumes when a record lands on `.in`. +- `.out` (`SessionOutputChannel`) mirrors `streams.define` — `append` / `pipe` / `writer` for the task to produce records (all route through direct-to-S2 for uniform parsed-object serialization), plus `read` for external SSE subscribers. + +Adds the `sessionStreams` global + `StandardSessionStreamManager` (SSE-backed tail + buffer keyed on `{sessionId, io}`, registered in dev/managed run workers), `SessionStreamInstance` for direct-to-S2 piping, and `ApiClient.createSessionStreamWaitpoint` wiring. diff --git a/.changeset/skills-runtime-subpath.md b/.changeset/skills-runtime-subpath.md new file mode 100644 index 00000000000..de6787a704d --- /dev/null +++ b/.changeset/skills-runtime-subpath.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Split the skill-runtime primitives (`bash` + `readFile` tool implementations, backed by `node:child_process` + `node:fs/promises`) out of `@trigger.dev/sdk/ai` into a new `@trigger.dev/sdk/ai/skills-runtime` subpath. 
Fixes client-bundle build errors (`UnhandledSchemeError: Reading from "node:child_process"…`) that hit Next.js + Webpack when a browser page imports types from `@trigger.dev/sdk/ai` (for example `ChatUiMessage` via a shared tools file). The chat-agent factory now loads the runtime lazily via a computed-string dynamic import, so server workers still get full skill support without any caller changes. diff --git a/.changeset/trigger-chat-transport-watch-mode.md b/.changeset/trigger-chat-transport-watch-mode.md new file mode 100644 index 00000000000..1b2346d859e --- /dev/null +++ b/.changeset/trigger-chat-transport-watch-mode.md @@ -0,0 +1,23 @@ +--- +"@trigger.dev/sdk": patch +--- + +Add `watch` option to `TriggerChatTransport` for read-only observation of an existing chat run. + +When set to `true`, the transport keeps its internal `ReadableStream` open across `trigger:turn-complete` control chunks instead of closing it after each turn. This lets a single `useChat` / `resumeStream` subscription observe every turn of a long-lived agent run — useful for dashboard viewers or debug UIs that only want to watch an existing conversation as it unfolds, rather than drive it. + +```tsx +const transport = new TriggerChatTransport({ + task: "my-chat-task", + accessToken: runScopedPat, + watch: true, + sessions: { + [chatId]: { runId, publicAccessToken: runScopedPat }, + }, +}); + +const { messages, resumeStream } = useChat({ id: chatId, transport }); +useEffect(() => { resumeStream(); }, [resumeStream]); +``` + +Non-watch transports are unaffected — the default remains `false` and existing behavior (close on turn-complete so `useChat` can flip to `"ready"` between turns) is preserved for interactive playground-style flows. 
diff --git a/.claude/architecture/chat-agent-sessions.md b/.claude/architecture/chat-agent-sessions.md new file mode 100644 index 00000000000..18c41a26e0c --- /dev/null +++ b/.claude/architecture/chat-agent-sessions.md @@ -0,0 +1,416 @@ +# chat.agent on Sessions — architecture reference + +Snapshot of how `chat.agent` works after the Session migration. Meant +to orient Claude sessions and writers of `docs/ai-chat/…` without +having to re-derive the design from the code. + +Scope: everything in this document applies to the `ai-chat` PR +(`feature/tri-7532`, on top of `feature/tri-8627`). Neither is merged +yet. Once shipped, the old CHAT_STREAM_KEY / CHAT_MESSAGES_STREAM_ID / +CHAT_STOP_STREAM_ID constants are deleted and the three remaining +legacy consumers (MCP `agentChat` tool, `mock-chat-agent`, +dashboard `AgentView.tsx`) are migrated too. + +## Why + +Pre-migration, `chat.agent` ran entirely on run-scoped primitives: + +- Output: one `streams.writer("chat")` on the current run. +- Input: two `streams.input()` definitions — `"chat-messages"` and + `"chat-stop"`. +- The browser transport subscribed to + `/realtime/v1/streams/{runId}/chat` and POST-ed to run-scoped + input-stream URLs. `ChatSession` persistence was `{runId, + publicAccessToken, lastEventId}`. + +Every durable identity was the `runId`. That blocked: + +- Resuming a chat across runs (run ends → session dies). +- Listing/filtering a user's chats (no `chatId → runId` inbox). +- Cross-tab and cross-device coordination beyond a single run. +- Moving chat state between tasks without smuggling it through run + metadata. + +Sessions give us a durable `{sessionId, externalId}` pair that +outlives any one run, plus a bidirectional typed channel pair +(`.in` / `.out`). The migration rebuilds `chat.agent`'s I/O on top +of Sessions with zero surface-level change to the public +`chat.agent()` / `TriggerChatTransport` / `AgentChat` APIs. 
+ +## The Session primitive (2-minute version) + +Lives in `feature/tri-8627`. See `packages/core/src/v3/sessions.ts` +and `apps/webapp/app/routes/(api|realtime).v1.sessions*`. + +- `sessions.create({type, externalId, …})` — Postgres upsert on + `(environmentId, externalId)`. Idempotent. +- `sessions.open(id)` — returns a `SessionHandle { id, in, out }`. + No network call until you hit a channel method. +- `.out` is a `SessionOutputChannel` — **producer-side API**: + `append` (single record), `pipe(stream)`, `writer({execute})` + (matches `streams.define`), plus `read(options?)` for external + SSE consumers. All three producer methods route through + `SessionStreamInstance` → `StreamsWriterV2` → direct-to-S2 so + subscribers see a uniform parsed-object shape. +- `.in` is a `SessionInputChannel` — **consumer-side API for the + task**: `on`, `once`, `peek`, `wait`, `waitWithIdleTimeout` + (matches `streams.input`), plus `send(value)` for external + producers. `.wait` / `.waitWithIdleTimeout` suspend the run + through a **session-scoped waitpoint** — same mechanism as + `streams.input.wait`, but the waitpoint fires when a record + lands on the session's `.in` instead of a run's input stream. +- The two channels have **zero overlapping method names** — + directional intent always stays at the call site. +- Session channels accept either the friendlyId (`session_*`) or + the user-supplied externalId. The server disambiguates via the + `session_` prefix. 
+ +## The chat mapping + +One Session per chat conversation: + +``` +SessionHandle (durable identity, outlives runs) +├── .in — chat messages + stops (tagged ChatInputChunk) +└── .out — UIMessageChunks + control chunks + +externalId = chatId (client-owned, human-meaningful) +friendlyId = session_xxxxxxxxxxxx (generated, stable) +type = "chat.agent" +``` + +A session's `.in` carries a discriminated union — +`ChatInputChunk` in `packages/trigger-sdk/src/v3/ai.ts`: + +```ts +type ChatInputChunk = + | { kind: "message"; payload: ChatTaskWirePayload } + | { kind: "stop"; message?: string }; +``` + +The task dispatches on `chunk.kind`. The message payload is the same +`ChatTaskWirePayload` the run originally received — so a +message-kind chunk at turn N mirrors the shape of turn 0's payload. + +`.out` carries UIMessageChunks (token streaming) interleaved with +control chunks (`trigger:turn-complete`, `trigger:upgrade-required`) +and `chat.store` deltas. Semantically unchanged from pre-migration — +only the transport (S2 via Session) changed. 
+ +## End-to-end flow (first message) + +``` + Browser Server action Webapp / S2 Agent run + ──────── ───────────── ────────── ───────── + useChat.sendMessage + → transport.sendMessages + → triggerTaskFn (if set) + sessions.create + externalId = chatId + type = "chat.agent" + → session_xxx + tasks.trigger( + "my-chat-agent", + { chatId, sessionId, + messages, trigger, + metadata }) + auth.createPublicToken({ + read: { runs, sessions }, + write: { inputStreams, sessions } }) + → { runId, publicAccessToken, + sessionId } + ← sessions.set(chatId, state) + → subscribeToSessionStream + GET /realtime/v1/sessions/{sessionId}/out + [SSE open] + run starts + payload.sessionId + locals.set(chatSessionHandleKey, + sessions.open(sessionId)) + onChatStart() + run() → streamText(…) + pipeChat(uiStream) + → chatStream.pipe + → session.out.pipe + → SessionStreamInstance + → StreamsWriterV2 + → S2 + [records land on S2] + ← SSE chunks stream [SSE delivers chunks] + id: 0 start + id: 1 start-step + id: 2 text-start + id: 3… text-delta + … + writeTurnCompleteChunk() + via chatStream.writer + id: N trigger:turn-complete await messagesInput + .waitWithIdleTimeout(…) + — run suspends on the + session-stream waitpoint +``` + +## Subsequent turns (run still live) + +``` + Browser Agent run (suspended) + ──────── ───────────────────── + transport.sendMessages (same chatId) + state.runId is set → "existing run" branch + → POST /realtime/v1/sessions/{sessionId}/in/append + body: {"kind":"message","payload":{…}} + session append handler + drain waitpoints set + → complete waitpoint + run resumes with + next message + turn-complete chunk + → session.out + [SSE delivers chunks] + ← chunks … +``` + +## Subsequent turns (previous run ended) + +The transport detects that `state.runId` is gone (or the append fails) and re-triggers a +new run on the same session — `sessionId` stays the same; only the `runId` and PAT +are refreshed. Upgrade-required has the same shape. 
+ +## Stop + +``` + Browser Agent run (streaming) + ──────── ───────────────────── + transport.stopGeneration(chatId) + → POST /realtime/v1/sessions/{sessionId}/in/append + body: {"kind":"stop"} + session append handler + → complete waitpoint + → deliver to stopInput.on() + currentStopController.abort() + streamText aborts + turn ends early, + trigger:turn-complete + emitted on .out + run returns to idle wait +``` + +`stopInput` is a module-level facade that filters `.in` for +`kind === "stop"`. The run's persistent listener fires on every stop +regardless of whether a turn is active. + +## Module layout (SDK) + +``` +packages/trigger-sdk/src/v3/ +├── ai.ts chat.agent factory. Module-level facades: +│ chatStream : RealtimeDefinedStream +│ messagesInput: RealtimeDefinedInputStream +│ stopInput : RealtimeDefinedInputStream<{stop, message?}> +│ Facades resolve `getChatSession()` at call time. +│ chat.stream / chat.messages re-export them for users. +│ Locals slot: chatSessionHandleKey. +│ Initialized at run start from payload.sessionId +│ (falls back to payload.chatId). +├── chat.ts TriggerChatTransport. Calls: +│ apiClient.createSession (ensureSession) +│ apiClient.appendToSessionStream(..., "in", chunk) +│ GET /realtime/v1/sessions/{sessionId}/out (SSE) +│ ChatSessionState keys on sessionId, runId optional. +├── chat-client.ts Server-side AgentChat + ChatStream. +│ Same shape as TriggerChatTransport but uses the +│ env secret key (apiClientManager.accessToken) so +│ session CRUD doesn't need extra auth wiring. +├── sessions.ts SessionHandle / SessionInputChannel / +│ SessionOutputChannel. Thin SDK over the core +│ ApiClient session methods + sessionStreams API. 
+``` + +## Module layout (core) + +``` +packages/core/src/v3/ +├── schemas/api.ts Session CRUD + waitpoint schemas +├── apiClient/index.ts createSession / appendToSessionStream / +│ subscribeToSessionStream / +│ initializeSessionStream / +│ createSessionStreamWaitpoint +├── sessionStreams/ +│ ├── types.ts SessionStreamManager interface +│ ├── noopManager.ts +│ ├── manager.ts StandardSessionStreamManager — +│ │ SSE tail + once/on/peek buffer +│ │ keyed on `{sessionId, io}` +│ └── index.ts SessionStreamsAPI facade +├── session-streams-api.ts `sessionStreams` global singleton +└── realtimeStreams/ + └── sessionStreamInstance.ts SessionStreamInstance — S2-only + parallel of StreamInstance. Used + by SessionOutputChannel.pipe/writer. +``` + +## Module layout (webapp) + +``` +apps/webapp/app/ +├── routes/ +│ ├── api.v1.sessions*.ts CRUD (create/list/retrieve/update/close) +│ ├── realtime.v1.sessions.$session.$io.ts SSE subscribe + HEAD (last-seq) +│ ├── realtime.v1.sessions.$session.$io.append.ts +│ │ POST append — fires pending +│ │ session-stream waitpoints after +│ │ each record lands +│ └── api.v1.runs.$runFriendlyId.session-streams.wait.ts +│ POST create-waitpoint. Race-checks +│ the S2 stream at lastSeqNum so +│ pre-arrived data fires the +│ waitpoint synchronously. 
+├── services/ +│ ├── realtime/ +│ │ ├── sessions.server.ts resolveSessionByIdOrExternalId, +│ │ │ serializeSession +│ │ └── s2realtimeStreams.server.ts appendPartToSessionStream, +│ │ readSessionStreamRecords, +│ │ streamResponseFromSessionStream +│ ├── sessionStreamWaitpointCache.server.ts Redis set keyed on +│ │ `ssw:{sessionFriendlyId}:{io}`; +│ │ drained atomically on append +│ └── sessionsReplicationService.server.ts Postgres → ClickHouse sessions_v1 +``` + +## Token scopes + +The PAT minted for the browser transport carries **both** run and +session scopes — so a single token covers every session-side call the +transport makes (append, subscribe) plus any remaining run-scoped +fallbacks: + +``` +{ + read: { runs: runId, sessions: sessionId }, + write: { inputStreams: runId, sessions: sessionId }, +} +``` + +Three mint sites in `ai.ts`: + +- `createChatTriggerAction` (server-side `triggerTask` helper — + creates the session before triggering, returns `sessionId` in the + result). +- `preloadAccessToken` (agent-side, per-preload). +- `turnAccessToken` (agent-side, refreshed each turn, delivered via + the `trigger:turn-complete` chunk's `publicAccessToken` field). + +The server-side `AgentChat` / `ChatStream` path uses the environment +secret key directly — no per-run tokens needed. + +## Key invariants + +- **Sessions outlive runs.** Session close is client-driven; the + task runtime never auto-closes. +- **`.in` and `.out` are disjoint.** No method appears on both + channels; directional intent is always at the call site. +- **Uniform serialization on `.out`.** `append`, `pipe`, `writer` all + route through `StreamsWriterV2` so subscribers always receive + parsed objects, never raw JSON strings. +- **Suspend-while-idle on `.in`.** Session-stream waitpoints use the + same run-engine mechanism as input-stream waitpoints — no compute + is consumed between turns. 
+- **One run per active turn.** The transport's first-message path + triggers a run; subsequent messages land via `.in.send(...)` + against the same run (or spawn a new run on the same session if + the previous one ended). + +## Public API surface (what changed / what's the same) + +Unchanged: + +- `chat.agent({ id, run, onChatStart, … })` +- `chat.stream`, `chat.messages`, `chat.createStopSignal` +- `chat.store.set / patch / get / onChange` +- `chat.response.write`, `chat.defer`, `chat.history`, etc. +- `TriggerChatTransport` options and methods +- `AgentChat` server-side API + +Grown: + +- `ChatTaskWirePayload` / `ChatTaskPayload` / `ChatTaskRunPayload` + gain optional `sessionId`. +- `TriggerChatTaskResult` gains optional `sessionId`. +- `TriggerChatTransport.getSession` / `setSession` / `onSessionChange` + / `sessions` options all carry `sessionId`; `runId` is now optional. +- `AgentChat.ChatSession` persistence type gains `sessionId`. + +Added (public): + +- `sessions.create / retrieve / update / close / list / open` +- `SessionHandle`, `SessionInputChannel`, `SessionOutputChannel` + +## Known follow-ups + +Tracked on task #49 in the project task list: + +- Migrate three remaining legacy-stream consumers (still use + run-scoped stream URLs): + - `packages/cli-v3/src/mcp/tools/agentChat.ts` — MCP chat tool + Claude uses to talk to agents. + - `packages/trigger-sdk/src/v3/test/mock-chat-agent.ts` — the + offline test harness. Needs a `TestSessionStreamManager` plus + a pipe/writer sink in `mock-task-context.ts` since the agent + now writes through `SessionStreamInstance` (direct-to-S2) which + the current output-inspection driver doesn't intercept. + - `apps/webapp/app/components/runs/v3/agent/AgentView.tsx` — the + dashboard's per-run agent viewer. Still subscribes to + `/realtime/v1/streams/{runId}/chat`. 
+- Delete `CHAT_STREAM_KEY` / `CHAT_MESSAGES_STREAM_ID` / + `CHAT_STOP_STREAM_ID` from `packages/core/src/v3/chat-client.ts` + + `packages/trigger-sdk/src/v3/chat-constants.ts` + re-exports in + `ai.ts` once those three consumers are migrated. +- Full UI smoke in `references/ai-chat` (send / stop / refresh-resume + / multi-turn / cross-run resume). Core end-to-end flow already + validated via `chat-agent-smoke` in `references/hello-world`. + +## Smoke tests + +- `sessions-smoke` (`references/hello-world/src/trigger/sessionsSmoke.ts`) + — control plane + `.out.writer` + `.out.append` + `.in.send` + + list / pagination / close / idempotent close. +- `sessions-wait-smoke` (`references/hello-world/src/trigger/sessionsWaitSmoke.ts`) + — full waitpoint suspend/resume path. Orchestrator suspends on + `.in.waitWithIdleTimeout`; delayed sender fires the waitpoint via + `.in.send`; orchestrator resumes with the payload. +- `chat-agent-smoke` (`references/hello-world/src/trigger/chatAgentSmoke.ts`) + — end-to-end chat.agent flow. Creates a session, triggers + `test-agent` with `{chatId, sessionId, messages, …}`, subscribes to + `session.out`, asserts 14 UIMessageChunks (`start` / + `start-step` / `text-start` / 7× `text-delta` / `text-end` / + `finish-step` / `finish` / `trigger:turn-complete`) with ids 0–13. + Requires `OPENAI_API_KEY` in the dev env. 
+ +## Git trail + +Sessions branch (`feature/tri-8627-session-primitive-server-side-schema-routes-clickhouse`): + +``` +4cadc19 feat(webapp,core): Session channel waitpoints — server side +95f3c00 fix(webapp): tighten sessions create + list auth +829ccc4 fix(webapp): allow JWT + CORS on sessions list endpoint +27fb4a4 fix(core): reject externalId starting with 'session_' on Session create/update +6f9dbe5 code review fixes +16ee28f feat(webapp,clickhouse,database,core): Session primitive (server side) +``` + +AI-chat branch (`feature/tri-7532-ai-sdk-chat-transport-and-chat-task-system`): + +``` +7aa6687 fix(sdk,chat): route pipeChat through session.out + chat-agent smoke test +762ed92 feat(sdk): server-side ChatStream / AgentChat → Sessions (phase D) +91b0481 feat(sdk): chat.agent → Sessions migration (phases B + C + min E) +e72555b feat(sdk,core): Session channel SDK toolkits + waitpoints — client side +0191302 feat(sdk,core): Session client SDK + hello-world smoke test +``` + +Later commits on the chat branch (chat.store, hydrateMessages, +multi-tab coordination, tool approvals, etc.) pre-date the Session +migration and are unchanged by it — the migration changed plumbing, +not public surface. diff --git a/.claude/architecture/sessions-as-run-manager.md b/.claude/architecture/sessions-as-run-manager.md new file mode 100644 index 00000000000..41f81720a83 --- /dev/null +++ b/.claude/architecture/sessions-as-run-manager.md @@ -0,0 +1,558 @@ +# Sessions as run manager + +Plan for the next chat.agent / Sessions branch. Builds on the row-agnostic +addressing branch (`chat-agent-sessions.md`). + +## Context + +The previous branch made `chatId` (the externalId) the universal addressing +string and made the `.in/.out/wait` routes row-agnostic. 
It works, but the +transport still owns run lifecycle: it triggers the first run, threads +`runId` through state, has to detect "run died" so the next user message +re-triggers, and re-triggers explicitly on `trigger:upgrade-required`. + +Two real gaps fall out of that: + +1. **Run-death blindness.** `.in/append` is run-independent — it appends to + S2 successfully whether or not the run is alive. The transport's + "non-auth error → re-trigger" fallback (`chat.ts:647-654`) is dead code + under row-agnostic addressing because the endpoint always 200s. If a run + is cancelled or crashes mid-turn before emitting `turn-complete`, the + user's next message sits in S2 with no listener and the transport has + no signal to recover. + +2. **Transport carries upgrade plumbing.** ~50 lines around + `subscribeToSessionStream`'s `upgradeRetry`, threaded `payload`+ + `messages` for re-trigger, and the `triggerNewRun` call on a + client-issued retry — all so the transport can react to a chunk the + agent emits. Server is in a better position to do this work. + +The fix: **make Session the run manager.** Sessions know their task, +their config, and their current run. Server triggers/re-triggers as +needed. Browser holds a session-scoped PAT and never sees runs. + +Nothing has shipped yet — no back-compat needed. We're free to break +public surface (`chat.createTriggerAction`, `onSessionChange` shape, +`ChatSession.runId`). + +## Design + +### Invariants + +- Session is the durable identity of a chat. One session, many runs over + its lifetime. +- Session always knows its task (`taskIdentifier`) and how to trigger it + (`triggerConfig`). Sessions without those fields don't exist anymore — + Sessions are task-bound by design. +- At most one live run per session at a time. Tracked as + `Session.currentRunId` (non-FK, can lag reality). +- `Session.currentRunVersion` (monotonic int) drives optimistic locking on + any state transition that swaps the run. 
+- Browser only ever holds session-scoped tokens. Run identifiers are a + server-side implementation detail. +- The append-time probe is the source of truth. Hooks from run-engine into + Session are optional eager-clears for dashboard freshness, never for + correctness. + +### State machine + +``` + ┌─────────────────┐ + │ Session created │ + │ first run fired │ + └────────┬────────┘ + ▼ + ┌──────────────────┐ user msg / .in append + │ currentRun alive │ ◀────────────────────────┐ + └────────┬─────────┘ │ + │ run terminates │ + │ (idle, cancel, crash, end-cont.) │ + ▼ │ + ┌──────────────────┐ .in/append probes │ + │ currentRun stale │ ─── ensureRunForSession ─┘ + └──────────────────┘ + │ session.close() + ▼ + ┌──────────────────┐ + │ closed (terminal)│ + └──────────────────┘ +``` + +### Three trigger paths + +1. **Session create.** `POST /api/v1/sessions` creates the row and triggers + the first run synchronously, returns `{ id, runId, publicAccessToken }`. +2. **`.in/append` probe.** Server checks `currentRunId`'s snapshot status; + if terminal, calls `ensureRunForSession` before processing the append. +3. **`end-and-continue`.** Agent calls `POST /api/v1/sessions/:id/end-and-continue` + to request a clean handoff to a fresh run on the latest version. Server + triggers v2, swaps `currentRunId`, returns the new runId. v1 emits its + final `.out` chunks (e.g. `trigger:upgrade-required` for transport + telemetry) and exits. + +## Schema + +### Prisma changes + +```prisma +model Session { + // existing fields stay... + + // Now required (today nullable). Sessions are task-bound. + taskIdentifier String + + // New: trigger payload + options for re-runs. + // { basePayload, machine, queue, tags, maxAttempts, idleTimeoutInSeconds } + triggerConfig Json + + // New: current run pointer. Non-FK so run deletion doesn't cascade. + currentRunId String? + + // New: monotonic counter for optimistic locking on currentRunId swaps. 
+ currentRunVersion Int @default(0) + + @@index([currentRunId]) // only useful for "find session by run" reverse lookups +} +``` + +### Optional historical join (defer to v1.1) + +```prisma +model SessionRun { + sessionId String + runId String @unique + reason String // "initial" | "continuation" | "upgrade" | "manual" + triggeredAt DateTime @default(now()) + + @@index([sessionId]) +} +``` + +Not strictly needed for v1 — debugging/audit can use TaskRun's existing +metadata + `Session.currentRunId` history via `git`-style logs in +ClickHouse if desired. Add only if a concrete dashboard surface needs it. + +### Migration + +Two-step: + +1. Add the new columns + populate `taskIdentifier` from existing data + (chat.agent sessions all have it implicit via tags or metadata). +2. Set `triggerConfig = '{}'` for any existing sessions and either close + them or leave them as zombies. Since the old transport still works + pre-merge, this branch is the cutover. + +For the dev DB: I'll write a backfill that closes existing dev sessions +rather than try to compute valid triggerConfigs for them. They were all +test data anyway. 
+ +## API surface + +### `POST /api/v1/sessions` — modified + +Two auth modes: + +| Mode | Caller | Required scope | Notes | +| ----------- | ----------------------- | ----------------------------- | -------------------------------------------------- | +| Secret key | Customer's server | env-wide | `chat.createStartSessionAction` server action | +| One-time JWT| Browser | `trigger:tasks:{taskId}` | Mints via `auth.createTriggerPublicToken(taskId)` | + +Body (Zod-validated): + +```ts +{ + type: string, // existing + externalId?: string, // chatId for chat.agent + taskIdentifier: string, // required; must match scope if JWT + triggerConfig: { + basePayload: Record<string, unknown>, + machine?: MachinePresetName, + queue?: string, + tags?: string[], // ≤5 + maxAttempts?: number, + idleTimeoutInSeconds?: number, + }, + tags?: string[], // existing — session-level tags + metadata?: Record<string, unknown>, // existing +} +``` + +Response: + +```ts +{ + id: string, // session_* + runId: string, // first run, freshly triggered + publicAccessToken: string, // session-scoped, long TTL + externalId: string | null, + type: string, + // ... rest of SessionItem fields +} +``` + +Behavior: + +- Idempotent on `(env, externalId)`. Repeat calls return the existing + session, ensure-running its run if terminal, return a fresh PAT. +- Token consumption: if JWT mode, the one-time token is consumed on first + successful call (existing replay-protection infra). +- PAT scopes returned: `read:sessions:{externalId} + write:sessions:{externalId}`. + No run-scoped permissions — the transport doesn't need them. + +### `POST /api/v1/sessions/:id/in/append` — modified + +Add the probe + ensure-run step before the existing S2 append.
Pseudocode: + +```ts +const sess = await readSession(id); +if (sess.closedAt) return 400; +if (sess.expiresAt && sess.expiresAt < now) return 400; + +if (!sess.currentRunId || isTerminal(await getSnapshotStatus(sess.currentRunId))) { + await ensureRunForSession(sess, "continuation"); // see below +} + +return appendToS2(addressingKey, body); // unchanged +``` + +The probe is one Redis snapshot read (`getSnapshotStatus` is cheap, +already used by the run-engine). Net hot-path overhead: ~1ms. + +### `POST /api/v1/sessions/:id/end-and-continue` — new + +Called by the run itself (uses internal run auth, scoped to the +calling run's id + the session id). Triggers a fresh run for the same +session, atomically swaps `currentRunId`, returns the new runId. + +Body: + +```ts +{ + reason: "upgrade" | "explicit-handoff" | string, + // optional metadata for SessionRun.reason if/when we add the join table +} +``` + +Response: + +```ts +{ runId: string } +``` + +The calling run is expected to exit shortly after receiving the response — +it has done whatever wrap-up it wanted and is delegating the conversation +to the new run. The transport sees this as "more chunks arrive on `.out`, +some from v1 then some from v2" — it's the same S2 stream keyed on chatId. + +### Other routes — unchanged + +`GET /api/v1/sessions/:id`, `PATCH /api/v1/sessions/:id` (close, update), +`PUT /realtime/v1/sessions/:id/:io`, `GET /realtime/v1/sessions/:id/:io` +(SSE subscribe, including the row-agnostic addressing from the previous +branch) — all stay the same. + +## Server internals + +### `ensureRunForSession` — atomic re-run via optimistic locking + +Lives in a new service: `apps/webapp/app/services/realtime/sessionRunManager.server.ts`. + +```ts +async function ensureRunForSession( + sess: SessionRow, + reason: "initial" | "continuation" | "upgrade" | "manual" +): Promise<{ runId: string }> { + // 1. Trigger the run upfront. Cheap to cancel if we lose the race.
+ const newRun = await triggerTaskInternal(sess.taskIdentifier, sess.triggerConfig); + + // 2. Try to claim the slot. + const claimed = await prisma.session.updateMany({ + where: { + id: sess.id, + currentRunVersion: sess.currentRunVersion, + }, + data: { + currentRunId: newRun.id, + currentRunVersion: { increment: 1 }, + }, + }); + + if (claimed.count === 1) { + // Optionally record SessionRun history here. + return { runId: newRun.id }; + } + + // 3. Lost the race. Cancel ours, reuse whoever won. + cancelTaskRun(newRun.id).catch(() => {/* fire-and-forget */}); + const fresh = await readSession(sess.id); + if (fresh.currentRunId && !isTerminal(await getSnapshotStatus(fresh.currentRunId))) { + return { runId: fresh.currentRunId }; + } + + // 4. Pathological: winner's run died between win and our re-read. Recurse. + return ensureRunForSession(fresh, reason); +} +``` + +Key properties: +- No DB lock held across the trigger network call. +- Wasted-trigger window is small and bounded (multi-tab race on dead run, + ms apart). Cancel cost is negligible. +- Recursion only on pathological double-failure; bounded by run-engine's + own progress. + +### Run-engine eager-clear (optional, defer) + +A run-engine post-termination hook that nulls `Session.currentRunId` when +the terminal run matches. Purely a dashboard freshness concern. Skip in +v1 — append-time probe is the source of truth. + +## SDK changes + +### Transport (`packages/trigger-sdk/src/v3/chat.ts`) + +State collapses to: + +```ts +type ChatSessionState = { + publicAccessToken: string; // session-scoped, long TTL + lastEventId?: string; // for SSE resume + isStreaming?: boolean; // for reconnect-on-reload UX + skipToTurnComplete?: boolean; // for stop+resume UX +}; +``` + +Note: no `runId`, no `sessionId`. The chat is the chatId; the token is +session-scoped. 
+ +Removed: +- `triggerTaskFn` callback option (constructor branch on it) +- `triggerNewRun()` method +- `renewRunPatForSession()` +- `renewRunAccessToken` callback option (token is session-scoped, doesn't + expire on run boundaries) +- `ensureSession()` (already removed in previous branch) +- The `trigger:upgrade-required` re-trigger handler in + `subscribeToSessionStream` (~50 lines) +- The `upgradeRetry: { payload, messages }` parameter threaded through + `sendMessages`, `preload`, `subscribeToSessionStream` +- The non-auth-error fallback in `sendMessages` (dead code, removed) + +Renamed/replaced: +- `chat.createTriggerAction` → `chat.createStartSessionAction` + - Calls `sessions.create({ taskIdentifier, externalId, triggerConfig })` + server-side with secret key + - Returns `{ publicAccessToken }` (no runId — invisible to browser) + +New methods: +- `transport.start(chatId, opts)` — for the browser-mediated path: + - Customer provides a `getStartToken(taskId)` callback that mints the + one-time JWT + - Transport calls `POST /sessions` with that token + - Receives session PAT, stores as state.publicAccessToken +- `transport.preload(chatId)` — same shape as `start` but with empty + basePayload override + +Method behavior changes: +- `sendMessages` — no trigger logic. Always `.in/append`. Server triggers + if needed. On 401/403, error out (token expired — customer's token + callback should provide fresh). +- `subscribeToSessionStream(chatId)` — pure passthrough on `.out`. Filters + `trigger:upgrade-required` for cleanliness (server handles the re-run + swap). Filters `trigger:turn-complete` as today. +- `stopGeneration` — `.in/append` with `{ kind: "stop" }`. Unchanged. +- `getSession(chatId)` — returns `{ publicAccessToken, lastEventId, isStreaming }`. + No id fields. 
+ +### `chat-client.ts` (server-side AgentChat) + +Mirror the transport: state without `runId`/`sessionId`, no `triggerNewRun`, +constructor takes `{ chatId, publicAccessToken }` (or mints via secret +key). All `.in/append` and `.out` URLs use `chatId`. + +### `chat.agent` runtime (`packages/trigger-sdk/src/v3/ai.ts`) + +- Drop the fire-and-forget `sessions.create({ externalId: chatId })` at + bind. Session already exists by the time the agent boots — server + triggers via `ensureRunForSession` after creating the row. +- Keep `sessions.open(payload.chatId)` for helper resolution. No change. +- `chat.requestUpgrade()` plumbing: calls `POST /sessions/:id/end-and-continue` + with the run's internal auth. On success, emits `trigger:upgrade-required` + on `.out` for telemetry, exits cleanly. + +### Reference projects (`references/ai-chat`) + +- `actions.ts`: replace `chat.createTriggerAction` callsite with + `chat.createStartSessionAction` +- `chat-app.tsx`: pass the new `start` mode to `useTriggerChatTransport` +- `chat.tsx`: drop `runId` references +- `trigger/chat.ts`: no changes (chat.agent contract unchanged from + agent-author POV) + +## Auth model summary + +| Token | Scopes | Where minted | Lifetime | +| ----------------------------- | ------------------------------------------------------ | ----------------------------------------- | ----------- | +| Trigger-task one-shot | `trigger:tasks:{taskId}` | `auth.createTriggerPublicToken(taskId)` | One use | +| Session PAT | `read:sessions:{ext} + write:sessions:{ext}` | Issued by `POST /sessions` | 1h–24h | +| Run-internal PAT (chat.agent) | `read:runs:{run} + read:sessions:{ext} + …` | Server-side, never crosses to browser | Run-bounded | + +Browser holds at most a one-shot token (briefly) and a session PAT +(steady state). Never holds a run-scoped token. + +## Edge cases + +- **Concurrent multi-tab on dead run** — optimistic locking handles it, + loser cancels its triggered run. 
+- **Page refresh mid-stream** — `.out` SSE resumes via Last-Event-ID + (existing); session PAT survives because it's not run-scoped. +- **Run cancelled by user (dashboard)** — append-time probe sees terminal, + triggers new run on next message. +- **Idle exit** — same path; user comes back later, sends message, fresh + run boots. +- **Crash mid-turn (no `turn-complete` emitted)** — same path; persisted + store is pre-turn, fresh run reads `.in` from tail position, picks up + unanswered message. +- **Upgrade during user message** — optimistic locking in + `end-and-continue` ensures one wins. If user message wins, + `end-and-continue` returns conflict, agent v1 keeps running, processes + message, retries upgrade later. If upgrade wins, user message's append + probes fresh `currentRunId` (v2), uses it. +- **Session expiry mid-conversation** — `.in/append` and `end-and-continue` + reject after `expiresAt`. Existing run keeps running until idle, then + exits. Frontend sees a 400. +- **Concurrent `POST /sessions`** — unique constraint on + `(env, externalId)`, idempotent upsert returns existing row + ensure-runs. 
+ +## Tests + +### Unit + +- `ensureRunForSession`: + - Happy path (no contention) + - Concurrent contention (two callers, one wins, loser reuses winner's + run) + - Pathological recursion (winner's run dies before loser re-reads) + - Trigger failure (caller's responsibility to surface) +- `POST /sessions` route: + - Idempotent upsert (same externalId → same row, fresh PAT) + - Auth: secret key path, JWT path with valid scope, JWT path with wrong + task scope (403), JWT replay (consumed token rejected) + - First run triggered, runId in response +- `POST /sessions/:id/in/append`: + - Probe path: alive run, terminal run, null currentRunId + - Probe + trigger: ensure new run before append + - Closed session 400 + - Expired session 400 +- `POST /sessions/:id/end-and-continue`: + - Auth: only callable from the current run + - Optimistic locking: stale currentRunId loses gracefully + +### Integration + +- chat.test.ts rewrite around the new transport surface (no `runId`, + no `triggerNewRun`) +- mock-chat-agent harness updates: install `__setSessionCreateImplForTests` + to also stub the first-run trigger (the create + trigger is now atomic + on the server, so the test harness needs to surface a fake runId) + +### Smoke (manual via Chrome DevTools) + +Same checklist as the previous branch's smoke test, plus: + +- Cancel run via dashboard → next user message triggers fresh run + automatically (no longer a gap) +- Deploy a new agent version mid-conversation → existing run requests + upgrade, exits, new run continues seamlessly (transport sees no + interruption beyond a possible extra TTFB) + +## Verification plan + +Per-package: + +``` +pnpm run typecheck --filter webapp # apps + internal pkgs +pnpm run typecheck --filter @internal/run-engine +pnpm run build --filter @trigger.dev/sdk # public package +pnpm run build --filter @trigger.dev/core # public package +pnpm run test --filter webapp -- sessionRunManager +pnpm run test --filter @trigger.dev/sdk -- chat +``` + 
+End-to-end via the playground: + +1. ai-chat (chat.agent) — basic send + reply +2. ai-chat-session (custom agent) — basic send + reply +3. ai-chat-raw — basic send + reply +4. ai-chat-hydrated — basic send + reply +5. Mid-stream reload — SSE reconnect +6. Stop + follow-up — same run handles next turn +7. Cancel run + send message → new run triggered automatically (the gap + from previous branch's S4 — must pass cleanly here) +8. Deploy new version + send message → in-flight conversation upgrades + transparently +9. Cross-form addressing curl matrix — unchanged from previous branch + +## Rollout + +- Single feature branch off `main` (or off the previous chat-agent-sessions + branch once that lands). +- No flag, no shim. Hard cutover. Pre-release SDK version. +- Reference projects updated in the same PR so the smoke test path works. + +## Open questions + +1. **Should `end-and-continue` accept a custom `triggerConfig` override?** + Use case: agent wants to swap to a different task identifier (rare). + Probably defer — keep it strictly "trigger another run with the same + config" for v1. +2. **Should `triggerConfig` pin the deploy version?** If a customer + redeploys with a chat.agent contract change, in-flight sessions might + have payloads incompatible with the new version. Probably defer — + chat.agent contract is stable; signature-breaking changes are rare and + warrant explicit handling. +3. **`SessionRun` join table**: yes/no/defer? Defer to v1.1 unless a + concrete dashboard surface needs it. +4. **`getSnapshotStatus` cost on hot path** — measure before optimizing. + Redis snapshot read should be sub-ms; if it isn't, cache for 1-2s + per session. 
+ +## Out of scope + +- Session-level retry policies (separate feature) +- Multi-run-per-session (parallel agents on one chat) — explicit + non-goal; one currentRunId by design +- Cross-environment sessions (a session in dev, run in prod) — not + considered +- Public `Session.requestRun()` for callers other than the running + agent itself — defer until a use case appears +- Webhook notifications on run swap — defer + +## Effort estimate + +- Schema + migration: 0.5 day +- `ensureRunForSession` service + tests: 1.5 days +- `POST /sessions` auth modes + idempotent upsert + first-run trigger: 1 day +- `.in/append` probe: 0.5 day +- `end-and-continue` route + agent runtime wiring: 1 day +- Transport rewrite + tests: 2.5 days +- chat-client rewrite + tests: 1 day +- chat.agent runtime cleanup: 0.5 day +- `chat.createStartSessionAction` + browser path: 1 day +- Reference project migration: 0.5 day +- Smoke test + bug-fix buffer: 1.5 days + +**~11.5 days** focused work. Plus design doc review and any architectural +back-and-forth — call it 2 calendar weeks. + +## Implementation order + +1. Schema + migration (gives the new columns; everything else builds on this) +2. `ensureRunForSession` service + unit tests (the load-bearing primitive) +3. `POST /sessions` route changes (creates a session that actually has a run) +4. `.in/append` probe path (so the server can self-heal between runs) +5. `end-and-continue` route + chat.agent runtime call (upgrade flow) +6. Transport rewrite (depends on all the server pieces) +7. chat-client rewrite (mirrors transport; cheap once that's done) +8. `chat.createStartSessionAction` + reference project migration +9.
Smoke test + final bug fixes diff --git a/.claude/docs-plans/sessions-as-run-manager-docs.md b/.claude/docs-plans/sessions-as-run-manager-docs.md new file mode 100644 index 00000000000..4d8e858a876 --- /dev/null +++ b/.claude/docs-plans/sessions-as-run-manager-docs.md @@ -0,0 +1,366 @@ +# Docs update plan: Sessions-as-run-manager + +Companion to commits `7a48c1e6` (ai-chat) and `427541c2` (sessions server). Captures every doc page that needs to change, what's getting removed, and an upgrade guide for prerelease users. + +## Architectural summary (the diff readers should internalize) + +Pre-migration mental model: Sessions and chat.agent were two separate primitives. Sessions had its own create/list/close API; chat.agent rolled its own run-scoped streams. The two coexisted but didn't share machinery — chat.agent's wire path (run streams) was distinct from Sessions' wire path (`.in` / `.out` channels). + +Post-migration mental model: **Sessions is the run manager.** A Session row is task-bound (`taskIdentifier` + `triggerConfig` are required), it owns its current run via `currentRunId` (optimistic-claim), and it tracks every run it ever triggered in a `SessionRun` audit table. chat.agent is now just a particular kind of task you bind a Session to. The standalone "create a Session, then trigger something against it" path is gone — `sessions.start({...})` atomically creates the row and triggers the first run. + +Wire-level, the transport now talks to one set of routes (`/realtime/v1/sessions/:s/...` and `/api/v1/sessions/:s/...`); the per-run-stream code path is dead for chat. + +## Standalone Sessions docs: REMOVE + +`docs/sessions/` was written for the standalone-Session model. With sessions now task-bound, every page in that directory is incorrect: + +- `sessions/overview.mdx` — describes a generic session-as-bidirectional-channel primitive. Standalone create/list/close as the entry point. 
+- `sessions/quick-start.mdx` — `sessions.create({type, externalId})` then trigger something. Pattern no longer exists. +- `sessions/channels.mdx` — `.in` / `.out` documented from the standalone-session perspective. +- `sessions/reference.mdx` — API surface for the standalone primitive. + +**Action:** +1. Delete all four files: `docs/sessions/{overview,quick-start,channels,reference}.mdx`. +2. Remove the entire `Sessions` group from `docs/docs.json` under the `AI` group: + ```json + { + "group": "Sessions", + "pages": ["sessions/overview", "sessions/quick-start", "sessions/channels", "sessions/reference"] + } + ``` +3. Don't redirect — the URLs were never widely shared (this was alpha-tier surface). If we add Sessions docs back later, we can decide redirect-vs-fresh-slug then. + +We'll re-introduce Sessions docs once the primitive is stable and we have a non-chat.agent customer flow to document. + +## ai-chat docs: UPDATE + +Pages listed in the order they appear in `docs.json`. Each entry calls out the specific stale claims and what to replace. + +### `ai-chat/overview.mdx` +- Replace any line that says chat.agent runs on per-run streams or that the transport mints run-scoped tokens. +- Add one paragraph on the underlying primitive: chat.agent is bound to a Session that owns its runs. Customer-facing surface unchanged. +- If there's a "how it works" diagram, update arrows: browser → server action → `chat.createStartSessionAction` → Session row + first run + session PAT → browser → `.in/append` + `.out` SSE. + +### `ai-chat/changelog.mdx` +- Add an entry for the migration: "Sessions-as-run-manager — chat.agent now runs on top of a durable Session row that owns its runs. Public surface unchanged. See upgrade guide." + +### `ai-chat/quick-start.mdx` +- The transport snippet is the highest-value example in the docs. 
It must show the new shape: + ```ts + const transport = useTriggerChatTransport({ + task: "my-agent", + accessToken: ({ chatId }) => mintAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), + }); + ``` +- Server actions page should show `chat.createStartSessionAction("my-agent")` and `auth.createPublicToken({scopes: {sessions: chatId}})`. +- Drop any mention of `getStartToken` and `auth.createTriggerPublicToken` for the chat path. + +### `ai-chat/backend.mdx` +- The `chat.agent({...})` shape itself is unchanged — leave the `run`, `onPreload`, `onTurnStart`, `onTurnComplete` callbacks alone. +- Add a section on `chat.createStartSessionAction(taskId, options?)`. This is the canonical server-side entry point now. Show: + - Default `triggerConfig.basePayload`: `{messages: [], trigger: "preload"}` baked in. Customer overrides via `options.triggerConfig`. + - Idempotent on `(env, externalId)`. Concurrent calls for the same chatId converge. + - Returns `{sessionId, runId, publicAccessToken}`. +- Update `chat.requestUpgrade()` description: it now calls `endAndContinueSession` server-side, which atomically swaps `Session.currentRunId` to a new run. Browser keeps streaming across the swap. + +### `ai-chat/frontend.mdx` +- This is where most of the transport API lives. Rewrite around the two callbacks: + - `accessToken: ({chatId}) => string` — pure refresh, called on 401/403. + - `startSession?: ({chatId, taskId, clientData}) => {publicAccessToken}` — wraps the customer's server action, called on `transport.preload(chatId)` and lazy first `sendMessage`. +- Show the typed `clientData` flow: `useTriggerChatTransport` infers `clientData` from `withClientData`, threads it into `startSession`'s params, and merges into per-turn `metadata`. +- Drop `getStartToken` documentation entirely. +- `transport.preload(chatId)` no longer takes per-call options. 
If the customer needs dynamic per-call config they capture it in their server action via closure (typically over a ref for live values like the playground's `clientDataJsonRef`). +- Persistable `ChatSession`: `{publicAccessToken, lastEventId?}`. `runId` is gone. + +### `ai-chat/server-chat.mdx` +- `AgentChat` (server-side chat client) — same shape, but the `session` prop now takes `{lastEventId?}` only. +- `onTriggered({runId, chatId})` callback is still useful for telemetry / dashboard linking — the `runId` is the *current* run, not the only run. Note that across turns the runId may change (continuation runs after idle, upgrade runs, etc.). + +### `ai-chat/types.mdx` +- `ChatSession` — drop `runId`, drop `sessionId`. Just `{publicAccessToken, lastEventId?}`. +- `StartSessionParams`, `StartSessionResult` — new public types. +- `AccessTokenParams` — narrowed to `{chatId}` only (no metadata threading). +- Remove `GetStartTokenParams` from the type table. + +### `ai-chat/features.mdx` +- Audit for any mention of run-scoped streams, `CHAT_STREAM_KEY`, `CHAT_MESSAGES_STREAM_ID`, `CHAT_STOP_STREAM_ID`. All gone. +- Add: cross-form addressing on the wire (a session-scoped JWT minted for either `externalId` or `friendlyId` form authorizes either URL form). +- Add: SessionRun audit log — every run a chat session has triggered is recorded, queryable via the dashboard. + +### `ai-chat/compaction.mdx` +- Should be untouched (compaction lives inside `chat.agent`'s turn loop, doesn't depend on the wire model). + +### `ai-chat/pending-messages.mdx` +- Should be untouched (steering messages flow through `.in.append` regardless). + +### `ai-chat/background-injection.mdx` +- Same — injection happens inside the run, the run's wire path swap doesn't affect it. + +### `ai-chat/error-handling.mdx` +- Add: errors from `startSession` callback. The customer's server action can fail (auth check, DB write). 
Surface via `onSessionChange(chatId, null)` or via the customer's own try/catch in their callback. +- Replace any 401/403 retry logic that mentions `getStartToken` — it's `accessToken` now. + +### `ai-chat/mcp.mdx` +- Audit for `getStartToken` mentions in MCP tool examples. + +### `ai-chat/testing.mdx` +- The `mock-chat-agent` test harness moved to `setupSessionStartImplForTests` / similar — verify and update examples. +- Show how to mock `startSession` in unit tests (it's a fetch-mock or vi.fn returning `{publicAccessToken}`). + +### `ai-chat/client-protocol.mdx` +- The wire-level protocol page. Replace any `/realtime/v1/streams/{runId}/chat` URLs with `/realtime/v1/sessions/{chatId}/{io}`. +- Document the chunk shape on `.in`: tagged union — `{kind: "message", payload}` for user turns, `{kind: "stop"}` for stop signals, `{kind: "action", name, payload}` for typed actions. +- Document `.out` chunks: `UIMessageChunk`s interleaved with `trigger:turn-complete`, `trigger:upgrade-required` control markers. +- Cross-form addressing on session-scoped PATs. + +### `ai-chat/reference.mdx` +- Public API surface tables. `TriggerChatTransportOptions` — drop `getStartToken`, `triggerConfig`, `triggerOptions`; add `startSession`. +- `chat.createStartSessionAction(taskId, options?)` — full signature. +- `chat.requestUpgrade()` — keep, but note the new server-orchestrated swap behaviour. + +### `ai-chat/patterns/version-upgrades.mdx` +- This page is essentially about `chat.requestUpgrade()`. Update to explain the new mechanism: + - Old: agent emitted `trigger:upgrade-required` chunk, transport consumed it, transport triggered a new run from the browser side. + - New: agent calls `endAndContinueSession` (server-to-server), webapp atomically swaps `Session.currentRunId` to a freshly-triggered run, transport's existing SSE keeps streaming on the same session — no transport-side swap. +- Add: `SessionRun` audit row with `reason: "upgrade"`. 
+ +### `ai-chat/patterns/sub-agents.mdx` +- Audit for any session.create / sub-agent-as-session-creator patterns. Sub-agents now get their session via the parent's task trigger (or by calling `sessions.start({ ... })` themselves with a different taskIdentifier). + +### `ai-chat/patterns/database-persistence.mdx` +- The reference app's `ChatSession` schema is now simpler: `{id, publicAccessToken, lastEventId?}`. Drop `runId`/`sessionId` columns from any example schemas. +- The persistence pattern itself is unchanged: persist the PAT + lastEventId, hydrate on page load via `sessions: { [chatId]: ... }` on the transport. + +### `ai-chat/patterns/branching-conversations.mdx` +- Should be mostly unchanged. Branching is a customer-side concern (multiple chatIds, each one its own session). + +### `ai-chat/patterns/code-sandbox.mdx` +- Audit for stale references. Probably fine. + +### `ai-chat/patterns/human-in-the-loop.mdx` +- Should be unchanged. + +### `ai-chat/patterns/skills.mdx` +- Should be unchanged. + +## NEW page: upgrade guide for chat.agent prerelease users + +Filename: `docs/ai-chat/upgrade-guide.mdx` (or `migration-from-prerelease.mdx` — pick whichever fits the docs style). Add to `docs.json` near the top of the AI Chat group, between `overview` and `quick-start`. + +Contents: + +```mdx +--- +title: "Upgrade guide: prerelease → Sessions-as-run-manager" +description: "Migrating chat.agent code from the prerelease API to the Sessions-as-run-manager release." +--- + +# Upgrade guide + +This guide is for customers who tried `chat.agent` during the prerelease period +(any `@trigger.dev/sdk` build before vX.Y.Z). The public surface is largely +unchanged — `chat.agent({...})`, `useTriggerChatTransport`, `chat.store` / +`chat.defer` / `chat.history`, `AgentChat` — but the transport callbacks and a +few server-side helpers were renamed. 
+ +## TL;DR + +- **`getStartToken` is gone.** Replace with `startSession`, a server-action + callback that returns `{publicAccessToken}`. +- **`chat.createStartSessionAction(taskId, options?)` is the canonical + server-side entry point.** Replaces ad-hoc `auth.createTriggerPublicToken` + + manual session create. +- **`ChatSession` persistable shape changed.** Drop the `runId` field; + store only `{publicAccessToken, lastEventId?}`. +- **`transport.preload(chatId)` no longer takes per-call options.** + Trigger config (machine, idleTimeoutInSeconds, tags) lives server-side in + `chat.createStartSessionAction(taskId, options)`. +- **Wire URLs changed.** Anything that hit + `/realtime/v1/streams/{runId}/chat` directly should use + `/realtime/v1/sessions/{chatId}/out` (subscribe) or + `/realtime/v1/sessions/{chatId}/in/append` (send). + +## Transport: replace `getStartToken` with `startSession` + +### Before + +```ts +const transport = useTriggerChatTransport({ + task: "my-agent", + accessToken: async ({ chatId }) => mintToken(chatId), + getStartToken: async ({ taskId }) => mintTriggerToken(taskId), + triggerConfig: { basePayload: { /* ... */ } }, + triggerOptions: { tags: [...], machine: "small-1x" }, +}); +``` + +The browser called `auth.createTriggerPublicToken(taskId)` server-side to get +a one-shot trigger JWT, then `POST /api/v1/sessions` from the browser. 
+ +### After + +```ts +const transport = useTriggerChatTransport({ + task: "my-agent", + accessToken: ({ chatId }) => mintAccessToken(chatId), + startSession: ({ chatId, taskId, clientData }) => + startChatSession({ chatId, taskId, clientData }), +}); +``` + +Where `startChatSession` is a server action wrapping +`chat.createStartSessionAction`: + +```ts +"use server"; +import { chat } from "@trigger.dev/sdk/ai"; + +export const startChatSession = chat.createStartSessionAction("my-agent", { + triggerConfig: { + machine: "small-1x", + tags: ["my-tag"], + }, +}); +``` + +The browser never holds a `trigger:tasks:{taskId}` JWT now. All session +creation goes through the customer's server, where authorization decisions +live alongside the customer's own DB writes. + +## Server actions: replace ad-hoc helpers with `chat.createStartSessionAction` + +### Before + +```ts +"use server"; +import { auth, sessions } from "@trigger.dev/sdk"; + +export async function startChatSession({ chatId, taskId }) { + const session = await sessions.create({ + type: "chat.agent", + externalId: chatId, + }); + // ... separately trigger the agent task ... + const publicAccessToken = await auth.createPublicToken({ + scopes: { read: { sessions: chatId }, write: { sessions: chatId } }, + }); + return { publicAccessToken }; +} +``` + +### After + +```ts +"use server"; +import { chat } from "@trigger.dev/sdk/ai"; + +export const startChatSession = chat.createStartSessionAction("my-agent"); +``` + +The new helper handles session creation + first-run trigger + PAT mint +atomically. It's idempotent on `(env, externalId)` — concurrent calls for the +same `chatId` converge to the same session. 
+ +## `ChatSession` shape: drop `runId` + +Persistable session state is now just the PAT + last event ID: + +```ts +// before +type ChatSession = { runId: string; publicAccessToken: string; lastEventId?: string }; + +// after +type ChatSession = { publicAccessToken: string; lastEventId?: string }; +``` + +If your DB schema has a `runId` column on a session-state table, drop it (or +keep it for telemetry — but the transport doesn't read it). The current run +ID is server-side state on the Session row; the transport doesn't need to +know it. + +## `clientData`: typed and threaded automatically + +If your agent uses `chat.agent(...).withClientData({schema})`, the transport +infers the `clientData` type from `useTriggerChatTransport` +and threads it through `startSession`'s params. Set it once on the +transport: + +```ts +useTriggerChatTransport({ + // ... + clientData: { userId: currentUser.id, plan: currentUser.plan }, +}); +``` + +The same value also merges into per-turn `metadata` on the wire, and your +`startSession` callback receives it as `params.clientData`. Pass through to +`chat.createStartSessionAction` via `triggerConfig.basePayload.metadata` and +the agent's first run sees it in `payload.metadata`. + +## `chat.requestUpgrade()`: server-orchestrated now + +The behaviour didn't change from the customer's perspective — call +`chat.requestUpgrade()` inside `onTurnStart` / `onValidateMessages` and the +current run will exit so the next message starts on the latest version. + +What changed under the hood: + +- **Before:** the agent emitted a `trigger:upgrade-required` chunk on + `.out`, the transport consumed it browser-side and triggered a new run. +- **After:** the agent calls `endAndContinueSession` server-to-server, the + webapp triggers a new run and atomically swaps `Session.currentRunId`, + the browser's existing SSE subscription keeps receiving chunks across + the swap. Faster handoff, no browser-side bookkeeping. 
+ +The `SessionRun` audit table records every run, including upgrade-driven +ones (with `reason: "upgrade"`). + +## Going to URLs directly? + +Anyone hitting raw URLs (instead of going through the SDK) should switch: + +| Before | After | +|---|---| +| `/realtime/v1/streams/{runId}/chat` (subscribe) | `/realtime/v1/sessions/{chatId}/out` | +| `/realtime/v1/streams/{runId}/{target}/chat-messages/append` | `/realtime/v1/sessions/{chatId}/in/append` (`{kind: "message", payload}` body) | +| `/realtime/v1/streams/{runId}/{target}/chat-stop/append` | `/realtime/v1/sessions/{chatId}/in/append` (`{kind: "stop"}` body) | + +The session-scoped PAT (`read:sessions:{chatId} + write:sessions:{chatId}`) +authorizes both the `externalId` form (e.g. `/sessions/my-chat-id/out`) +and the `friendlyId` form (e.g. `/sessions/session_abc.../out`). + +## Things that didn't change + +- `chat.agent({...})` definition shape and all callbacks. +- `chat.store` / `chat.defer` / `chat.history` APIs. +- `AgentChat` (server-side chat client) — same constructor, same methods. +- `useTriggerChatTransport`'s React semantics (created once, kept in a ref, + callbacks updated via `setOnSessionChange` / `setClientData` under the hood). +- Multi-tab coordination, pending-messages / steering, background injection. +- Per-turn `metadata` flowing through `sendMessage({ text }, { metadata })`. +``` + +## Other doc surfaces touched + +- `docs/ai/prompts.mdx` — only mentions `chat.agent` in passing. Audit but probably no change. +- `docs/realtime/backend/streams.mdx`, `docs/realtime/backend/input-streams.mdx` — these are the older streams API docs. Verify they don't reference `CHAT_STREAM_KEY` or `CHAT_MESSAGES_STREAM_ID` (those constants were removed). +- `docs/mcp-tools.mdx` — likely mentions the chat MCP tools. Audit for `getStartToken`-shaped examples. +- `docs/guides/example-projects/anchor-browser-web-scraper.mdx` — example project. Likely uses `chat.agent`. Audit. 
+- `docs/tasks/schemaTask.mdx` — only matched on the term "session" probably. Audit. + +## Update sequence + +Suggested order to minimise stale-state windows for readers: + +1. **Add the upgrade guide** (`ai-chat/upgrade-guide.mdx`) and its nav entry. This is the most-needed doc and stands alone from the rest. +2. **Update transport-shape pages** in this order: `quick-start` → `frontend` → `backend` → `server-chat` → `types` → `reference`. They all show the same callback shape; readers cross-reference between them, so they should ship together. +3. **Update peripheral pages**: `overview`, `changelog`, `client-protocol`, `error-handling`, `testing`, `features`, patterns. +4. **Remove `docs/sessions/`** + nav group last. Until step 2 lands the standalone Sessions docs are still less misleading than half-stale chat.agent docs. + +## Out of scope for this pass + +- Re-adding standalone Sessions docs (deferred until the primitive is stable for non-chat use). +- Diagrams / illustrations — text-first pass; designer can layer visuals after. +- Sample customer projects — the `references/ai-chat` reference repo is the in-source example; if marketing wants a polished standalone sample, that's a separate effort. diff --git a/.claude/review-guides/chat-agent-sessions-row-agnostic.md b/.claude/review-guides/chat-agent-sessions-row-agnostic.md new file mode 100644 index 00000000000..7fb9851f308 --- /dev/null +++ b/.claude/review-guides/chat-agent-sessions-row-agnostic.md @@ -0,0 +1,287 @@ +# Review guide — chat.agent on Sessions, row-agnostic addressing + +Scope: the 12 uncommitted files. **No new behaviour beyond the public surface +already on this branch** — this is plumbing cleanup that: + +1. Eliminates the transport's session-creation step +2. Makes `chatId` the universal addressing string everywhere +3. 
Makes the server-side stream/append/wait routes row-agnostic + +## The two design moves + +**Move 1 — agent owns session lifecycle.** `chat.agent` and +`chat.customAgent` upsert the backing `Session` row at bind, fire-and-forget, +keyed on `externalId = payload.chatId`. The transport, server-side +`AgentChat`, and `chat.createTriggerAction` no longer create sessions at all. +Browsers cannot mint sessions either (`POST /api/v1/sessions` is now +secret-key-only). One owner, one path. + +**Move 2 — `chatId` is the only address.** The transport, server-side +`AgentChat`, JWT scopes, and S2 stream paths all use `chatId` directly. The +Session's friendlyId is informational. To make this safe, the three stream +routes (`.in/.out` PUT, GET, POST append, plus the run-engine `wait` +endpoint) became "row-optional" and derive a *canonical addressing key* +(`row.externalId ?? row.friendlyId`, fallback to the URL param when the row +hasn't been upserted yet). Same canonical key is used to build the S2 stream +path, the waitpoint cache key, and the JWT resource set — so any caller +addressing by either form converges on the same physical stream. + +Together these remove an entire class of "did the row land yet?" races. The +transport can subscribe to `/sessions/{chatId}/out` before the agent boots, +the agent's `void sessions.create({externalId: chatId})` lands a moment +later, and any earlier reads/writes are already on the right S2 key. + +--- + +## Read in this order + +### 1. `apps/webapp/app/services/realtime/sessions.server.ts` (+34 lines) + +The new primitive. Two helpers: + +- `isSessionFriendlyIdForm(value)` — `value.startsWith("session_")`. Used to + decide whether a missing row is a hard 404 (opaque friendlyId) or a soft + "row will land later" (externalId form). +- `canonicalSessionAddressingKey(row, paramSession)` — `row.externalId ?? + row.friendlyId` if the row exists, else `paramSession`. **This is the load- + bearing function.** Read its docstring. 
+ +**Question to ask:** can two callers addressing the "same" session ever get +different canonical keys? Only if the row exists for one and not the other, +*and* the URL forms differ — but in that case the row-less caller used the +externalId form (friendlyId-form would have 404'd earlier), and the row-ful +caller computes `row.externalId ?? row.friendlyId`. If the row's externalId +matches the URL, they converge. If it doesn't, there's no row to find by +that string anyway. The interesting edge is "row exists with no externalId", +addressed via friendlyId — both sides read `row.friendlyId`. ✓ + +### 2. `apps/webapp/app/routes/realtime.v1.sessions.$session.$io.ts` (+47/-12) + +PUT initialize + GET subscribe (SSE). Both use the helper. The interesting +part is the loader's `findResource` + `authorization.resource`: + +```ts +findResource: async (params, auth) => { + const row = await resolveSessionByIdOrExternalId(...); + if (!row && isSessionFriendlyIdForm(params.session)) return undefined; // 404 + return { row, addressingKey: canonicalSessionAddressingKey(row, params.session) }; +}, +authorization: { + resource: ({ row, addressingKey }) => { + const ids = new Set([addressingKey]); + if (row) { + ids.add(row.friendlyId); + if (row.externalId) ids.add(row.externalId); + } + return { sessions: [...ids] }; + }, + superScopes: ["read:sessions", "read:all", "admin"], +}, +``` + +**Why three IDs in the resource set?** `checkAuthorization` is "any-match" +across the resource values. We want a JWT scoped to *either* form to +authorize *either* URL form. Smoke test verified the 4-cell matrix passes. + +**The PUT path** (action handler) is simpler — it just resolves the row, +builds an addressing key, and hands it to `initializeSessionStream`. Worth +noting the `closedAt` check is now `maybeSession?.closedAt` — no row means +no closedAt to enforce. + +### 3. 
`apps/webapp/app/routes/realtime.v1.sessions.$session.$io.append.ts` (+22/-13) + +POST append (browser writes a record to `.in` or server writes to `.out`). +Same row-optional pattern. Both the S2 append and the waitpoint drain use +`addressingKey`. + +**Question to ask:** what fires the waitpoint? An agent's +`session.in.wait()` registers a waitpoint keyed on `(addressingKey, io)` via +the wait endpoint (file 4). The append handler drains by the *same* key — +even if the agent registered with externalId form and the transport +appended via friendlyId form, both compute the same canonical key, so they +converge. ✓ + +### 4. `apps/webapp/app/routes/api.v1.runs.$runFriendlyId.session-streams.wait.ts` (+18/-13) + +The agent's `.in.wait()` endpoint. Run-engine creates the waitpoint, then +registers it in Redis under `(addressingKey, io)`. The race-check that runs +right after creation reads from S2 by the same key. Three call sites — +`addSessionStreamWaitpoint`, `readSessionStreamRecords`, +`removeSessionStreamWaitpoint` — all consistent. + +### 5. `apps/webapp/app/routes/api.v1.sessions.ts` (+4/-2) + +**Security tightening.** Removed `allowJWT: true` and `corsStrategy: "all"` +from the `POST /api/v1/sessions` action — secret-key only now. + +**Question to ask:** was the JWT path actually used? Until this branch, the +transport called it via `ensureSession` (now deleted). After this branch, +nobody reaches it from the browser. `chat.createTriggerAction` (server +secret key) is the only browser-adjacent path. + +### 6. `packages/trigger-sdk/src/v3/ai.ts` (+62/-39) + +Two near-identical edits — one in `chatAgent`, one in `chatCustomAgent`. +Both bind on `payload.chatId` and fire-and-forget the upsert: + +```ts +locals.set(chatSessionHandleKey, sessions.open(payload.chatId)); +void sessions + .create({ type: "chat.agent", externalId: payload.chatId }) + .catch(() => { /* best effort */ }); +``` + +**Question to ask:** why `void`-and-`catch`? 
Awaiting the upsert would gate +the agent's bind on a network round-trip that doesn't unblock anything +user-visible — `.in/.out` routes are row-agnostic and the waitpoint cache +is keyed on the addressing string, not the row id. If the upsert genuinely +fails, the next bind retries the same idempotent call (`sessions.create` +upserts on `externalId`, so concurrent triggers on one chatId converge to +one row). The row matters for downstream metadata + listing, not for live +addressing. + +The PAT scope minting in `chatAgent` (two call sites — preload and +sendMessage) now uses `payload.chatId` for the `sessions:` resource. That +matches what the transport/AgentChat use as the JWT resource and what the +JWT's resource set in the loader includes. Cross-form addressing works +either way (smoke-tested), but using `chatId` keeps the chain tight. + +`createChatTriggerAction` is the most visibly trimmed: no pre-create, no +threading `sessionId` into payload, scope mint uses `chatId`. Return type +no longer carries `sessionId` — note `TriggerChatTaskResult.sessionId` was +already declared optional, so this isn't a public-API break. + +**Stale docstring to flag:** `chat.ts:59` and `chat.ts:112` still describe +PAT scopes as `read:sessions:{sessionId}` and +`write:sessions:{sessionId}`. Functionally either ID works (row lookup +canonicalises), but the doc text is now out of date — it should say +`{chatId}`. Worth a tidy-up before merge but not blocking. + +### 7. `packages/trigger-sdk/src/v3/chat.ts` (+63/-117) + +**The biggest mechanical edit.** Net -54 lines from deleting `ensureSession` +and untangling its callers. 
+ +What disappeared: +- `private async ensureSession(chatId)` — gone +- The "lazy upsert from the browser if no triggerTask callback" branch in + `sendMessages` and `preload` — gone +- The "throw if neither path surfaced a sessionId" guard — gone +- All `state.sessionId` URL params replaced with `chatId` +- `subscribeToSessionStream`'s `chatId?` (optional) is now `chatId` (required) + +What stayed: +- `state.sessionId` in `ChatSessionState` — optional, informational +- The `restore from external storage` branch in the constructor still + hydrates `sessionId` if persisted, just doesn't *require* it +- `notifySessionChange` still surfaces `sessionId` if known + +**Question to ask:** does the transport ever still need the friendlyId? The +only place is the `onSessionChange` callback's payload (so consumers +persisting state can save it for later display). The transport itself never +puts it in a URL or a waitpoint key. + +The `sendMessages` path is worth re-reading: when state.runId is set, it +appends to `.in/append` and subscribes to `.out`. If the append fails with +a non-auth error, it falls through to triggering a new run (legacy "run is +dead" detection — unchanged from pre-Sessions, doesn't depend on +addressing). + +### 8. `packages/trigger-sdk/src/v3/chat-client.ts` (+34/-33) + +Server-side `AgentChat`. Mirrors the transport changes — every URL uses +`this.chatId`. `triggerNewRun` no longer pre-creates a session. `ChatSession` +and internal `SessionState` types now have optional `sessionId`. + +The shape of the diff is identical to the transport: delete the upsert, +swap addressing identifiers, optionalise the friendlyId. If you've read +`chat.ts` carefully, this one is mostly mechanical confirmation that both +client surfaces (browser transport + server-side AgentChat) speak the same +addressing protocol. + +### 9. 
Test infrastructure — `sessions.ts` (+18) + `mock-chat-agent.ts` (+25) + +`__setSessionCreateImplForTests` mirrors the existing +`__setSessionOpenImplForTests`. `mockChatAgent` installs a no-op create stub +returning a synthetic `CreatedSessionResponseBody` so the agent's bind-time +`void sessions.create(...)` doesn't try to hit a real API. Cleanup runs in +the same `.finally` as the open override. + +**Question to ask:** is the synthetic response shape correct? It mirrors +`CreatedSessionResponseBody` — `id`, `externalId`, `type`, `tags`, +`metadata`, `closedAt`, `closedReason`, `expiresAt`, `createdAt`, +`updatedAt`, `isCached`. Tests don't currently assert on this object, so +the bar is "doesn't crash + matches the type". Met. + +### 10. `packages/trigger-sdk/src/v3/chat.test.ts` (+13/-12) + +Four classes of test edits, all consequences of the changes above: + +- Stream URL assertion: `chat-1` (the chatId) instead of + `session_streamurl` (the friendlyId) +- `renewRunAccessToken` callback: `sessionId: undefined` (was + `DEFAULT_SESSION_ID`) because the mocked trigger doesn't surface it +- Token resolve count: `1` (was `2` — second resolve was for `ensureSession`) +- One `onSessionChange` matchObject loses `sessionId` + +### 11. `apps/webapp/app/routes/_app.../playground/.../route.tsx` (1 line) + +`sessionId: string` → `sessionId?: string` in the playground sidebar prop +to track the transport type change. + +--- + +## Edge cases I checked, so you don't have to + +- **Cross-form JWT auth (curl matrix).** JWT scoped to externalId can call + externalId URL ✓ and friendlyId URL ✓. JWT scoped to friendlyId can call + externalId URL ✓ and friendlyId URL ✓. Smoke-tested. +- **Row materialises after subscribe.** Transport opens + `GET /sessions/{chatId}/out` before agent's bind upsert lands → 200 OK, + `addressingKey = chatId` (paramSession fallback). Once the row lands + with `externalId = chatId`, addressingKey resolves to the same value via + `row.externalId`. Same S2 key throughout.
+- **Concurrent triggers on one chatId.** Two browser tabs trigger two runs + → two binds → two `sessions.create({externalId: chatId})` calls. Upsert + semantics: both return the same row. +- **Closed session enforcement.** Still enforced when a row exists. + `maybeSession?.closedAt` is null-safe; no row = no close-state to honour. +- **Agent run cancellation.** Frontend doesn't auto-detect — unchanged from + pre-Sessions; messages sit in S2 until the next trigger (the existing + run-PAT auth-error path is the only reaper). Out of scope for this branch. +- **Idle timeout in dev.** Runs stay `EXECUTING_WITH_WAITPOINTS` past the + configured idle because dev runs don't snapshot/restore; the in-process + idle clock advances locally without touching the row. Expected, not a + regression. + +## Things explicitly **not** in this branch + +- Run-state subscription on the transport side (the "run died, re-trigger + silently" UX gap) +- Session auto-close on agent exit (still client-driven by design) +- Any change to `Session` schema, `sessions.create` semantics, or + `chatAccessTokenTTL` +- Docstring updates for `read:sessions:{sessionId}` / `write:sessions:{sessionId}` + in `chat.ts:59` and `chat.ts:112` (functional but textually stale — + follow-up nit) + +--- + +## What I'd be ready to answer cold + +- Why fire-and-forget upsert (vs. 
`await`) in the agent's bind step +- Why the route's authorization resource set has three IDs (cross-form JWT + auth) +- Why `POST /api/v1/sessions` lost `allowJWT` (security tightening — no + caller needs it after the transport's `ensureSession` is gone) +- What converges two callers using different URL forms onto the same S2 + stream (`canonicalSessionAddressingKey`, identical computation on both + sides for any given row) +- What makes `sessions.create` race-safe under concurrent triggers + (`externalId` upsert) +- Why `state.sessionId` stayed on `ChatSessionState` at all (purely + informational, surfaced via `onSessionChange` for consumer persistence; + zero addressing role) +- Why the chat-client (server-side AgentChat) and chat (transport) edits + look near-identical (they implement the same client protocol against the + same row-agnostic routes) diff --git a/.claude/rules/package-installation.md b/.claude/rules/package-installation.md new file mode 100644 index 00000000000..310074823c5 --- /dev/null +++ b/.claude/rules/package-installation.md @@ -0,0 +1,22 @@ +--- +paths: + - "**/package.json" +--- + +# Installing Packages + +When adding a new dependency to any package.json in the monorepo: + +1. **Look up the latest version** on npm before adding: + ```bash + pnpm view <package-name> version + ``` + If unsure which version to use (e.g. major version compatibility), confirm with the user. + +2. **Edit the package.json directly** — do NOT use `pnpm add` as it can cause issues in the monorepo. Add the dependency with the correct version range (typically `^x.y.z`). + +3. **Run `pnpm i` from the repo root** after editing to install and update the lockfile: + ```bash + pnpm i + ``` + Always run from the repo root, not from the package directory.
diff --git a/.gitignore b/.gitignore index 5f6adddba0a..d071d5ae4e3 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,10 @@ apps/**/public/build /packages/trigger-sdk/src/package.json /packages/python/src/package.json **/.claude/settings.local.json +.claude/architecture/ +.claude/docs-plans/ +.claude/review-guides/ +.claude/scheduled_tasks.lock .mcp.log .mcp.json .cursor/debug.log diff --git a/.server-changes/agent-view-sessions.md b/.server-changes/agent-view-sessions.md new file mode 100644 index 00000000000..757dcdc2f40 --- /dev/null +++ b/.server-changes/agent-view-sessions.md @@ -0,0 +1,12 @@ +--- +area: webapp +type: improvement +--- + +Migrate the dashboard Agent tab (span inspector) to subscribe to the backing Session's `.out` and `.in` channels instead of the run-scoped chat output + chat-messages input streams. Pairs with the SDK + MCP migrations on the ai-chat branch. + +- `SpanPresenter.server.ts` extracts `agentSession` from the run payload (prefers `sessionId`, falls back to `chatId` for pre-Sessions agent runs — matches `resolveSessionByIdOrExternalId`). +- Span route threads `agentSession` through `AgentViewAuth` and gates `agentView` creation on having one. +- New dashboard resource route `resources.orgs.../runs.$runParam/realtime/v1/sessions/$sessionId/$io` proxies `S2RealtimeStreams.streamResponseFromSessionStream` under dashboard session auth. The run param binds resource hierarchy; the session identity is verified against the environment. +- `AgentView.tsx` subscribes to `/out` and `/in` URLs, drops local `CHAT_STREAM_KEY`/`CHAT_MESSAGES_STREAM_ID` constants, and parses the `.in` stream as `ChatInputChunk` (`{kind: "message", payload}` for user turns; `{kind: "stop"}` ignored). Output-stream parsing is unchanged — session v2 SSE already delivers UIMessageChunk objects from `record.body.data`. +- Smoke: opened a prior `test-agent` run in the dashboard, Agent tab rendered user + assistant messages end-to-end with zero console errors. 
Both SSE endpoints (`/out`, `/in`) returned 200. diff --git a/.server-changes/run-agent-view.md b/.server-changes/run-agent-view.md new file mode 100644 index 00000000000..76580157d60 --- /dev/null +++ b/.server-changes/run-agent-view.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Add an Agent view to the run details page for runs whose `taskKind` annotation is `AGENT`. The view renders the agent's `UIMessage` conversation by subscribing to the run's `chat` realtime stream — the same data source as the Agent Playground content view. Switching is via a `Trace view` / `Agent view` segmented control above the run body, and the selected view is reflected in the URL via `?view=agent` so it's shareable. diff --git a/.server-changes/streamdown-v2-upgrade.md b/.server-changes/streamdown-v2-upgrade.md new file mode 100644 index 00000000000..8a0b3f17af0 --- /dev/null +++ b/.server-changes/streamdown-v2-upgrade.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +Upgrade streamdown from v1.4.0 to v2.5.0. Custom Shiki syntax highlighting theme matching our CodeMirror dark theme colors. Consolidate duplicated lazy StreamdownRenderer into a shared component. diff --git a/CLAUDE.md b/CLAUDE.md index 79d931a4548..17552ef54e4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,6 +6,8 @@ This file provides guidance to Claude Code when working with this repository. Su This is a pnpm 10.23.0 monorepo using Turborepo. Run commands from root with `pnpm run`. +**Adding dependencies:** Edit `package.json` directly instead of using `pnpm add`, then run `pnpm i` from the repo root. See `.claude/rules/package-installation.md` for the full process. 
+ ```bash pnpm run docker # Start Docker services (PostgreSQL, Redis, Electric) pnpm run db:migrate # Run database migrations diff --git a/apps/webapp/app/components/BulkActionFilterSummary.tsx b/apps/webapp/app/components/BulkActionFilterSummary.tsx index a230e70b346..c5d1a2f48d7 100644 --- a/apps/webapp/app/components/BulkActionFilterSummary.tsx +++ b/apps/webapp/app/components/BulkActionFilterSummary.tsx @@ -240,6 +240,19 @@ export function BulkActionFilterSummary({ /> ); } + case "sources": { + const values = Array.isArray(value) ? value : [`${value}`]; + return ( + + ); + } default: { assertNever(typedKey); } diff --git a/apps/webapp/app/components/code/AIQueryInput.tsx b/apps/webapp/app/components/code/AIQueryInput.tsx index 0775ec2c2a0..cd5e9db3bd8 100644 --- a/apps/webapp/app/components/code/AIQueryInput.tsx +++ b/apps/webapp/app/components/code/AIQueryInput.tsx @@ -1,25 +1,15 @@ import { CheckIcon, PencilSquareIcon, PlusIcon, XMarkIcon } from "@heroicons/react/20/solid"; import { AnimatePresence, motion } from "framer-motion"; -import { Suspense, lazy, useCallback, useEffect, useRef, useState } from "react"; +import { Suspense, useCallback, useEffect, useRef, useState } from "react"; import { Button } from "~/components/primitives/Buttons"; import { Spinner } from "~/components/primitives/Spinner"; +import { StreamdownRenderer } from "~/components/code/StreamdownRenderer"; import { useEnvironment } from "~/hooks/useEnvironment"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; import type { AITimeFilter } from "~/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.query/types"; import { cn } from "~/utils/cn"; -// Lazy load streamdown components to avoid SSR issues -const StreamdownRenderer = lazy(() => - import("streamdown").then((mod) => ({ - default: ({ children, isAnimating }: { children: string; isAnimating: boolean }) => ( - - {children} - - ), - })) -); - type 
StreamEventType = | { type: "thinking"; content: string } | { type: "tool_call"; tool: string; args: unknown } diff --git a/apps/webapp/app/components/code/StreamdownRenderer.tsx b/apps/webapp/app/components/code/StreamdownRenderer.tsx new file mode 100644 index 00000000000..996234ab180 --- /dev/null +++ b/apps/webapp/app/components/code/StreamdownRenderer.tsx @@ -0,0 +1,29 @@ +import { lazy } from "react"; +import type { CodeHighlighterPlugin } from "streamdown"; + +export const StreamdownRenderer = lazy(() => + Promise.all([import("streamdown"), import("@streamdown/code"), import("./shikiTheme")]).then( + ([{ Streamdown }, { createCodePlugin }, { triggerDarkTheme }]) => { + // Type assertion needed: @streamdown/code and streamdown resolve different shiki + // versions under pnpm, causing structurally-identical CodeHighlighterPlugin types + // to be considered incompatible (different BundledLanguage string unions). + const codePlugin = createCodePlugin({ + themes: [triggerDarkTheme, triggerDarkTheme], + }) as unknown as CodeHighlighterPlugin; + + return { + default: ({ + children, + isAnimating = false, + }: { + children: string; + isAnimating?: boolean; + }) => ( + + {children} + + ), + }; + } + ) +); diff --git a/apps/webapp/app/components/code/shikiTheme.ts b/apps/webapp/app/components/code/shikiTheme.ts new file mode 100644 index 00000000000..5d47155b979 --- /dev/null +++ b/apps/webapp/app/components/code/shikiTheme.ts @@ -0,0 +1,222 @@ +import type { ThemeRegistrationAny } from "streamdown"; + +// Custom Shiki theme matching the Trigger.dev VS Code dark theme. +// Colors taken directly from the VS Code extension's tokenColors. 
+export const triggerDarkTheme: ThemeRegistrationAny = { + name: "trigger-dark", + type: "dark", + colors: { + "editor.background": "#212327", + "editor.foreground": "#878C99", + "editorLineNumber.foreground": "#484c54", + }, + tokenColors: [ + // Control flow keywords: pink-purple + { + scope: [ + "keyword.control", + "keyword.operator.delete", + "keyword.other.using", + "keyword.other.operator", + "entity.name.operator", + ], + settings: { foreground: "#E888F8" }, + }, + // Storage type (const, let, var, function, class): purple + { + scope: "storage.type", + settings: { foreground: "#8271ED" }, + }, + // Storage modifiers (async, export, etc.): purple + { + scope: ["storage.modifier", "keyword.operator.noexcept"], + settings: { foreground: "#8271ED" }, + }, + // Keyword operator expressions (new, typeof, instanceof, etc.): purple + { + scope: [ + "keyword.operator.new", + "keyword.operator.expression", + "keyword.operator.cast", + "keyword.operator.sizeof", + "keyword.operator.instanceof", + "keyword.operator.logical.python", + "keyword.operator.wordlike", + ], + settings: { foreground: "#8271ED" }, + }, + // Types and namespaces: hot pink + { + scope: [ + "support.class", + "support.type", + "entity.name.type", + "entity.name.namespace", + "entity.name.scope-resolution", + "entity.name.class", + "entity.other.inherited-class", + ], + settings: { foreground: "#F770C6" }, + }, + // Functions: lime/yellow-green + { + scope: ["entity.name.function", "support.function"], + settings: { foreground: "#D9F07C" }, + }, + // Variables and parameters: light lavender + { + scope: [ + "variable", + "meta.definition.variable.name", + "support.variable", + "entity.name.variable", + "constant.other.placeholder", + ], + settings: { foreground: "#CCCBFF" }, + }, + // Constants and enums: medium purple + { + scope: ["variable.other.constant", "variable.other.enummember"], + settings: { foreground: "#9C9AF2" }, + }, + // this/self: purple-blue + { + scope: "variable.language", + 
settings: { foreground: "#9B99FF" }, + }, + // Object literal keys: medium purple-blue + { + scope: "meta.object-literal.key", + settings: { foreground: "#8B89FF" }, + }, + // Strings: sage green + { + scope: ["string", "meta.embedded.assembly"], + settings: { foreground: "#AFEC73" }, + }, + // String interpolation punctuation: blue-purple + { + scope: [ + "punctuation.definition.template-expression.begin", + "punctuation.definition.template-expression.end", + "punctuation.section.embedded", + ], + settings: { foreground: "#7A78EA" }, + }, + // Template expression reset + { + scope: "meta.template.expression", + settings: { foreground: "#d4d4d4" }, + }, + // Operators: gray (same as foreground) + { + scope: "keyword.operator", + settings: { foreground: "#878C99" }, + }, + // Comments: olive gray + { + scope: "comment", + settings: { foreground: "#6f736d" }, + }, + // Language constants (true, false, null, undefined): purple-blue + { + scope: "constant.language", + settings: { foreground: "#9B99FF" }, + }, + // Numeric constants: light green + { + scope: [ + "constant.numeric", + "keyword.operator.plus.exponent", + "keyword.operator.minus.exponent", + ], + settings: { foreground: "#b5cea8" }, + }, + // Regex: dark red + { + scope: "constant.regexp", + settings: { foreground: "#646695" }, + }, + // HTML/JSX tags: purple-blue + { + scope: "entity.name.tag", + settings: { foreground: "#9B99FF" }, + }, + // Tag brackets: dark gray + { + scope: "punctuation.definition.tag", + settings: { foreground: "#5F6570" }, + }, + // HTML/JSX attributes: light purple + { + scope: "entity.other.attribute-name", + settings: { foreground: "#C39EFF" }, + }, + // Escape characters: gold + { + scope: "constant.character.escape", + settings: { foreground: "#d7ba7d" }, + }, + // Regex string: dark red + { + scope: "string.regexp", + settings: { foreground: "#d16969" }, + }, + // Storage: purple-blue + { + scope: "storage", + settings: { foreground: "#9B99FF" }, + }, + // TS-specific: type 
casts, math/dom/json constants + { + scope: [ + "meta.type.cast.expr", + "meta.type.new.expr", + "support.constant.math", + "support.constant.dom", + "support.constant.json", + ], + settings: { foreground: "#9B99FF" }, + }, + // Markdown headings: purple-blue bold + { + scope: "markup.heading", + settings: { foreground: "#9B99FF", fontStyle: "bold" }, + }, + // Markup bold: purple-blue + { + scope: "markup.bold", + settings: { foreground: "#9B99FF", fontStyle: "bold" }, + }, + // Markup inline raw: sage green + { + scope: "markup.inline.raw", + settings: { foreground: "#AFEC73" }, + }, + // Markup inserted: light green + { + scope: "markup.inserted", + settings: { foreground: "#b5cea8" }, + }, + // Markup deleted: sage green + { + scope: "markup.deleted", + settings: { foreground: "#AFEC73" }, + }, + // Markup changed: purple-blue + { + scope: "markup.changed", + settings: { foreground: "#9B99FF" }, + }, + // Invalid: red + { + scope: "invalid", + settings: { foreground: "#f44747" }, + }, + // JSX text content + { + scope: ["meta.jsx.children"], + settings: { foreground: "#D7D9DD" }, + }, + ], +}; diff --git a/apps/webapp/app/components/navigation/SideMenu.tsx b/apps/webapp/app/components/navigation/SideMenu.tsx index dbc4c213f08..2c52c022b77 100644 --- a/apps/webapp/app/components/navigation/SideMenu.tsx +++ b/apps/webapp/app/components/navigation/SideMenu.tsx @@ -10,6 +10,7 @@ import { ClockIcon, Cog8ToothIcon, CogIcon, + CpuChipIcon, CubeIcon, ExclamationTriangleIcon, FolderIcon, @@ -69,7 +70,9 @@ import { organizationTeamPath, queryPath, regionsPath, + v3AgentsPath, v3ApiKeysPath, + v3PlaygroundPath, v3BatchesPath, v3BillingPath, v3BuiltInDashboardPath, @@ -467,6 +470,22 @@ export function SideMenu({ initialCollapsed={getSectionCollapsed(user.dashboardPreferences.sideMenu, "ai")} onCollapseToggle={handleSectionToggle("ai")} > + + - import("streamdown").then((mod) => ({ - default: ({ children }: { children: string }) => ( - - {children} - - ), - })) -); - type 
PromptTab = "overview" | "input" | "template"; export function PromptSpanDetails({ diff --git a/apps/webapp/app/components/runs/v3/RunFilters.tsx b/apps/webapp/app/components/runs/v3/RunFilters.tsx index 83ebaa0d51b..16e54025f13 100644 --- a/apps/webapp/app/components/runs/v3/RunFilters.tsx +++ b/apps/webapp/app/components/runs/v3/RunFilters.tsx @@ -2,6 +2,7 @@ import * as Ariakit from "@ariakit/react"; import { CalendarIcon, ClockIcon, + CpuChipIcon, FingerPrintIcon, RectangleStackIcon, Squares2X2Icon, @@ -184,6 +185,9 @@ export const TaskRunListSearchFilters = z.object({ `Machine presets to filter by (${machines.join(", ")})` ), errorId: z.string().optional().describe("Error ID to filter runs by (e.g. error_abc123)"), + sources: StringOrStringArray.describe( + "Task trigger sources to filter by (STANDARD, SCHEDULED, AGENT)" + ), }); export type TaskRunListSearchFilters = z.infer; @@ -225,6 +229,8 @@ export function filterTitle(filterKey: string) { return "Version"; case "errorId": return "Error ID"; + case "sources": + return "Source"; default: return filterKey; } @@ -265,6 +271,8 @@ export function filterIcon(filterKey: string): ReactNode | undefined { return ; case "errorId": return ; + case "sources": + return ; default: return undefined; } @@ -312,6 +320,10 @@ export function getRunFiltersFromSearchParams( ? searchParams.getAll("versions") : undefined, errorId: searchParams.get("errorId") ?? undefined, + sources: + searchParams.getAll("sources").filter((v) => v.length > 0).length > 0 + ? searchParams.getAll("sources") + : undefined, }; const parsed = TaskRunListSearchFilters.safeParse(params); @@ -353,7 +365,8 @@ export function RunsFilters(props: RunFiltersProps) { searchParams.has("queues") || searchParams.has("machines") || searchParams.has("versions") || - searchParams.has("errorId"); + searchParams.has("errorId") || + searchParams.has("sources"); return (
@@ -390,6 +403,7 @@ const filterTypes = [ { name: "schedule", title: "Schedule ID", icon: }, { name: "bulk", title: "Bulk action", icon: }, { name: "error", title: "Error ID", icon: }, + { name: "source", title: "Source", icon: }, ] as const; type FilterType = (typeof filterTypes)[number]["name"]; @@ -445,6 +459,7 @@ function AppliedFilters({ possibleTasks, bulkActions }: RunFiltersProps) { + ); } @@ -483,6 +498,8 @@ function Menu(props: MenuProps) { return props.setFilterType(undefined)} {...props} />; case "error": return props.setFilterType(undefined)} {...props} />; + case "source": + return props.setFilterType(undefined)} {...props} />; } } @@ -1896,3 +1913,101 @@ function AppliedErrorIdFilter() { ); } + +const sourceOptions: { value: TaskTriggerSource; title: string }[] = [ + { value: "STANDARD", title: "Standard" }, + { value: "SCHEDULED", title: "Scheduled" }, + { value: "AGENT", title: "Agent" }, +]; + +function SourceDropdown({ + trigger, + clearSearchValue, + searchValue, + onClose, +}: { + trigger: ReactNode; + clearSearchValue: () => void; + searchValue: string; + onClose?: () => void; +}) { + const { values, replace } = useSearchParams(); + + const handleChange = (values: string[]) => { + clearSearchValue(); + replace({ sources: values, cursor: undefined, direction: undefined }); + }; + + const filtered = useMemo(() => { + return sourceOptions.filter((item) => + item.title.toLowerCase().includes(searchValue.toLowerCase()) + ); + }, [searchValue]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + > + + + {filtered.map((item, index) => ( + + } + shortcut={shortcutFromIndex(index, { shortcutsEnabled: true })} + > + {item.title} + + ))} + + + + ); +} + +function AppliedSourceFilter() { + const { values, del } = useSearchParams(); + const sources = values("sources"); + + if (sources.length === 0 || sources.every((v) => v === "")) { + return null; + } + + return ( + + {(search, setSearch) => ( + }> + } + 
value={appliedSummary( + sources.map( + (v) => sourceOptions.find((o) => o.value === v)?.title ?? v + ) + )} + onRemove={() => del(["sources", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} diff --git a/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx b/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx index fbede0e7cec..2d14127199c 100644 --- a/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx +++ b/apps/webapp/app/components/runs/v3/TaskRunsTable.tsx @@ -55,8 +55,10 @@ import { filterableTaskRunStatuses, TaskRunStatusCombo, } from "./TaskRunStatus"; +import { TaskTriggerSourceIcon } from "./TaskTriggerSource"; import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; import { useSearchParams } from "~/hooks/useSearchParam"; +import type { TaskTriggerSource } from "@trigger.dev/database"; type RunsTableProps = { total: number; @@ -343,6 +345,10 @@ export function TaskRunsTable({ + {run.taskIdentifier} {run.rootTaskRunId === null ? 
Root : null} diff --git a/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx b/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx index 8d81e2f36c3..dc61644e14c 100644 --- a/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx +++ b/apps/webapp/app/components/runs/v3/TaskTriggerSource.tsx @@ -1,4 +1,4 @@ -import { ClockIcon } from "@heroicons/react/20/solid"; +import { ClockIcon, CpuChipIcon } from "@heroicons/react/20/solid"; import type { TaskTriggerSource } from "@trigger.dev/database"; import { TaskIconSmall } from "~/assets/icons/TaskIcon"; import { cn } from "~/utils/cn"; @@ -19,6 +19,11 @@ export function TaskTriggerSourceIcon({ ); } + case "AGENT": { + return ( + + ); + } } } @@ -30,5 +35,8 @@ export function taskTriggerSourceDescription(source: TaskTriggerSource) { case "SCHEDULED": { return "Scheduled task"; } + case "AGENT": { + return "Agent"; + } } } diff --git a/apps/webapp/app/components/runs/v3/agent/AgentMessageView.tsx b/apps/webapp/app/components/runs/v3/agent/AgentMessageView.tsx new file mode 100644 index 00000000000..72f019bb59f --- /dev/null +++ b/apps/webapp/app/components/runs/v3/agent/AgentMessageView.tsx @@ -0,0 +1,246 @@ +import type { UIMessage } from "@ai-sdk/react"; +import { memo } from "react"; +import { + AssistantResponse, + ChatBubble, + ToolUseRow, +} from "~/components/runs/v3/ai/AIChatMessages"; +import { Popover, PopoverContent, PopoverTrigger } from "~/components/primitives/Popover"; + +// --------------------------------------------------------------------------- +// AgentMessageView — renders an AI SDK UIMessage[] conversation. +// +// Extracted from the playground route so it can be reused on the run details +// page when the user picks the Agent view. 
+// +// UIMessage part types (AI SDK): +// text — markdown text content +// reasoning — model reasoning/thinking +// tool-{name} — tool call with input/output/state +// source-url — citation link +// source-document — citation document reference +// file — file attachment (image, etc.) +// step-start — visual separator between steps +// data-{name} — custom data parts (rendered as a small popover) +// --------------------------------------------------------------------------- + +export function AgentMessageView({ messages }: { messages: UIMessage[] }) { + return ( +
+ {messages.map((msg) => ( + + ))} +
+ ); +} + +// Memoized so stable messages (anything older than the one currently +// streaming) don't re-render on every chunk. This matters a lot during +// `resumeStream()` history replay, where each re-render would otherwise +// re-run Prism highlighting on every tool-call CodeBlock in the list. +// +// Default shallow prop comparison is fine: AI SDK's useChat keeps stable +// references for messages that haven't changed, so only the last message +// (the one receiving new chunks) re-renders. +export const MessageBubble = memo(function MessageBubble({ + message, +}: { + message: UIMessage; +}) { + if (message.role === "user") { + const text = + message.parts + ?.filter((p) => p.type === "text") + .map((p) => (p as { type: "text"; text: string }).text) + .join("") ?? ""; + + return ( +
+
+
{text}
+
+
+ ); + } + + if (message.role === "assistant") { + const hasContent = message.parts && message.parts.length > 0; + if (!hasContent) return null; + + return ( +
+ {message.parts?.map((part, i) => renderPart(part, i))} +
+ ); + } + + return null; +}); + +export function renderPart(part: UIMessage["parts"][number], i: number) { + const p = part as any; + const type = part.type as string; + + // Text — markdown rendered via AssistantResponse + if (type === "text") { + return p.text ? : null; + } + + // Reasoning — amber-bordered italic block + if (type === "reasoning") { + return ( +
+ +
+ {p.text ?? ""} +
+
+
+ ); + } + + // Tool call — type: "tool-{name}" with toolCallId, input, output, state + if (type.startsWith("tool-")) { + const toolName = type.slice(5); + + // Sub-agent tool: output is a UIMessage with parts + const isSubAgent = + p.output != null && typeof p.output === "object" && Array.isArray(p.output.parts); + + // For sub-agent tools, show the last text part as the "output" tab + // (mirrors what toModelOutput typically sends to the parent LLM) + // instead of dumping the full UIMessage JSON. + let resultOutput: string | undefined; + if (isSubAgent) { + const lastText = (p.output.parts as any[]) + .filter((part: any) => part.type === "text" && part.text) + .pop(); + resultOutput = lastText?.text ?? undefined; + } else if (p.output != null) { + resultOutput = + typeof p.output === "string" ? p.output : JSON.stringify(p.output, null, 2); + } + + return ( + + ); + } + + // Source URL — clickable citation link + if (type === "source-url") { + return ( + + ); + } + + // Source document — citation label + if (type === "source-document") { + return ( +
+ {p.title} + {p.mediaType ? ` (${p.mediaType})` : ""} +
+ ); + } + + // File — render as image if image type, otherwise as download link + if (type === "file") { + const isImage = typeof p.mediaType === "string" && p.mediaType.startsWith("image/"); + if (isImage) { + return ( + {p.filename + ); + } + return ( + + ); + } + + // Step start — subtle dashed separator with centered label + if (type === "step-start") { + return ( +
+
+ step +
+
+ ); + } + + // Data parts — type: "data-{name}", show as labeled JSON popover + if (type.startsWith("data-")) { + const dataName = type.slice(5); + return ; + } + + return null; +} + +function DataPartPopover({ name, data }: { name: string; data: unknown }) { + const formatted = JSON.stringify(data, null, 2); + + return ( + + + + + +
+ data-{name} +
+
+
{formatted}
+
+
+
+ ); +} diff --git a/apps/webapp/app/components/runs/v3/agent/AgentView.tsx b/apps/webapp/app/components/runs/v3/agent/AgentView.tsx new file mode 100644 index 00000000000..c54904d5b9e --- /dev/null +++ b/apps/webapp/app/components/runs/v3/agent/AgentView.tsx @@ -0,0 +1,726 @@ +import type { UIMessage } from "@ai-sdk/react"; +import { SSEStreamSubscription } from "@trigger.dev/core/v3"; +import { useEffect, useMemo, useRef, useState } from "react"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { Spinner } from "~/components/primitives/Spinner"; +import { AgentMessageView } from "~/components/runs/v3/agent/AgentMessageView"; +import { useAutoScrollToBottom } from "~/hooks/useAutoScrollToBottom"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; + +export type AgentViewAuth = { + publicAccessToken: string; + apiOrigin: string; + /** + * Session identifier the AgentView uses to address the backing + * {@link Session} when subscribing to `.in` / `.out`. Accepts either + * a `session_*` friendlyId or the transport-supplied externalId + * (typically the browser's `chatId`) — the dashboard resource route + * resolves either form via `resolveSessionByIdOrExternalId`. + */ + sessionId: string; + /** + * User messages extracted from the run's task payload at load time. + * Empty array for runs started with `trigger: "preload"` — in that + * case the first user message arrives over the session's `.in` + * channel and is merged in by the AgentView subscription. + */ + initialMessages: UIMessage[]; +}; + +type AgentViewRun = { + friendlyId: string; + taskIdentifier: string; +}; + +/** + * Max state-update interval while assistant chunks are streaming. Matches + * the `experimental_throttle: 100` we previously passed to `useChat`. 
+ * Chunks mutate a staging ref synchronously; a throttled flush copies the + * ref into React state at most ~10x/sec so tool-call Prism highlighting + * etc. doesn't re-run on every single text-delta. + */ +const STATE_FLUSH_THROTTLE_MS = 100; + +/** + * Sentinel timestamp for messages that came from the run's initial task + * payload — they predate any stream activity, so 0 guarantees they sort + * first regardless of stream race order. + */ +const INITIAL_PAYLOAD_TIMESTAMP = 0; + +/** + * Renders an agent run's chat conversation as it unfolds. + * + * Subscribes to both channels of the run's backing {@link Session}: + * - **`.out`** delivers assistant `UIMessageChunk`s (text deltas, tool + * calls, reasoning, etc.) produced by the agent's + * `chatStream.writer(...)` calls — objects, already parsed by the S2 + * SSE reader. + * - **`.in`** delivers {@link ChatInputChunk}s sent by + * {@link TriggerChatTransport} (or any other session writer). Each + * chunk is a tagged union (`{kind: "message", payload}` for user + * turns, `{kind: "stop"}` for stop signals) — the AgentView only + * cares about `kind: "message"` and pulls `.payload.messages`. + * + * Both streams are read directly via `SSEStreamSubscription` through the + * dashboard's session-authed resource routes — not through `useChat` or + * `TriggerChatTransport`. This gives us per-chunk server-side timestamps + * (S2 sequence numbers) from both streams, which we use to produce a + * chronologically correct merged message list that works for replays, + * multi-message turns, cross-run session resumes, and steering messages. + * + * Intended to be mounted inside a scrollable container — the component + * does not own its own scrollbar. 
+ */ +export function AgentView({ + run, + agentView, +}: { + run: AgentViewRun; + agentView: AgentViewAuth; +}) { + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + const messages = useAgentRunMessages({ + runFriendlyId: run.friendlyId, + sessionId: agentView.sessionId, + apiOrigin: agentView.apiOrigin, + orgSlug: organization.slug, + projectSlug: project.slug, + envSlug: environment.slug, + initialMessages: agentView.initialMessages, + }); + + // Sticky-bottom auto-scroll: walks up to find the inspector's scroll + // container, then scrolls to bottom whenever `messages` changes — but + // only if the user was at (or near) the bottom at the time. Scrolling + // away pauses auto-scroll; scrolling back resumes it. + const rootRef = useAutoScrollToBottom([messages]); + + return ( +
+ {messages.length === 0 ? ( +
+
+ + + Loading conversation… + +
+
+ ) : ( + + )} +
+ ); +} + +// --------------------------------------------------------------------------- +// useAgentRunMessages — reads both realtime streams for a run and maintains +// a chronologically ordered, merged message list. +// --------------------------------------------------------------------------- + +/** + * Shape of each chunk on the session's `.in` channel. Mirrors the + * `ChatInputChunk` tagged union produced by {@link TriggerChatTransport}: + * - `kind: "message"` carries a `ChatTaskWirePayload` in `.payload` + * (user-submitted messages or regenerate calls); we dedupe by id. + * - `kind: "stop"` is a stop signal — no messages, nothing to render + * here, so it's filtered. + * + * The server wraps records in `{data, id}` and writes `data` as a JSON + * string; SSE v2 delivers the parsed string back. {@link parseChunkPayload} + * re-parses to recover the object. + */ +type InputStreamChunk = { + kind?: "message" | "stop"; + payload?: { + messages?: Array<{ id?: string; role?: string; parts?: unknown[] }>; + trigger?: string; + }; + message?: string; +}; + +/** + * Minimal typing for the chunks we care about on the chat output stream. + * Covers the AI SDK `UIMessageChunk` variants that `renderPart` actually + * knows how to display, plus the Trigger.dev control chunks that we filter. + */ +type OutputChunk = { type: string; [key: string]: unknown }; + +/** + * Per-message orchestration state for the output stream accumulator. Mirrors + * the active-part tracking that AI SDK's `processUIMessageStream` keeps in + * its `state` object: a registry of streaming text/reasoning parts so deltas + * can be matched to the right part by id, plus a way to clear them at step + * boundaries (`finish-step`) so the next step's `text-start`/`reasoning-start` + * with the same id starts a fresh part instead of appending to the previous + * step's part. + */ +/** + * Per-message orchestration state — index-based active-part tracking. 
+ * + * Each map points from a part id (text or reasoning) to **the index of the + * currently-streaming part with that id in `message.parts`**. We need + * indexes (not just a `Set` of "active ids") because part ids are *only + * unique within a step*: the SDK happily reuses `text-start id="0"` after + * a `finish-step` boundary. Without index tracking, a `text-delta` for the + * reused id would have to find the right part by id alone — and a search + * would match BOTH the previous step's frozen part and the current step's + * fresh one, which produces a duplication where the previous text gets + * the new content appended to it AND a fresh part with the same content + * also appears. + * + * Mirrors AI SDK's `processUIMessageStream`'s `state.activeTextParts` / + * `state.activeReasoningParts` (which hold direct references in the + * mutating canonical impl). We use indexes here because we do immutable + * updates and need indices that survive `parts.map()` rewrites — adding + * new parts and updating existing ones never reorders, so an index is + * stable for the lifetime of the part. + */ +type MessageOrchestrationState = { + activeTextPartIndexes: Map; + activeReasoningPartIndexes: Map; +}; + +/** + * `SSEStreamSubscription`'s v2 batch path delivers `parsedBody.data` as-is + * — but session channels diverge by direction: + * + * - `.in`: {@link TriggerChatTransport.serializeInputChunk} writes the + * `ChatInputChunk` as a JSON **string**, so `data` is a string that + * needs a second `JSON.parse` to recover the tagged union. + * - `.out`: the agent's `chatStream.writer(...)` writes + * {@link UIMessageChunk} **objects** directly; `data` arrives + * already-parsed. + * + * This helper accepts both shapes defensively: a string is parsed; an + * object is returned as-is. Returns `null` for unparseable payloads. 
+ */ +function parseChunkPayload(raw: unknown): Record | null { + if (raw == null) return null; + if (typeof raw === "string") { + try { + const parsed = JSON.parse(raw); + return parsed && typeof parsed === "object" ? (parsed as Record) : null; + } catch { + return null; + } + } + if (typeof raw === "object") return raw as Record; + return null; +} + +function createOrchestrationState(): MessageOrchestrationState { + return { + activeTextPartIndexes: new Map(), + activeReasoningPartIndexes: new Map(), + }; +} + +function useAgentRunMessages({ + runFriendlyId, + sessionId, + apiOrigin, + orgSlug, + projectSlug, + envSlug, + initialMessages, +}: { + runFriendlyId: string; + sessionId: string; + apiOrigin: string; + orgSlug: string; + projectSlug: string; + envSlug: string; + initialMessages: UIMessage[]; +}): UIMessage[] { + // Seed with the user messages from the run's task payload. + const seedMessages = useMemo( + () => initialMessages.filter((m) => m.role === "user"), + [initialMessages] + ); + + // `pendingRef` is the authoritative, eagerly-updated message state: + // chunks mutate this synchronously as they arrive. A throttled flush + // copies it into React state so UI updates are capped at ~10x/sec. + const pendingRef = useRef>( + new Map(seedMessages.map((m) => [m.id, m])) + ); + const timestampsRef = useRef>( + new Map(seedMessages.map((m) => [m.id, INITIAL_PAYLOAD_TIMESTAMP])) + ); + // Side-table of orchestration state, keyed by assistant message id. Lives + // outside the UIMessage so React doesn't see it as a renderable prop. + const orchestrationRef = useRef>(new Map()); + + // React state snapshot of pendingRef. Only updated via the throttled + // `scheduleFlush`. The Map *reference* changes on every flush so React + // detects the state update and the downstream `useMemo` recomputes. 
+ const [messagesById, setMessagesById] = useState>( + () => new Map(pendingRef.current) + ); + + // Throttled flush scheduler — leading edge within a single throttle + // window: the first chunk after a quiet period flushes immediately, then + // subsequent chunks coalesce until the next window opens. + const lastFlushAtRef = useRef(0); + const pendingTimerRef = useRef | null>(null); + const scheduleFlush = useRef<() => void>(() => {}); + scheduleFlush.current = () => { + if (pendingTimerRef.current !== null) return; // already scheduled + const now = Date.now(); + const sinceLast = now - lastFlushAtRef.current; + const delay = Math.max(0, STATE_FLUSH_THROTTLE_MS - sinceLast); + pendingTimerRef.current = setTimeout(() => { + pendingTimerRef.current = null; + lastFlushAtRef.current = Date.now(); + setMessagesById(new Map(pendingRef.current)); + }, delay); + }; + + useEffect(() => { + const abort = new AbortController(); + + const encodedSession = encodeURIComponent(sessionId); + const sessionBase = + `${apiOrigin}/resources/orgs/${orgSlug}/projects/${projectSlug}/env/${envSlug}` + + `/runs/${runFriendlyId}/realtime/v1/sessions/${encodedSession}`; + + const outputUrl = `${sessionBase}/out`; + const inputUrl = `${sessionBase}/in`; + + const commonSubOptions = { + signal: abort.signal, + timeoutInSeconds: 120, + } as const; + + // ---- Output stream: assistant messages --------------------------------- + // + // The output stream delivers UIMessageChunks interleaved with + // Trigger-specific control chunks (`trigger:turn-complete`, etc.). We + // filter the control chunks and fold everything else into an assistant + // `UIMessage` via our own `applyOutputChunk` accumulator — the AI SDK's + // `readUIMessageStream` helper is only available in `ai@6`, and the + // webapp is pinned to `ai@4`, so we re-implement just the chunk types + // that `renderPart` actually displays. 
+ // + // We capture the **server timestamp of each assistant message's first + // `start` chunk** so later sort-by-timestamp merges with the input + // stream correctly. + const runOutput = async () => { + try { + const sub = new SSEStreamSubscription(outputUrl, commonSubOptions); + const raw = await sub.subscribe(); + const reader = raw.getReader(); + + let currentMessageId: string | null = null; + + try { + while (!abort.signal.aborted) { + const { done, value } = await reader.read(); + if (done) return; + + const chunk = parseChunkPayload(value.chunk) as OutputChunk | null; + if (!chunk || typeof chunk.type !== "string") continue; + if (chunk.type.startsWith("trigger:")) continue; + + if (chunk.type === "start") { + const messageId = + typeof chunk.messageId === "string" && chunk.messageId.length > 0 + ? chunk.messageId + : `asst-${crypto.randomUUID()}`; + currentMessageId = messageId; + + if (!timestampsRef.current.has(messageId)) { + timestampsRef.current.set(messageId, value.timestamp); + } + + const existing = pendingRef.current.get(messageId); + if (existing) { + // Same message id seen again — merge metadata only, keep + // existing parts (canonical `processUIMessageStream` does + // the same on a repeated `start`). + if (chunk.messageMetadata != null) { + pendingRef.current.set(messageId, { + ...existing, + metadata: { + ...((existing as { metadata?: Record }).metadata ?? {}), + ...(chunk.messageMetadata as Record), + }, + } as UIMessage); + scheduleFlush.current(); + } + } else { + const message: UIMessage = { + id: messageId, + role: "assistant", + parts: [], + ...(chunk.messageMetadata != null + ? 
{ metadata: chunk.messageMetadata as UIMessage["metadata"] } + : {}), + } as UIMessage; + pendingRef.current.set(messageId, message); + orchestrationRef.current.set(messageId, createOrchestrationState()); + scheduleFlush.current(); + } + continue; + } + + if (currentMessageId === null) continue; + const existing = pendingRef.current.get(currentMessageId); + if (!existing) continue; + let orchestration = orchestrationRef.current.get(currentMessageId); + if (!orchestration) { + // Defensive: a chunk arrived for a message we never saw a + // `start` for. Lazily create orchestration state so we can + // still display the parts. + orchestration = createOrchestrationState(); + orchestrationRef.current.set(currentMessageId, orchestration); + } + + const updated = applyOutputChunk(existing, chunk, orchestration); + if (updated !== existing) { + pendingRef.current.set(currentMessageId, updated); + scheduleFlush.current(); + } + } + } finally { + try { + reader.releaseLock(); + } catch { + // Lock may already be released. + } + } + } catch (err) { + if (abort.signal.aborted) return; + // eslint-disable-next-line no-console + console.debug("[AgentView] output stream subscription failed", err); + } + }; + + // ---- Input channel: user messages (`ChatInputChunk`) ------------------- + // + // The transport appends a `{kind: "message", payload}` ChatInputChunk + // for every user turn (and `{kind: "stop"}` for stop signals). We pull + // user messages out of `payload.messages` for `kind: "message"` chunks + // and ignore the rest. 
+ const runInput = async () => { + try { + const sub = new SSEStreamSubscription(inputUrl, commonSubOptions); + const raw = await sub.subscribe(); + const reader = raw.getReader(); + try { + while (!abort.signal.aborted) { + const { done, value } = await reader.read(); + if (done) return; + + const chunk = parseChunkPayload(value.chunk) as InputStreamChunk | null; + if (!chunk || chunk.kind !== "message") continue; + const payload = chunk.payload; + if (!payload || !Array.isArray(payload.messages)) continue; + + const incomingUsers = payload.messages.filter( + (m): m is UIMessage => + m != null && (m as { role?: string }).role === "user" && typeof m.id === "string" + ); + if (incomingUsers.length === 0) continue; + + let changed = false; + for (const msg of incomingUsers) { + if (pendingRef.current.has(msg.id)) continue; + pendingRef.current.set(msg.id, msg); + timestampsRef.current.set(msg.id, value.timestamp); + changed = true; + } + if (changed) scheduleFlush.current(); + } + } finally { + try { + reader.releaseLock(); + } catch { + // Lock may already be released. + } + } + } catch (err) { + if (abort.signal.aborted) return; + // eslint-disable-next-line no-console + console.debug("[AgentView] input stream subscription failed", err); + } + }; + + void runOutput(); + void runInput(); + + return () => { + abort.abort(); + if (pendingTimerRef.current !== null) { + clearTimeout(pendingTimerRef.current); + pendingTimerRef.current = null; + } + }; + }, [runFriendlyId, sessionId, apiOrigin, orgSlug, projectSlug, envSlug]); + + return useMemo(() => { + const timestamps = timestampsRef.current; + const arr = Array.from(messagesById.values()); + arr.sort((a, b) => { + const ta = timestamps.get(a.id) ?? 0; + const tb = timestamps.get(b.id) ?? 0; + if (ta !== tb) return ta - tb; + // Tie-breaker for messages sharing a stream ID bucket (rare): fall + // back to message id string order so the output is deterministic. + return a.id < b.id ? -1 : a.id > b.id ? 
1 : 0; + }); + return arr; + }, [messagesById]); +} + +// --------------------------------------------------------------------------- +// applyOutputChunk — minimal UIMessageChunk → UIMessage accumulator. +// --------------------------------------------------------------------------- +// +// A pared-down re-implementation of AI SDK's `processUIMessageStream` (in +// `ai@6`'s `index.mjs`). The webapp is pinned to `ai@4`, which doesn't ship +// the v5+ chunk-stream helpers, so we vendor the bits we actually use. +// +// Scope vs. canonical: +// - We render only the chunk shapes that `AgentMessageView`/`renderPart` +// actually display: text, reasoning, tool-* (input-{start,delta,available} +// + output-{available,error}), source-url, source-document, file, +// step-start/finish-step, data-*, plus metadata/finish lifecycle. +// - Unknown chunk types fall through as no-ops — defensive on purpose for a +// read-only viewer. +// - We **do not parse partial JSON for streaming tool inputs.** Canonical +// uses `parsePartialJson` (which depends on a 300-line `fixJson` state +// machine to repair incomplete JSON) so users see the input growing +// character-by-character. We skip it: tool inputs stay `undefined` +// throughout streaming and snap to the final value when +// `tool-input-available` lands. Acceptable for a viewer; can be added +// later by vendoring `fixJson` if the UX warrants it. +// +// `orchestration` carries per-message active-part trackers that mirror +// canonical's `state.activeTextParts` / `state.activeReasoningParts`. They +// let `text-delta` find the right text part by id and let `finish-step` +// clear them so a new step can re-use the same id without colliding. +// +// Returns the same object reference when nothing changes so the caller can +// skip unnecessary state flushes + React re-renders. 
/**
 * Structural view of a single `UIMessage` part. Parts are treated as open
 * records here because this accumulator only needs `type` plus a handful of
 * well-known keys (`id`, `toolCallId`, `text`, `state`, ...).
 */
type AnyPart = { [key: string]: unknown; type: string };

/**
 * Folds one output-stream chunk into `msg` and returns the resulting message.
 *
 * The function never mutates `msg` or its parts. When the chunk causes no
 * change, the *same* `msg` reference is returned so callers can skip state
 * flushes and re-renders with a plain `!==` check.
 *
 * Handled chunk types: `text-*`, `reasoning-*`, `tool-input-*`,
 * `tool-output-*`, `source-url`, `source-document`, `file`, `start-step`,
 * `finish-step`, `data-*`, `finish` and `message-metadata`. Everything else
 * (including `start`, which the caller handles) is a no-op.
 *
 * @param msg           The message accumulated so far.
 * @param chunk         The chunk to apply.
 * @param orchestration Per-message trackers mapping a streaming text/reasoning
 *                      part id to its index in `msg.parts`. This object IS
 *                      mutated (entries are added, removed and cleared).
 * @returns The updated message, or `msg` itself when nothing changed.
 */
function applyOutputChunk(
  msg: UIMessage,
  chunk: OutputChunk,
  orchestration: MessageOrchestrationState
): UIMessage {
  const type = chunk.type;

  switch (type) {
    // Text / reasoning parts ------------------------------------------------
    //
    // Part ids are only unique within a step, so the streaming part is
    // tracked by its index in `msg.parts` rather than looked up by id. A
    // delta for a re-used id therefore lands on the current part only.
    case "text-start":
      return openStreamedPart(
        msg,
        "text",
        chunk.id as string,
        orchestration.activeTextPartIndexes
      );
    case "text-delta":
      return appendStreamedDelta(
        msg,
        chunk.id as string,
        chunk.delta,
        orchestration.activeTextPartIndexes
      );
    case "text-end":
      return closeStreamedPart(msg, chunk.id as string, orchestration.activeTextPartIndexes);

    case "reasoning-start":
      return openStreamedPart(
        msg,
        "reasoning",
        chunk.id as string,
        orchestration.activeReasoningPartIndexes
      );
    case "reasoning-delta":
      return appendStreamedDelta(
        msg,
        chunk.id as string,
        chunk.delta,
        orchestration.activeReasoningPartIndexes
      );
    case "reasoning-end":
      return closeStreamedPart(
        msg,
        chunk.id as string,
        orchestration.activeReasoningPartIndexes
      );

    // Tool call parts -------------------------------------------------------
    case "tool-input-start": {
      const toolName = String(chunk.toolName ?? "");
      return withNewPart(msg, {
        type: `tool-${toolName}`,
        toolCallId: chunk.toolCallId,
        toolName,
        state: "input-streaming",
        input: undefined,
      });
    }
    case "tool-input-delta":
      // Partial JSON is not parsed; the full input snaps in when
      // `tool-input-available` arrives.
      return msg;
    case "tool-input-available": {
      const toolName = String(chunk.toolName ?? "");
      const callId = chunk.toolCallId;
      // Only search when the chunk actually carries a call id. Otherwise
      // `undefined === undefined` would match the first non-tool part and
      // overwrite it with tool state.
      const existingIdx =
        callId == null ? -1 : indexOfPart(msg, (p) => p["toolCallId"] === callId);
      if (existingIdx >= 0) {
        return updatePartAt(msg, existingIdx, (p) => ({
          ...p,
          state: "input-available",
          input: chunk.input,
        }));
      }
      // Input arrived without a preceding `tool-input-start` — synthesize
      // the part.
      return withNewPart(msg, {
        type: `tool-${toolName}`,
        toolCallId: callId,
        toolName,
        state: "input-available",
        input: chunk.input,
      });
    }
    case "tool-output-available":
      return settleToolPart(
        msg,
        chunk.toolCallId,
        { state: "output-available", output: chunk.output },
        chunk.preliminary === true
      );
    case "tool-output-error":
      return settleToolPart(
        msg,
        chunk.toolCallId,
        { state: "output-error", errorText: chunk.errorText },
        false
      );

    // Source / file / step parts — passed through as a whole ----------------
    case "source-url":
    case "source-document":
    case "file":
      return withNewPart(msg, chunk as unknown as AnyPart);
    case "start-step":
      return withNewPart(msg, { type: "step-start" });
    case "finish-step":
      // Step boundary: forget the active trackers so the next step can
      // re-use the same text/reasoning ids. Earlier parts stay frozen at
      // their indexes; the message itself does not change.
      orchestration.activeTextPartIndexes.clear();
      orchestration.activeReasoningPartIndexes.clear();
      return msg;

    // Metadata / lifecycle --------------------------------------------------
    case "finish":
    case "message-metadata": {
      if (chunk.messageMetadata == null) return msg;
      return {
        ...msg,
        metadata: {
          ...((msg as { metadata?: Record<string, unknown> }).metadata ?? {}),
          ...(chunk.messageMetadata as Record<string, unknown>),
        },
      } as UIMessage;
    }
  }

  if (type.startsWith("data-")) {
    return upsertDataPart(msg, chunk as unknown as AnyPart);
  }

  // Abort / error / unknown — no structural change.
  return msg;
}

// --- Streaming text / reasoning helpers --------------------------------------

/** Appends an empty streaming part and records the index it will occupy. */
function openStreamedPart(
  msg: UIMessage,
  partType: "text" | "reasoning",
  id: string,
  activeIndexes: Map<string, number>
): UIMessage {
  // The current length is the index the new part gets once it is appended.
  activeIndexes.set(id, (msg.parts ?? []).length);
  return withNewPart(msg, { type: partType, id, text: "", state: "streaming" });
}

/** Appends `delta` to the active part for `id`; drops deltas with no open part. */
function appendStreamedDelta(
  msg: UIMessage,
  id: string,
  delta: unknown,
  activeIndexes: Map<string, number>
): UIMessage {
  const index = activeIndexes.get(id);
  if (index === undefined) return msg;
  return updatePartAt(msg, index, (p) => ({
    ...p,
    text: String(p["text"] ?? "") + String(delta ?? ""),
  }));
}

/** Marks the active part for `id` as done and stops tracking it. */
function closeStreamedPart(
  msg: UIMessage,
  id: string,
  activeIndexes: Map<string, number>
): UIMessage {
  const index = activeIndexes.get(id);
  if (index === undefined) return msg;
  activeIndexes.delete(id);
  return updatePartAt(msg, index, (p) => ({ ...p, state: "done" }));
}

// --- Tool / data helpers -------------------------------------------------------

/**
 * Applies a tool result (`fields`) to every part whose `toolCallId` matches.
 *
 * `preliminary` is set only for preliminary outputs and removed otherwise, so
 * a final result does not inherit the flag from an earlier preliminary one.
 * A missing `toolCallId` matches nothing.
 */
function settleToolPart(
  msg: UIMessage,
  toolCallId: unknown,
  fields: { [key: string]: unknown },
  preliminary: boolean
): UIMessage {
  if (toolCallId == null) return msg;
  return updatePart(msg, (p) => {
    if (p["toolCallId"] !== toolCallId) return null;
    const next: AnyPart = { ...p, ...fields };
    if (preliminary) {
      next["preliminary"] = true;
    } else {
      delete next["preliminary"];
    }
    return next;
  });
}

/**
 * Adds a `data-*` part. When the chunk carries an `id` and a part with the
 * same `type` and `id` already exists, that part's `data` is replaced instead
 * of appending a duplicate.
 */
function upsertDataPart(msg: UIMessage, chunk: AnyPart): UIMessage {
  const id = chunk["id"];
  if (id != null) {
    const existingIdx = indexOfPart(msg, (p) => p.type === chunk.type && p["id"] === id);
    if (existingIdx >= 0) {
      return updatePartAt(msg, existingIdx, (p) => ({ ...p, data: chunk["data"] }));
    }
  }
  return withNewPart(msg, chunk);
}

// --- Small immutable helpers for UIMessage.parts -------------------------------

/** Returns a copy of `msg` with `part` appended to its parts. */
function withNewPart(msg: UIMessage, part: AnyPart): UIMessage {
  return {
    ...msg,
    parts: [...((msg.parts ?? []) as AnyPart[]), part],
  } as UIMessage;
}

/**
 * Runs `updater` over every part. A `null` result keeps the part as is.
 * Returns `msg` itself when no part was replaced.
 */
function updatePart(
  msg: UIMessage,
  updater: (part: AnyPart) => AnyPart | null
): UIMessage {
  const parts = (msg.parts ?? []) as AnyPart[];
  let changed = false;
  const next = parts.map((p) => {
    const updated = updater(p);
    if (updated === null) return p;
    changed = true;
    return updated;
  });
  return changed ? ({ ...msg, parts: next } as UIMessage) : msg;
}

/** Index of the first part satisfying `predicate`, or -1. */
function indexOfPart(msg: UIMessage, predicate: (part: AnyPart) => boolean): number {
  const parts = (msg.parts ?? []) as AnyPart[];
  return parts.findIndex((p) => predicate(p));
}

/** Replaces the part at `index` via `updater`; out-of-range indexes are a no-op. */
function updatePartAt(
  msg: UIMessage,
  index: number,
  updater: (part: AnyPart) => AnyPart
): UIMessage {
  const parts = (msg.parts ?? []) as AnyPart[];
  const current = parts[index];
  if (current === undefined) return msg;
  const next = parts.slice();
  next[index] = updater(current);
  return { ...msg, parts: next } as UIMessage;
}
(["output"] as const) : []), ...(hasDetails ? (["details"] as const) : []), ]; - const defaultTab: ToolTab | null = hasInput ? "input" : null; - const [activeTab, setActiveTab] = useState(defaultTab); + const [activeTab, setActiveTab] = useState( + hasSubAgent ? "agent" : hasInput ? "input" : null + ); + + // Auto-select input tab when input arrives after initial render (e.g. streaming tool calls) + useEffect(() => { + if (!hasSubAgent && hasInput && activeTab === null) { + setActiveTab("input"); + } + }, [hasInput, hasSubAgent]); function handleTabClick(tab: ToolTab) { setActiveTab(activeTab === tab ? null : tab); } return ( -
+
- {tool.toolName} + {hasSubAgent && ( + + + + )} + + {tool.toolName} + + {hasSubAgent && tool.subAgent?.isStreaming && ( + + + streaming + + )} {tool.resultSummary && ( {tool.resultSummary} )} @@ -288,7 +307,11 @@ function ToolUseRow({ tool }: { tool: ToolUse }) { {availableTabs.length > 0 && ( <> -
+
{availableTabs.map((tab) => (
); } + +function SubAgentContent({ parts }: { parts: any[] }) { + // Extract sub-agent run ID from injected metadata part + const runPart = parts.find( + (p: any) => p.type === "data-subagent-run" && p.data?.runId + ); + const subAgentRunId = runPart?.data?.runId as string | undefined; + + return ( +
+ {subAgentRunId && ( +
+ + View sub-agent run + +
+ )} + {parts.map((part: any, j: number) => { + const partType = part.type as string; + + // Skip the injected metadata part — already rendered above + if (partType === "data-subagent-run") return null; + + if (partType === "text" && part.text) { + return ; + } + + if (partType === "step-start") { + return ( +
+
+ step +
+
+ ); + } + + if (partType.startsWith("tool-")) { + const subToolName = partType.slice(5); + return ( + + ); + } + + if (partType === "reasoning" && part.text) { + return ( +
+
+ {part.text} +
+
+ ); + } + + return null; + })} +
+ ); +} diff --git a/apps/webapp/app/components/runs/v3/ai/AISpanDetails.tsx b/apps/webapp/app/components/runs/v3/ai/AISpanDetails.tsx index 5e8bb65688f..c243a1e4d9b 100644 --- a/apps/webapp/app/components/runs/v3/ai/AISpanDetails.tsx +++ b/apps/webapp/app/components/runs/v3/ai/AISpanDetails.tsx @@ -1,6 +1,7 @@ import { CheckIcon, ClipboardDocumentIcon } from "@heroicons/react/20/solid"; -import { lazy, Suspense, useState } from "react"; +import { Suspense, useState } from "react"; import { Button } from "~/components/primitives/Buttons"; +import { StreamdownRenderer } from "~/components/code/StreamdownRenderer"; import { Header3 } from "~/components/primitives/Headers"; import { Paragraph } from "~/components/primitives/Paragraph"; import { TabButton, TabContainer } from "~/components/primitives/Tabs"; @@ -20,16 +21,6 @@ import type { AISpanData, DisplayItem } from "./types"; import type { PromptSpanData } from "~/presenters/v3/SpanPresenter.server"; import { SpanHorizontalTimeline } from "~/components/runs/v3/SpanHorizontalTimeline"; -const StreamdownRenderer = lazy(() => - import("streamdown").then((mod) => ({ - default: ({ children }: { children: string }) => ( - - {children} - - ), - })) -); - type AITab = "overview" | "messages" | "tools" | "prompt"; export function AISpanDetails({ diff --git a/apps/webapp/app/components/runs/v3/ai/types.ts b/apps/webapp/app/components/runs/v3/ai/types.ts index bb0fd7e74b1..c59c87865d2 100644 --- a/apps/webapp/app/components/runs/v3/ai/types.ts +++ b/apps/webapp/app/components/runs/v3/ai/types.ts @@ -22,6 +22,11 @@ export type ToolUse = { resultSummary?: string; /** Full formatted result for display in a code block */ resultOutput?: string; + /** Sub-agent output — when the tool result is a UIMessage with parts */ + subAgent?: { + parts: any[]; + isStreaming: boolean; + }; }; // --------------------------------------------------------------------------- diff --git a/apps/webapp/app/hooks/useAutoScrollToBottom.ts 
import { useEffect, useLayoutEffect, useRef } from "react";

/** How close to the bottom (in px) still counts as "at the bottom". */
const AT_BOTTOM_TOLERANCE_PX = 16;

/**
 * Walks up from `start` (inclusive) and returns the first element whose
 * computed `overflow-y` is `auto` or `scroll`, or `null` if there is none.
 */
function findScrollContainer(start: HTMLElement | null): HTMLElement | null {
  for (let el: HTMLElement | null = start; el; el = el.parentElement) {
    const { overflowY } = getComputedStyle(el);
    if (overflowY === "auto" || overflowY === "scroll") return el;
  }
  return null;
}

/**
 * Chat-style sticky-bottom auto-scroll.
 *
 * - The scroll container is the closest scrollable ancestor of the returned
 *   ref's element, searched starting from that element's parent.
 * - A passive scroll listener tracks whether the user is within
 *   `AT_BOTTOM_TOLERANCE_PX` of the bottom. The flag starts as `true`.
 * - After every commit in which `deps` changed (including the first one), if
 *   the flag is `true` the container is scrolled to its bottom. This runs in
 *   `useLayoutEffect`, i.e. before paint.
 * - Scrolling away from the bottom clears the flag and pauses auto-scroll;
 *   scrolling back into the tolerance band re-enables it.
 *
 * The programmatic scroll fires a scroll event of its own. The listener then
 * measures a distance of about zero, so the flag stays `true` without any
 * "ignore programmatic scroll" bookkeeping.
 *
 * @param deps Values that should trigger a re-scroll, typically `[messages]`.
 * @returns A ref for the component's root element. The root must be mounted
 *          inside the scrollable region.
 *
 * @example
 * ```tsx
 * function ChatPanel({ messages }) {
 *   const rootRef = useAutoScrollToBottom([messages]);
 *   return (
 *     <div className="overflow-y-auto">
 *       <div ref={rootRef}>
 *         {messages.map((m) => <Message key={m.id} message={m} />)}
 *       </div>
 *     </div>
 *   );
 * }
 * ```
 */
export function useAutoScrollToBottom(deps: ReadonlyArray<unknown>) {
  const rootRef = useRef<HTMLDivElement>(null);
  const containerRef = useRef<HTMLElement | null>(null);
  // Starts true so the first commit scrolls to the bottom, and so content
  // that initially fits without scrolling stays sticky once it grows.
  const stickToBottomRef = useRef(true);

  // Attach the passive scroll listener that maintains `stickToBottomRef`.
  useEffect(() => {
    const container =
      containerRef.current ?? findScrollContainer(rootRef.current?.parentElement ?? null);
    if (!container) return;
    containerRef.current = container;

    const updateStickiness = () => {
      const distanceFromBottom =
        container.scrollHeight - container.scrollTop - container.clientHeight;
      stickToBottomRef.current = distanceFromBottom <= AT_BOTTOM_TOLERANCE_PX;
    };

    // Seed from the real position. The layout effect below has already run
    // for this commit, so a freshly mounted view measures as "at bottom".
    updateStickiness();

    container.addEventListener("scroll", updateStickiness, { passive: true });
    return () => {
      container.removeEventListener("scroll", updateStickiness);
      containerRef.current = null;
    };
  }, []);

  // After each commit that changes `deps`, follow the content if the user
  // was at the bottom.
  useLayoutEffect(() => {
    if (!stickToBottomRef.current) return;
    // Layout effects run before passive effects, so on the first commit the
    // effect above has not located the container yet. Resolve it here;
    // otherwise the initial scroll-to-bottom would be skipped and stickiness
    // would then be seeded `false` for content that already overflows.
    let container = containerRef.current;
    if (!container) {
      container = findScrollContainer(rootRef.current?.parentElement ?? null);
      containerRef.current = container;
    }
    if (!container) return;
    container.scrollTop = container.scrollHeight;
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, deps);

  return rootRef;
}
const queryFn = this.clickhouse.reader.query({ + name: "agentActiveStates", + query: `SELECT + task_identifier, + countIf(status = 'EXECUTING') AS running, + countIf(status IN ('WAITING_TO_RESUME', 'QUEUED_EXECUTING')) AS suspended + FROM trigger_dev.task_runs_v2 + WHERE environment_id = {environmentId: String} + AND task_identifier IN {slugs: Array(String)} + AND task_kind = 'AGENT' + AND status IN ('EXECUTING', 'WAITING_TO_RESUME', 'QUEUED_EXECUTING') + GROUP BY task_identifier`, + params: z.object({ + environmentId: z.string(), + slugs: z.array(z.string()), + }), + schema: z.object({ + task_identifier: z.string(), + running: z.coerce.number(), + suspended: z.coerce.number(), + }), + }); + + const [error, rows] = await queryFn({ environmentId, slugs }); + if (error) { + console.error("Agent active states query failed:", error); + return {}; + } + + const result: Record = {}; + for (const row of rows) { + result[row.task_identifier] = { running: row.running, suspended: row.suspended }; + } + return result; + } + + /** 24h hourly sparkline of conversation (run) count per agent */ + async #getConversationSparklines( + environmentId: string, + slugs: string[] + ): Promise> { + const queryFn = this.clickhouse.reader.query({ + name: "agentConversationSparklines", + query: `SELECT + task_identifier, + toStartOfHour(created_at) AS bucket, + count() AS val + FROM trigger_dev.task_runs_v2 + WHERE environment_id = {environmentId: String} + AND task_identifier IN {slugs: Array(String)} + AND task_kind = 'AGENT' + AND created_at >= now() - INTERVAL 24 HOUR + GROUP BY task_identifier, bucket + ORDER BY task_identifier, bucket`, + params: z.object({ + environmentId: z.string(), + slugs: z.array(z.string()), + }), + schema: z.object({ + task_identifier: z.string(), + bucket: z.string(), + val: z.coerce.number(), + }), + }); + + return this.#buildSparklineMap(await queryFn({ environmentId, slugs }), slugs); + } + + /** 24h hourly sparkline of LLM cost per agent */ + async 
#getCostSparklines( + environmentId: string, + slugs: string[] + ): Promise> { + const queryFn = this.clickhouse.reader.query({ + name: "agentCostSparklines", + query: `SELECT + task_identifier, + toStartOfHour(start_time) AS bucket, + sum(total_cost) AS val + FROM trigger_dev.llm_metrics_v1 + WHERE environment_id = {environmentId: String} + AND task_identifier IN {slugs: Array(String)} + AND start_time >= now() - INTERVAL 24 HOUR + GROUP BY task_identifier, bucket + ORDER BY task_identifier, bucket`, + params: z.object({ + environmentId: z.string(), + slugs: z.array(z.string()), + }), + schema: z.object({ + task_identifier: z.string(), + bucket: z.string(), + val: z.coerce.number(), + }), + }); + + return this.#buildSparklineMap(await queryFn({ environmentId, slugs }), slugs); + } + + /** 24h hourly sparkline of total tokens per agent */ + async #getTokenSparklines( + environmentId: string, + slugs: string[] + ): Promise> { + const queryFn = this.clickhouse.reader.query({ + name: "agentTokenSparklines", + query: `SELECT + task_identifier, + toStartOfHour(start_time) AS bucket, + sum(total_tokens) AS val + FROM trigger_dev.llm_metrics_v1 + WHERE environment_id = {environmentId: String} + AND task_identifier IN {slugs: Array(String)} + AND start_time >= now() - INTERVAL 24 HOUR + GROUP BY task_identifier, bucket + ORDER BY task_identifier, bucket`, + params: z.object({ + environmentId: z.string(), + slugs: z.array(z.string()), + }), + schema: z.object({ + task_identifier: z.string(), + bucket: z.string(), + val: z.coerce.number(), + }), + }); + + return this.#buildSparklineMap(await queryFn({ environmentId, slugs }), slugs); + } + + /** Convert ClickHouse query result to sparkline map with zero-filled 24 hourly buckets */ + #buildSparklineMap( + queryResult: [Error, null] | [null, { task_identifier: string; bucket: string; val: number }[]], + slugs: string[] + ): Record { + const [error, rows] = queryResult; + if (error) { + console.error("Agent sparkline query 
failed:", error); + return {}; + } + return this.#buildSparklineFromRows(rows, slugs); + } + + #buildSparklineFromRows( + rows: { task_identifier: string; bucket: string; val: number }[], + slugs: string[] + ): Record { + const now = new Date(); + const startHour = new Date( + Date.UTC( + now.getUTCFullYear(), + now.getUTCMonth(), + now.getUTCDate(), + now.getUTCHours() - 23, + 0, + 0, + 0 + ) + ); + + const bucketKeys: string[] = []; + for (let i = 0; i < 24; i++) { + const h = new Date(startHour.getTime() + i * 3600_000); + bucketKeys.push(h.toISOString().slice(0, 13).replace("T", " ") + ":00:00"); + } + + const rowMap = new Map(); + for (const row of rows) { + rowMap.set(`${row.task_identifier}|${row.bucket}`, row.val); + } + + const result: Record = {}; + for (const slug of slugs) { + result[slug] = bucketKeys.map((key) => rowMap.get(`${slug}|${key}`) ?? 0); + } + return result; + } +} + +export const agentListPresenter = singleton("agentListPresenter", setupAgentListPresenter); + +function setupAgentListPresenter() { + return new AgentListPresenter(clickhouseClient, $replica); +} diff --git a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts index 254ec18d1c0..2002f5425ca 100644 --- a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts @@ -304,6 +304,7 @@ export class ApiRunListPresenter extends BasePresenter { durationMs: run.usageDurationMs, depth: run.depth, metadata, + taskKind: run.taskKind, ...ApiRetrieveRunPresenter.apiBooleanHelpersFromRunStatus( ApiRetrieveRunPresenter.apiStatusFromRunStatus(run.status, apiVersion) ), diff --git a/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts index de111abd279..f0a2d363d61 100644 --- a/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts +++ 
b/apps/webapp/app/presenters/v3/NextRunListPresenter.server.ts @@ -1,5 +1,6 @@ import { type ClickHouse } from "@internal/clickhouse"; import { MachinePresetName } from "@trigger.dev/core/v3"; +import { RunAnnotations } from "@trigger.dev/core/v3/schemas"; import { type PrismaClient, type PrismaClientOrTransaction, @@ -34,6 +35,7 @@ export type RunListOptions = { queues?: string[]; machines?: MachinePresetName[]; errorId?: string; + sources?: string[]; //pagination direction?: Direction; cursor?: string; @@ -72,6 +74,7 @@ export class NextRunListPresenter { queues, machines, errorId, + sources, from, to, direction = "forward", @@ -89,6 +92,7 @@ export class NextRunListPresenter { const hasStatusFilters = statuses && statuses.length > 0; const hasFilters = + (sources !== undefined && sources.length > 0) || (tasks !== undefined && tasks.length > 0) || (versions !== undefined && versions.length > 0) || hasStatusFilters || @@ -186,6 +190,7 @@ export class NextRunListPresenter { queues, machines, errorId, + taskKinds: sources, page: { size: pageSize, cursor, @@ -250,6 +255,7 @@ export class NextRunListPresenter { name: run.queue.replace("task/", ""), type: run.queue.startsWith("task/") ? "task" : "custom", }, + taskKind: RunAnnotations.safeParse(run.annotations).data?.taskKind ?? 
/** An AGENT task of the current worker, as exposed to the playground UI. */
export type PlaygroundAgent = {
  slug: string;
  filePath: string;
  triggerSource: TaskTriggerSource;
  config: unknown;
  payloadSchema: unknown;
};

/** A persisted playground conversation plus the state of its backing run. */
export type PlaygroundConversation = {
  id: string;
  chatId: string;
  title: string;
  agentSlug: string;
  runFriendlyId: string | null;
  runStatus: TaskRunStatus | null;
  clientData: unknown;
  messages: unknown;
  lastEventId: string | null;
  /** True when a run is attached and its status is not final. */
  isActive: boolean;
  createdAt: Date;
  updatedAt: Date;
};

/** Read-only queries (against the replica) backing the agent playground. */
export class PlaygroundPresenter {
  /**
   * List the AGENT tasks of the environment's current worker, ordered by slug.
   *
   * @returns An empty array when the environment has no current worker.
   */
  async listAgents({
    environmentId,
    environmentType,
  }: {
    environmentId: string;
    environmentType: RuntimeEnvironmentType;
  }): Promise<PlaygroundAgent[]> {
    const workerId = await this.#findCurrentWorkerId(environmentId, environmentType);
    if (workerId === null) return [];

    return $replica.backgroundWorkerTask.findMany({
      where: {
        workerId,
        triggerSource: "AGENT",
      },
      select: {
        slug: true,
        filePath: true,
        triggerSource: true,
        config: true,
        payloadSchema: true,
      },
      orderBy: { slug: "asc" },
    });
  }

  /**
   * Look up a single AGENT task of the current worker by slug.
   *
   * @returns `null` when there is no current worker or no matching agent.
   */
  async getAgent({
    environmentId,
    environmentType,
    agentSlug,
  }: {
    environmentId: string;
    environmentType: RuntimeEnvironmentType;
    agentSlug: string;
  }): Promise<PlaygroundAgent | null> {
    const workerId = await this.#findCurrentWorkerId(environmentId, environmentType);
    if (workerId === null) return null;

    return $replica.backgroundWorkerTask.findFirst({
      where: {
        workerId,
        triggerSource: "AGENT",
        slug: agentSlug,
      },
      select: {
        slug: true,
        filePath: true,
        triggerSource: true,
        config: true,
        payloadSchema: true,
      },
    });
  }

  /**
   * Fetch the user's most recently updated conversations with one agent.
   *
   * @param limit - Maximum number of conversations to return (default 10).
   * @returns Conversations ordered by `updatedAt` descending, each flattened
   *   with the friendly id / status of its run (or `null` when there is none).
   */
  async getRecentConversations({
    environmentId,
    agentSlug,
    userId,
    limit = 10,
  }: {
    environmentId: string;
    agentSlug: string;
    userId: string;
    limit?: number;
  }): Promise<PlaygroundConversation[]> {
    const conversations = await $replica.playgroundConversation.findMany({
      where: {
        runtimeEnvironmentId: environmentId,
        agentSlug,
        userId,
      },
      select: {
        id: true,
        chatId: true,
        title: true,
        agentSlug: true,
        clientData: true,
        messages: true,
        lastEventId: true,
        createdAt: true,
        updatedAt: true,
        run: {
          select: {
            friendlyId: true,
            status: true,
          },
        },
      },
      orderBy: { updatedAt: "desc" },
      take: limit,
    });

    return conversations.map((c) => ({
      id: c.id,
      chatId: c.chatId,
      title: c.title,
      agentSlug: c.agentSlug,
      runFriendlyId: c.run?.friendlyId ?? null,
      runStatus: c.run?.status ?? null,
      clientData: c.clientData,
      messages: c.messages,
      lastEventId: c.lastEventId,
      // A conversation without a run is never considered active.
      isActive: c.run?.status ? !isFinalRunStatus(c.run.status) : false,
      createdAt: c.createdAt,
      updatedAt: c.updatedAt,
    }));
  }

  /** Resolve the id of the environment's current worker, or `null` if none. */
  async #findCurrentWorkerId(
    environmentId: string,
    environmentType: RuntimeEnvironmentType
  ): Promise<string | null> {
    const currentWorker = await findCurrentWorkerFromEnvironment(
      { id: environmentId, type: environmentType },
      $replica
    );
    return currentWorker?.id ?? null;
  }
}

export const playgroundPresenter = new PlaygroundPresenter();
When the run was started with `trigger: "preload"`, + // `messages` is empty — the first user message arrives later over + // the session `.in` channel and is merged in by the AgentView. + // + // `agentSession` is the identifier the dashboard uses to address the + // backing Session when subscribing to `.out` / `.in`. Prefer the + // explicit `sessionId` threaded by `TriggerChatTransport` / + // `chat.createTriggerAction`; fall back to `chatId` for pre-migration + // agent runs (the session resource route accepts either, matching + // `resolveSessionByIdOrExternalId`). + let agentInitialMessages: AgentInitialMessage[] = []; + let agentSession: string | null = null; + if (isAgentRun && run.payload && run.payloadType !== "application/store") { + try { + const parsed = await parsePacket({ + data: typeof run.payload === "string" ? run.payload : JSON.stringify(run.payload), + dataType: run.payloadType ?? "application/json", + }); + if (parsed && typeof parsed === "object") { + if (Array.isArray((parsed as any).messages)) { + agentInitialMessages = (parsed as any).messages as AgentInitialMessage[]; + } + const sessionId = (parsed as any).sessionId; + const chatId = (parsed as any).chatId; + if (typeof sessionId === "string" && sessionId.length > 0) { + agentSession = sessionId; + } else if (typeof chatId === "string" && chatId.length > 0) { + agentSession = chatId; + } + } + } catch { + // Fall back to empty initial messages + null session — the + // AgentView will show a loading spinner and surface any stream + // subscription errors to the console. 
+ } + } + let region: { name: string; location: string | null } | null = null; if (run.runtimeEnvironment.type !== "DEVELOPMENT" && run.engine !== "V1") { @@ -297,6 +357,9 @@ export class SpanPresenter extends BasePresenter { isFinished, isRunning: RUNNING_STATUSES.includes(run.status), isError: isFailedRunStatus(run.status), + isAgentRun, + agentInitialMessages, + agentSession, payload, payloadType: run.payloadType, output, @@ -455,6 +518,7 @@ export class SpanPresenter extends BasePresenter { payloadType: true, metadata: true, metadataType: true, + annotations: true, maxAttempts: true, project: { include: { diff --git a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts index f1635f23375..fc29f5510e8 100644 --- a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts @@ -61,6 +61,7 @@ export class TaskListPresenter { const tasks = await this._replica.backgroundWorkerTask.findMany({ where: { workerId: currentWorker.id, + triggerSource: { not: "AGENT" }, }, select: { id: true, diff --git a/apps/webapp/app/presenters/v3/TestPresenter.server.ts b/apps/webapp/app/presenters/v3/TestPresenter.server.ts index af5bb93a7e7..b817bbf155e 100644 --- a/apps/webapp/app/presenters/v3/TestPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TestPresenter.server.ts @@ -19,15 +19,13 @@ export class TestPresenter extends BasePresenter { const tasks = await this.#getTasks(environmentId, isDev); return { - tasks: tasks.map((task) => { - return { - id: task.id, - taskIdentifier: task.slug, - filePath: task.filePath, - friendlyId: task.friendlyId, - triggerSource: task.triggerSource, - }; - }), + tasks: tasks.map((task) => ({ + id: task.id, + taskIdentifier: task.slug, + filePath: task.filePath, + friendlyId: task.friendlyId, + triggerSource: task.triggerSource, + })), }; } @@ -54,10 +52,13 @@ export class TestPresenter extends BasePresenter { SELECT 
bwt.id, version, slug, "filePath", bwt."friendlyId", bwt."triggerSource" FROM latest_workers JOIN ${sqlDatabaseSchema}."BackgroundWorkerTask" bwt ON bwt."workerId" = latest_workers.id + WHERE bwt."triggerSource" != 'AGENT' ORDER BY slug ASC;`; } else { const currentDeployment = await findCurrentWorkerDeployment({ environmentId: envId }); - return currentDeployment?.worker?.tasks ?? []; + return (currentDeployment?.worker?.tasks ?? []).filter( + (t) => t.triggerSource !== "AGENT" + ); } } } diff --git a/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts b/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts index 09abb22639e..d5360cd004a 100644 --- a/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TestTaskPresenter.server.ts @@ -373,6 +373,10 @@ export class TestTaskPresenter { ), }; } + case "AGENT": { + // AGENT tasks are filtered out by TestPresenter and shouldn't reach here + return { foundTask: false }; + } default: { return task.triggerSource satisfies never; } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.agents/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.agents/route.tsx new file mode 100644 index 00000000000..deedafd9879 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.agents/route.tsx @@ -0,0 +1,360 @@ +import { BeakerIcon, CpuChipIcon, MagnifyingGlassIcon } from "@heroicons/react/20/solid"; +import { type MetaFunction } from "@remix-run/node"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { Suspense } from "react"; +import { TypedAwait, typeddefer, useTypedLoaderData } from "remix-typedjson"; +import { RunsIcon } from "~/assets/icons/RunsIcon"; +import { MainCenteredContainer, PageBody, PageContainer } from "~/components/layout/AppLayout"; +import { Badge } from "~/components/primitives/Badge"; 
/**
 * Loader for the Agents list page.
 *
 * Requires a signed-in user, resolves the project and environment from the
 * route params (404 when either is missing), then defers the presenter
 * result to the client via `typeddefer`.
 */
export const loader = async ({ request, params }: LoaderFunctionArgs) => {
  const notFound = (statusText: string) => new Response(undefined, { status: 404, statusText });

  const userId = await requireUserId(request);
  const routeParams = EnvironmentParamSchema.parse(params);

  const project = await findProjectBySlug(
    routeParams.organizationSlug,
    routeParams.projectParam,
    userId
  );
  if (!project) {
    throw notFound("Project not found");
  }

  const environment = await findEnvironmentBySlug(project.id, routeParams.envParam, userId);
  if (!environment) {
    throw notFound("Environment not found");
  }

  const presented = await agentListPresenter.call({
    organizationId: project.organizationId,
    projectId: project.id,
    environmentId: environment.id,
    environmentType: environment.type,
  });

  return typeddefer(presented);
};
+ + No agents deployed + + Create a chat agent using chat.agent() from{" "} + @trigger.dev/sdk/ai and deploy it to see it here. + +
+
+
+
+ ); + } + + return ( + + + + + +
+
+
+
+ setFilterText(e.target.value)} + autoFocus + /> +
+ + + + ID + Type + File + Active + Conversations (24h) + Cost (24h) + Tokens (24h) + Go to page + + + + {filteredItems.length > 0 ? ( + filteredItems.map((agent) => { + const path = v3RunsPath(organization, project, environment, { + tasks: [agent.slug], + }); + const agentType = + (agent.config as { type?: string } | null)?.type ?? "unknown"; + + return ( + + +
+ + } + content="Agent" + /> + {agent.slug} +
+
+ + {formatAgentType(agentType)} + + + + + + }> + –}> + {(data) => { + const state = data[agent.slug]; + if (!state || (state.running === 0 && state.suspended === 0)) { + return ( + + ); + } + return ( + + {state.running > 0 && ( + + + {state.running} + + )} + {state.running > 0 && state.suspended > 0 && ( + · + )} + {state.suspended > 0 && ( + + + {state.suspended} + + )} + + ); + }} + + + + + }> + –}> + {(data) => ( + + )} + + + + + }> + –}> + {(data) => ( + + )} + + + + + }> + –}> + {(data) => ( + + )} + + + + + + + + } + hiddenButtons={ + + Playground + + } + /> +
+ ); + }) + ) : ( + + + No agents match your filters + + + )} +
+
+
+
+
+
+
/** Human-readable label for an agent's configured `type`; unknown types pass through. */
function formatAgentType(type: string): string {
  switch (type) {
    case "ai-sdk-chat":
      return "AI SDK Chat";
    default:
      return type;
  }
}

/**
 * Shared compact formatter for non-negative totals: plain below 1,000,
 * one-decimal `k` below ~1M, one-decimal `M` above.
 *
 * The `M` tier starts at 999,950 because that is the first value whose `k`
 * form would round up to the nonsensical "1000.0k".
 */
function formatCompactTotal(total: number): string {
  if (total >= 999_950) return `${(total / 1_000_000).toFixed(1)}M`;
  if (total >= 1000) return `${(total / 1000).toFixed(1)}k`;
  return total.toString();
}

/** Format a run/conversation count, e.g. `42`, `1.5k`, `1.5M`. */
function formatCount(total: number): string {
  return formatCompactTotal(total);
}

/**
 * Format a dollar amount.
 *
 * - exactly zero: `$0`
 * - below one cent: four decimals (`$0.0042`), or `<$0.0001` when the value
 *   is non-zero but would otherwise round to a misleading `$0.0000`
 * - otherwise: two decimals (`$0.50`, `$12.50`)
 */
function formatCost(total: number): string {
  if (total === 0) return "$0";
  if (total < 0.01) {
    const fourDecimals = total.toFixed(4);
    return fourDecimals === "0.0000" ? "<$0.0001" : `$${fourDecimals}`;
  }
  return `$${total.toFixed(2)}`;
}

/** Format a token total, e.g. `512`, `12.3k`, `2.3M`. */
function formatTokens(total: number): string {
  return formatCompactTotal(total);
}
; +} + +function SparklineWithTotal({ + data, + formatTotal, + color = "text-text-bright", + barColor = "#3B82F6", +}: { + data?: number[]; + formatTotal: (total: number) => string; + color?: string; + barColor?: string; +}) { + if (!data || data.every((v) => v === 0)) { + return ; + } + + const total = data.reduce((sum, v) => sum + v, 0); + const max = Math.max(...data); + + return ( +
+
+ {data.map((value, i) => { + const height = max > 0 ? Math.max((value / max) * 100, value > 0 ? 8 : 0) : 0; + return ( +
0 ? barColor : "transparent", + opacity: value > 0 ? 0.8 : 0, + }} + /> + ); + })} +
+ {formatTotal(total)} +
+ ); +} diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.$agentParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.$agentParam/route.tsx new file mode 100644 index 00000000000..6b433ec16a0 --- /dev/null +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.playground.$agentParam/route.tsx @@ -0,0 +1,1209 @@ +import { + ArrowUpIcon, + BoltIcon, + CpuChipIcon, + StopIcon, + ArrowPathIcon, + TrashIcon, +} from "@heroicons/react/20/solid"; +import { type MetaFunction } from "@remix-run/node"; +import { Link, useFetcher, useNavigate, useRouteLoaderData } from "@remix-run/react"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; +import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { useCallback, useEffect, useRef, useState } from "react"; +import { useChat } from "@ai-sdk/react"; +import { TriggerChatTransport } from "@trigger.dev/sdk/chat"; +import { MainCenteredContainer } from "~/components/layout/AppLayout"; +import { Badge } from "~/components/primitives/Badge"; +import { Button, LinkButton } from "~/components/primitives/Buttons"; +import { CopyButton } from "~/components/primitives/CopyButton"; +import { DurationPicker } from "~/components/primitives/DurationPicker"; +import { Header3 } from "~/components/primitives/Headers"; +import { Hint } from "~/components/primitives/Hint"; +import { Input } from "~/components/primitives/Input"; +import { InputGroup } from "~/components/primitives/InputGroup"; +import { Label } from "~/components/primitives/Label"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { Spinner } from "~/components/primitives/Spinner"; +import { Popover, PopoverContent, PopoverTrigger } from "~/components/primitives/Popover"; +import { ClockRotateLeftIcon } from "~/assets/icons/ClockRotateLeftIcon"; +import type { 
PlaygroundConversation } from "~/presenters/v3/PlaygroundPresenter.server"; +import { DateTime } from "~/components/primitives/DateTime"; +import { cn } from "~/utils/cn"; +import { JSONEditor } from "~/components/code/JSONEditor"; +import { ToolUseRow, AssistantResponse, ChatBubble } from "~/components/runs/v3/ai/AIChatMessages"; +import { MessageBubble } from "~/components/runs/v3/agent/AgentMessageView"; +import { useAutoScrollToBottom } from "~/hooks/useAutoScrollToBottom"; +import { + ResizableHandle, + ResizablePanel, + ResizablePanelGroup, +} from "~/components/primitives/Resizable"; +import { + ClientTabs, + ClientTabsContent, + ClientTabsList, + ClientTabsTrigger, +} from "~/components/primitives/ClientTabs"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { playgroundPresenter } from "~/presenters/v3/PlaygroundPresenter.server"; +import { requireUserId } from "~/services/session.server"; +import { RunTagInput } from "~/components/runs/v3/RunTagInput"; +import { Select, SelectItem } from "~/components/primitives/Select"; +import { EnvironmentParamSchema, v3PlaygroundAgentPath } from "~/utils/pathBuilder"; +import { env as serverEnv } from "~/env.server"; +import { generateJWT as internal_generateJWT, MachinePresetName } from "@trigger.dev/core/v3"; +import { extractJwtSigningSecretKey } from "~/services/realtime/jwtAuth.server"; +import { SchemaTabContent } from "~/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/SchemaTabContent"; +import { AIPayloadTabContent } from "~/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/AIPayloadTabContent"; +import type { UIMessage } from "@ai-sdk/react"; + +export const 
/**
 * Loader for a single agent's playground page.
 *
 * Resolves project, environment and agent from the route params (404 when
 * any is missing), loads the user's recent conversations for that agent and,
 * when `?conversation=<id>` matches one of them, returns it as the
 * conversation to resume. A public access token is minted only when that
 * conversation is still active and has a run attached.
 */
export const loader = async ({ request, params }: LoaderFunctionArgs) => {
  type ActiveConversation = {
    chatId: string;
    runFriendlyId: string | null;
    publicAccessToken: string | null;
    clientData: unknown;
    messages: unknown;
    lastEventId: string | null;
  };

  const notFound = (statusText: string) => new Response(undefined, { status: 404, statusText });

  const userId = await requireUserId(request);
  const { organizationSlug, projectParam, envParam } = EnvironmentParamSchema.parse(params);

  const agentSlug = params.agentParam;
  if (!agentSlug) {
    throw notFound("Agent not specified");
  }

  const project = await findProjectBySlug(organizationSlug, projectParam, userId);
  if (!project) {
    throw notFound("Project not found");
  }

  const environment = await findEnvironmentBySlug(project.id, envParam, userId);
  if (!environment) {
    throw notFound("Environment not found");
  }

  const agent = await playgroundPresenter.getAgent({
    environmentId: environment.id,
    environmentType: environment.type,
    agentSlug,
  });
  if (!agent) {
    throw notFound("Agent not found");
  }

  const agentConfig = agent.config as { type?: string } | null;
  const apiOrigin = serverEnv.API_ORIGIN || serverEnv.LOGIN_ORIGIN || "http://localhost:3030";

  const recentConversations = await playgroundPresenter.getRecentConversations({
    environmentId: environment.id,
    agentSlug,
    userId,
  });

  // `?conversation=<id>` asks to resume an existing conversation; only ids
  // present in the recent list are honoured.
  const requestedId = new URL(request.url).searchParams.get("conversation");
  const resumed = requestedId
    ? recentConversations.find((c) => c.id === requestedId)
    : undefined;

  let activeConversation: ActiveConversation | null = null;
  if (resumed) {
    let publicAccessToken: string | null = null;

    // Only a live run gets a token; finished conversations are replayed from
    // the persisted messages alone.
    if (resumed.isActive && resumed.runFriendlyId) {
      publicAccessToken = await internal_generateJWT({
        secretKey: extractJwtSigningSecretKey(environment),
        payload: {
          sub: environment.id,
          pub: true,
          scopes: [
            `read:runs:${resumed.runFriendlyId}`,
            `write:inputStreams:${resumed.runFriendlyId}`,
          ],
        },
        expirationTime: "1h",
      });
    }

    activeConversation = {
      chatId: resumed.chatId,
      runFriendlyId: resumed.runFriendlyId,
      publicAccessToken,
      clientData: resumed.clientData,
      messages: resumed.messages,
      lastEventId: resumed.lastEventId,
    };
  }

  return typedjson({
    agent: {
      slug: agent.slug,
      filePath: agent.filePath,
      type: agentConfig?.type ?? "unknown",
      clientDataSchema: agent.payloadSchema ?? null,
    },
    apiOrigin,
    recentConversations,
    activeConversation,
  });
};
false; + const defaultRegion = regions.find((r) => r.isDefault); + const navigate = useNavigate(); + const organization = useOrganization(); + const project = useProject(); + const environment = useEnvironment(); + + const [conversationId, setConversationId] = useState(() => + activeConversation + ? recentConversations.find((c) => c.chatId === activeConversation.chatId)?.id ?? null + : null + ); + const [chatId, setChatId] = useState(() => activeConversation?.chatId ?? crypto.randomUUID()); + const [clientDataJson, setClientDataJson] = useState(() => + activeConversation?.clientData ? JSON.stringify(activeConversation.clientData, null, 2) : "{}" + ); + const clientDataJsonRef = useRef(clientDataJson); + clientDataJsonRef.current = clientDataJson; + const [machine, setMachine] = useState(undefined); + const [tags, setTags] = useState([]); + const [maxAttempts, setMaxAttempts] = useState(undefined); + const [maxDuration, setMaxDuration] = useState(undefined); + const [version, setVersion] = useState(undefined); + const [region, setRegion] = useState(() => + isDev ? undefined : defaultRegion?.name + ); + + const actionPath = `/resources/orgs/${organization.slug}/projects/${project.slug}/env/${environment.slug}/playground/action`; + + // Server-side `start` via Remix action — atomically creates the + // backing Session for `chatId` and triggers the first run, returns + // the session-scoped PAT. Idempotent: called on initial use AND on + // 401, so the same code path serves both first-run and PAT renewal. 
+ const startSession = useCallback( + async (): Promise => { + const formData = new FormData(); + formData.set("intent", "start"); + formData.set("agentSlug", agent.slug); + formData.set("chatId", chatId); + formData.set("clientData", clientDataJsonRef.current); + if (tags.length > 0) formData.set("tags", tags.join(",")); + if (machine) formData.set("machine", machine); + if (maxAttempts) formData.set("maxAttempts", String(maxAttempts)); + if (maxDuration) formData.set("maxDuration", String(maxDuration)); + if (version) formData.set("version", version); + if (region) formData.set("region", region); + + const response = await fetch(actionPath, { method: "POST", body: formData }); + const data = (await response.json()) as { + runId?: string; + publicAccessToken?: string; + conversationId?: string; + error?: string; + }; + + if (!response.ok || !data.publicAccessToken) { + throw new Error(data.error ?? "Failed to start chat session"); + } + + if (data.conversationId) { + setConversationId(data.conversationId); + } + + return data.publicAccessToken; + }, + [actionPath, agent.slug, chatId, tags, machine, maxAttempts, maxDuration, version, region] + ); + + // Resource route prefix — all realtime traffic goes through session-authed routes + const playgroundBaseURL = `${apiOrigin}/resources/orgs/${organization.slug}/projects/${project.slug}/env/${environment.slug}/playground`; + + // Create TriggerChatTransport directly (not via useTriggerChatTransport hook + // to avoid React version mismatch between SDK and webapp) + const transportRef = useRef(null); + if (transportRef.current === null) { + transportRef.current = new TriggerChatTransport({ + task: agent.slug, + // The Remix action is idempotent on `(env, externalId)` and + // returns a fresh session PAT every time, so it serves both + // first-run create and PAT renewal. 
`startSession` runs on + // `transport.preload(chatId)` and lazily on the first + // `sendMessage`; `accessToken` runs on a 401/403 from any + // session-PAT-authed request. Wiring the same call to both + // keeps the Preload button working without a separate refresh + // route. + startSession: async () => ({ publicAccessToken: await startSession() }), + accessToken: () => startSession(), + baseURL: playgroundBaseURL, + clientData: JSON.parse(clientDataJson || "{}") as Record, + ...(activeConversation?.publicAccessToken + ? { + sessions: { + [activeConversation.chatId]: { + publicAccessToken: activeConversation.publicAccessToken, + lastEventId: activeConversation.lastEventId ?? undefined, + }, + }, + } + : {}), + }); + } + const transport = transportRef.current; + + // Initial messages from persisted conversation (for resume) + const initialMessages = activeConversation?.messages + ? (activeConversation.messages as UIMessage[]) + : []; + + // Track the initial message count so we only save after genuinely new turns + // (not during resume replay which re-fires onFinish for replayed turns) + const initialMessageCountRef = useRef(initialMessages?.length ?? 
0); + + // Save messages after each turn completes + const saveMessages = useCallback( + (allMessages: UIMessage[]) => { + // Skip saves during resume replay — only save when we have more messages than we started with + if (allMessages.length <= initialMessageCountRef.current) return; + + const currentSession = transport.getSession(chatId); + const lastEventId = currentSession?.lastEventId; + + const formData = new FormData(); + formData.set("intent", "save"); + formData.set("agentSlug", agent.slug); + formData.set("chatId", chatId); + formData.set("messages", JSON.stringify(allMessages)); + if (lastEventId) formData.set("lastEventId", lastEventId); + + // Fire and forget + fetch(actionPath, { method: "POST", body: formData }).catch(() => {}); + + // Update the baseline so subsequent saves work correctly + initialMessageCountRef.current = allMessages.length; + }, + [chatId, agent.slug, actionPath, transport] + ); + + // useChat from AI SDK — handles message accumulation, streaming, stop + const { messages, sendMessage, stop, status, error } = useChat({ + id: chatId, + messages: initialMessages, + transport, + onFinish: ({ messages: allMessages }) => { + saveMessages(allMessages); + }, + }); + + const isStreaming = status === "streaming"; + const isSubmitted = status === "submitted"; + + // Sticky-bottom auto-scroll for the messages list. The hook walks up to + // the surrounding `overflow-y-auto` panel and follows the conversation + // as new chunks stream in — pauses if you scroll up to read history, + // resumes when you scroll back into the bottom band. Same behavior as + // the run-inspector Agent tab. 
+ const messagesRootRef = useAutoScrollToBottom([messages, isSubmitted]); + + // Pending messages — steering during streaming + const pending = usePlaygroundPendingMessages({ + transport, + chatId, + status, + messages, + sendMessage, + metadata: safeParseJson(clientDataJson), + }); + + const [input, setInput] = useState(""); + const [preloading, setPreloading] = useState(false); + const [preloaded, setPreloaded] = useState(false); + const inputRef = useRef(null); + + const session = transport.getSession(chatId); + + const handlePreload = useCallback(async () => { + setPreloading(true); + try { + await transport.preload(chatId); + setPreloaded(true); + inputRef.current?.focus(); + } finally { + setPreloading(false); + } + }, [transport, chatId]); + + const handleNewConversation = useCallback(() => { + // Navigate without ?conversation= so the loader returns activeConversation=null + // and the key changes to "new", causing a full remount with fresh state. + navigate(window.location.pathname); + }, [navigate]); + + const handleDeleteConversation = useCallback(async () => { + if (!conversationId) return; + + const formData = new FormData(); + formData.set("intent", "delete"); + formData.set("agentSlug", agent.slug); + formData.set("deleteConversationId", conversationId); + + await fetch(actionPath, { method: "POST", body: formData }); + handleNewConversation(); + }, [conversationId, agent.slug, actionPath, handleNewConversation]); + + const handleSend = useCallback(() => { + const trimmed = input.trim(); + if (!trimmed) return; + + setInput(""); + // steer() handles both cases: sends via input stream during streaming, + // or sends as a normal message when ready + pending.steer(trimmed); + }, [input, pending]); + + const handleKeyDown = useCallback( + (e: React.KeyboardEvent) => { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + handleSend(); + } + }, + [handleSend] + ); + + return ( + + +
+ {/* Header */} +
+
+ + {formatAgentType(agent.type)} +
+
+ {activeConversation?.runFriendlyId && ( + + View run + + )} + {messages.length > 0 && ( + + Copy raw + + )} + + {conversationId && ( + +
+
+ + {/* Messages */} +
+ {messages.length === 0 ? ( + +
+ {preloaded ? ( + <> + + Preloaded + + Agent is warmed up and waiting. Type a message below to start. + + + ) : ( + <> + + Start a conversation + + Type a message below to start testing{" "} + {agent.slug} + + {!session && ( + + )} + + )} +
+
+ ) : ( +
+ {messages.map((msg) => ( + + ))} + {isSubmitted && ( +
+
+ + Thinking... +
+
+ )} +
+ )} +
+ + {/* Error */} + {error && ( +
+ {error.message} +
+ )} + + {/* Input */} +
+
+