Skip to content

Commit a5a80d6

Browse files
committed
fix(hooks): address review findings on the new hook events
Three issues surfaced by code review: * subagent_stop now fires on the error path of both sub-session helpers. Previously the hook was placed *after* the for-range childEvents loop, which an ErrorEvent skipped via early return \u2014 so handlers configured to observe sub-agent completions silently missed every failed run. Move the dispatch into a defer at the top of each helper so it fires for both success and failure (handlers can detect failure via empty stop_response or by correlating with the parent's error event). * Clarify the EventPermissionRequest doc comment. The old text said the hook "mirrors pre_tool_use" \u2014 misleading, because pre_tool_use treats allow as the implicit default while permission_request treats it as an explicit auto-approve verdict (and that asymmetry is the whole reason Result.PermissionAllowed exists separately from Result.Allowed). New comment spells out the contract. * examples/hooks.yaml now documents that the command-style hooks need jq, with install hints and a note on the graceful-degradation behaviour when jq is missing. Plus a regression test pinning the user_prompt_submit gating contract: fires exactly once on a top-level submission, never on a sub-session (SendUserMessage=false). The test caches a counter across the two cases via a tiny shared scaffold. Assisted-By: docker-agent
1 parent 6e27664 commit a5a80d6

4 files changed

Lines changed: 149 additions & 17 deletions

File tree

‎examples/hooks.yaml‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@
3636
# for parameters. Three are shipped: add_date,
3737
# add_environment_info, add_prompt_files.
3838
#
39+
# Requirements: the command-style hooks below pipe stdin through `jq` for
40+
# convenient JSON access. If you don't have jq installed (`brew install jq`
41+
# on macOS, `apt-get install jq` on Debian/Ubuntu), a hook will exit non-zero
42+
# and the runtime will log a 'Hook execution error' warning but otherwise
43+
# continue — use plain `awk`/`sed`/`grep` or any other parser if you prefer.
44+
#
3945
# Try these prompts:
4046
# "Run: echo hello" → allowed
4147
# "Run: rm -rf /tmp/test" → denied by pre_tool_use

‎pkg/hooks/types.go‎

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,20 @@ const (
2222
// patterns like a tool-call loop detector.
2323
EventPostToolUse EventType = "post_tool_use"
2424
// EventPermissionRequest fires just before the runtime would prompt
25-
// the user to confirm a tool call. Hook may auto-allow or auto-deny
26-
// via [HookSpecificOutput.PermissionDecision]; otherwise control
27-
// falls through to the interactive confirmation.
25+
// the user to confirm a tool call (i.e. when neither --yolo nor a
26+
// permissions rule short-circuited the decision and the tool is not
27+
// read-only). The hook can short-circuit the prompt by returning
28+
// [HookSpecificOutput.PermissionDecision] = "allow" (sets
29+
// [Result.PermissionAllowed] true — the runtime invokes the tool
30+
// without asking) or "deny" (sets [Result.Allowed] false — the
31+
// runtime rejects the tool with the hook's reason). Returning
32+
// nothing falls through to the interactive confirmation.
33+
//
34+
// Unlike pre_tool_use — where allow is the implicit default and only
35+
// deny carries new information — here allow is the explicit
36+
// auto-approve verdict; that asymmetry is why permission_request
37+
// has its own [Result.PermissionAllowed] flag separate from
38+
// [Result.Allowed].
2839
EventPermissionRequest EventType = "permission_request"
2940
// EventSessionStart fires when a session begins or resumes.
3041
EventSessionStart EventType = "session_start"

‎pkg/runtime/agent_delegation.go‎

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,17 @@ func mergeExcludedTools(parent, child []string) []string {
180180
// This is the "interactive" path used by transfer_task where the parent agent
181181
// loop is blocked while the child executes.
182182
func (r *LocalRuntime) runSubSessionForwarding(ctx context.Context, parent, child *session.Session, span trace.Span, evts chan Event, parentAgent *agent.Agent, subAgentName string) (*tools.ToolCallResult, error) {
183+
// subagent_stop fires after the child's stream has fully drained,
184+
// using the *parent* agent's executor so handlers configured on the
185+
// orchestrator see every child completion in one place — success or
186+
// failure. The deferred call ensures we don't lose the event when an
187+
// ErrorEvent triggers an early return below; handlers can detect a
188+
// failed run by an empty stop_response (or by correlating with the
189+
// session-level error event the parent already received).
190+
defer func() {
191+
r.executeSubagentStopHooks(ctx, parent, child, parentAgent, subAgentName, child.GetLastAssistantMessageContent())
192+
}()
193+
183194
childEvents := r.RunStream(ctx, child)
184195
for event := range childEvents {
185196
evts <- event
@@ -200,14 +211,8 @@ func (r *LocalRuntime) runSubSessionForwarding(ctx context.Context, parent, chil
200211
parent.AddSubSession(child)
201212
evts <- SubSessionCompleted(parent.ID, child, parentAgent.Name())
202213

203-
// subagent_stop fires after the child's stream has fully drained,
204-
// using the *parent* agent's executor so handlers configured on the
205-
// orchestrator see every child completion in one place.
206-
response := child.GetLastAssistantMessageContent()
207-
r.executeSubagentStopHooks(ctx, parent, child, parentAgent, subAgentName, response)
208-
209214
span.SetStatus(codes.Ok, "sub-session completed")
210-
return tools.ResultSuccess(response), nil
215+
return tools.ResultSuccess(child.GetLastAssistantMessageContent()), nil
211216
}
212217

213218
// runSubSessionCollecting runs a child session, collecting output via an
@@ -217,6 +222,17 @@ func (r *LocalRuntime) runSubSessionForwarding(ctx context.Context, parent, chil
217222
// It returns a RunResult containing either the final assistant message or
218223
// an error message.
219224
func (r *LocalRuntime) runSubSessionCollecting(ctx context.Context, parent, child *session.Session, subAgentName string, onContent func(string)) *agenttool.RunResult {
225+
// subagent_stop fires after the background sub-session has fully
226+
// drained — success or failure. The parent agent at the time of
227+
// dispatch (whoever called run_background_agent) owns the executor;
228+
// we resolve it via CurrentAgent because the background path doesn't
229+
// carry the parent agent name. dispatchHook silently no-ops when
230+
// CurrentAgent is nil. The deferred call ensures the hook fires even
231+
// when an ErrorEvent or ctx cancellation breaks us out of the loop.
232+
defer func() {
233+
r.executeSubagentStopHooks(ctx, parent, child, r.CurrentAgent(), subAgentName, child.GetLastAssistantMessageContent())
234+
}()
235+
220236
var errMsg string
221237
events := r.RunStream(ctx, child)
222238
for event := range events {
@@ -245,13 +261,6 @@ func (r *LocalRuntime) runSubSessionCollecting(ctx context.Context, parent, chil
245261
result := child.GetLastAssistantMessageContent()
246262
parent.AddSubSession(child)
247263

248-
// subagent_stop fires after the background sub-session has fully
249-
// drained. The parent agent at the time of dispatch (whoever called
250-
// run_background_agent) owns the executor; we resolve it via
251-
// CurrentAgent because the background path doesn't carry the parent
252-
// agent name. dispatchHook silently no-ops when CurrentAgent is nil.
253-
r.executeSubagentStopHooks(ctx, parent, child, r.CurrentAgent(), subAgentName, result)
254-
255264
return &agenttool.RunResult{Result: result}
256265
}
257266

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package runtime
2+
3+
import (
4+
"context"
5+
"sync/atomic"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
11+
"github.com/docker/docker-agent/pkg/agent"
12+
"github.com/docker/docker-agent/pkg/config/latest"
13+
"github.com/docker/docker-agent/pkg/hooks"
14+
"github.com/docker/docker-agent/pkg/session"
15+
"github.com/docker/docker-agent/pkg/team"
16+
)
17+
18+
// TestUserPromptSubmitFiresOncePerTopLevelTurn pins the contract that
19+
// user_prompt_submit fires exactly once per real user message in a
20+
// top-level session: not once per LLM call, not once per turn, but
21+
// once per submission. The runtime gates the dispatch in
22+
// [LocalRuntime.RunStream].
23+
func TestUserPromptSubmitFiresOncePerTopLevelTurn(t *testing.T) {
24+
t.Parallel()
25+
26+
calls, rt, sess := setupUserPromptSubmitCounter(t,
27+
session.WithUserMessage("hi"),
28+
)
29+
30+
for range rt.RunStream(t.Context(), sess) {
31+
}
32+
33+
assert.Equal(t, int32(1), calls.Load(),
34+
"user_prompt_submit must fire exactly once for a top-level user submission")
35+
}
36+
37+
// TestUserPromptSubmitSkippedForSubSessions pins the design choice
38+
// that user_prompt_submit fires for *human* prompts only. Sub-sessions
39+
// (transferred tasks, background agents, skill sub-sessions) carry a
40+
// runtime-synthesised "Please proceed." kick-off message that no human
41+
// authored, so firing the hook there would be noise. The runtime gates
42+
// the dispatch on [session.Session.SendUserMessage], which is exactly
43+
// the same flag the runtime uses to decide whether to emit a
44+
// [UserMessageEvent] \u2014 a sub-session sets it to false.
45+
func TestUserPromptSubmitSkippedForSubSessions(t *testing.T) {
46+
t.Parallel()
47+
48+
calls, rt, sess := setupUserPromptSubmitCounter(t,
49+
session.WithUserMessage("synthesised kick-off"),
50+
session.WithSendUserMessage(false),
51+
)
52+
53+
for range rt.RunStream(t.Context(), sess) {
54+
}
55+
56+
assert.Equal(t, int32(0), calls.Load(),
57+
"user_prompt_submit must NOT fire for sub-sessions (SendUserMessage=false): "+
58+
"their kick-off message is synthesised by the runtime, not authored by a human")
59+
}
60+
61+
// setupUserPromptSubmitCounter wires up a single-turn mock runtime with
62+
// a builtin user_prompt_submit hook that atomically increments the
63+
// returned counter on every dispatch. Both tests above share this
64+
// scaffolding so the only thing that varies between them is the
65+
// session's [session.WithSendUserMessage] flag.
66+
func setupUserPromptSubmitCounter(t *testing.T, opts ...session.Opt) (*atomic.Int32, *LocalRuntime, *session.Session) {
67+
t.Helper()
68+
69+
const counterName = "test-user-prompt-submit-counter"
70+
var calls atomic.Int32
71+
72+
stream := newStreamBuilder().
73+
AddContent("ok").
74+
AddStopWithUsage(3, 2).
75+
Build()
76+
prov := &mockProvider{id: "test/mock-model", stream: stream}
77+
78+
root := agent.New("root", "test agent",
79+
agent.WithModel(prov),
80+
agent.WithHooks(&latest.HooksConfig{
81+
UserPromptSubmit: []latest.HookDefinition{
82+
{Type: "builtin", Command: counterName},
83+
},
84+
}),
85+
)
86+
tm := team.New(team.WithAgents(root))
87+
88+
rt, err := NewLocalRuntime(tm,
89+
WithSessionCompaction(false),
90+
WithModelStore(mockModelStore{}),
91+
)
92+
require.NoError(t, err)
93+
94+
require.NoError(t, rt.hooksRegistry.RegisterBuiltin(
95+
counterName,
96+
func(_ context.Context, _ *hooks.Input, _ []string) (*hooks.Output, error) {
97+
calls.Add(1)
98+
return nil, nil
99+
},
100+
))
101+
102+
sess := session.New(opts...)
103+
sess.Title = "Unit Test"
104+
105+
return &calls, rt, sess
106+
}

0 commit comments

Comments
 (0)