Skip to content

Commit 87233ff

Browse files
authored
Merge pull request #2536 from dgageot/board/configurable-response-caching-for-agent-accd7756
feat(agent): add a configurable response cache
2 parents 784af06 + c4dc536 commit 87233ff

20 files changed

Lines changed: 1535 additions & 9 deletions

File tree

‎agent-schema.json‎

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,10 @@
398398
"$ref": "#/definitions/HooksConfig",
399399
"description": "Lifecycle hooks for executing shell commands at various points in the agent's execution"
400400
},
401+
"cache": {
402+
"$ref": "#/definitions/CacheConfig",
403+
"description": "Optional response cache: when the same user question is asked again, replay the previous answer instead of calling the model."
404+
},
401405
"skills": {
402406
"description": "Enable skills discovery for this agent. Set to true to load all discovered skills from local filesystem sources; false disables skills. A list can mix sources (\"local\" or an HTTP/HTTPS URL) and/or skill names to include. If only names are given, local sources are loaded and filtered to just those skills.",
403407
"oneOf": [
@@ -480,6 +484,32 @@
480484
},
481485
"additionalProperties": false
482486
},
487+
"CacheConfig": {
488+
"type": "object",
489+
"description": "Configuration for the agent's response cache. When enabled, the assistant response produced for a given user question is stored and replayed verbatim the next time the same question is asked, skipping the model entirely. Two normalization options control what 'same question' means: case_sensitive (default false, i.e. matching is case-insensitive) requires an exact case match when true, and trim_spaces (default false) strips leading/trailing whitespace before comparison when true. Set 'path' to persist entries to a JSON file (relative paths resolve against the agent config directory); leave it empty to keep entries in memory only.",
490+
"properties": {
491+
"enabled": {
492+
"type": "boolean",
493+
"description": "Set to true to enable the cache. When false (or when the cache section is omitted), no caching is performed.",
494+
"default": false
495+
},
496+
"case_sensitive": {
497+
"type": "boolean",
498+
"description": "When true, questions must match exactly (including case) to hit the cache. Default: false (case-insensitive matching).",
499+
"default": false
500+
},
501+
"trim_spaces": {
502+
"type": "boolean",
503+
"description": "When true, leading and trailing whitespace is stripped from questions before they are compared. Default: false.",
504+
"default": false
505+
},
506+
"path": {
507+
"type": "string",
508+
"description": "Path to a JSON file used to persist cache entries across runs. Relative paths are resolved against the agent's config directory. When empty, the cache lives only in memory."
509+
}
510+
},
511+
"additionalProperties": false
512+
},
483513
"HooksConfig": {
484514
"type": "object",
485515
"description": "Lifecycle hooks configuration for an agent. Hooks allow running shell commands at various points in the agent's execution lifecycle.",

‎docs/configuration/agents/index.md‎

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ agents:
4848
structured_output: # Optional: constrain output format
4949
name: string
5050
schema: object
51+
cache: # Optional: response cache (skip the model on repeat questions)
52+
enabled: boolean
53+
case_sensitive: boolean
54+
trim_spaces: boolean
55+
path: string
5156
```
5257
5358
<div class="callout callout-tip" markdown="1">
@@ -83,6 +88,7 @@ agents:
8388
| `handoffs` | array | ✗ | List of agent names this agent can hand off the conversation to. Enables the `handoff` tool. See [Handoffs Routing]({{ '/concepts/multi-agent/#handoffs-routing' | relative_url }}). |
8489
| `hooks` | object | ✗ | Lifecycle hooks for running commands at various points. See [Hooks]({{ '/configuration/hooks/' | relative_url }}). |
8590
| `structured_output` | object | ✗ | Constrain agent output to match a JSON schema. See [Structured Output]({{ '/configuration/structured-output/' | relative_url }}). |
91+
| `cache` | object | ✗ | Response cache. When the same user question is asked again, the previous answer is replayed verbatim and the model is not called. See [Response Cache](#response-cache) below. |
8692

8793
<div class="callout callout-warning" markdown="1">
8894
<div class="callout-title">⚠️ max_iterations
@@ -91,6 +97,47 @@ agents:
9197

9298
</div>
9399

100+
## Response Cache
101+
102+
The response cache short-circuits the model when the same user question is asked again. The first time a question is asked, the agent calls the model normally and stores the assistant's reply. Subsequent identical questions skip the model entirely and replay the stored reply verbatim.
103+
104+
```yaml
105+
agents:
106+
root:
107+
model: openai/gpt-5-mini
108+
description: Cached assistant
109+
instruction: You are a helpful assistant.
110+
cache:
111+
enabled: true # required to turn the cache on
112+
case_sensitive: false # default: false ("Hello" == "hello")
113+
trim_spaces: true # default: false (" hello " == "hello")
114+
path: ./cache.json # optional: persist to disk; omit for in-memory
115+
```
116+
117+
| Property | Type | Default | Description |
118+
| ---------------- | ------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
119+
| `enabled` | boolean | `false` | Master switch. When `false` (or when the `cache` section is omitted), no caching is performed. |
120+
| `case_sensitive` | boolean | `false` | When `true`, questions must match exactly (including case) to hit the cache. |
121+
| `trim_spaces` | boolean | `false` | When `true`, leading and trailing whitespace is stripped from the question before it is compared. |
122+
| `path` | string | _empty_ | When set, cache entries are persisted to a JSON file at the given path and reloaded on startup so the cache survives restarts. Relative paths resolve against the agent config directory. When empty, the cache lives in memory only. |
123+
124+
**How it works**
125+
126+
- The cache key is the latest user message in the session, normalized according to `case_sensitive` and `trim_spaces`.
127+
- On a hit, the cached reply is added to the session as the assistant message and stop hooks fire normally — the rest of the agent (tools, sub-agents, the model) is bypassed.
128+
- On a miss, the agent runs normally; the final assistant message produced by the first stop of the run is then stored under the question's key.
129+
- Only the response to the original user question of a run is cached; follow-up turns inside the same `RunStream` are not.
130+
131+
**File-backed storage**
132+
133+
When `path` is set, every `Store` rewrites the entire cache file. Writes are **atomic**: the new content is written to a sibling temp file, `fsync`'d, and renamed over the destination, so a concurrent reader — or any reader that opens the file after a writer crashed mid-write — will always see either the previous content or the new content in full, never a partially written file. The parent directory is also `fsync`'d after the rename so the rename itself is durable.
134+
135+
**Cross-process sharing**
136+
137+
Multiple processes can share the same `path:` cache file safely. Every `Store` takes an exclusive advisory lock on a sibling `<path>.lock` file (POSIX `flock(2)` on Unix, `LockFileEx` on Windows), reloads the current on-disk state under the lock, merges the new entry, and writes back atomically. Two processes that store *different* keys at the same time both see their writes preserved on disk; the lock window is short (one read + one fsync'd write).
138+
139+
`Lookup` checks the file's modification time and reloads the in-memory map when that time has advanced since its last load, so writes from a sibling process become visible without a restart. The `<path>.lock` sentinel file is created on first write and never deleted: removing it would let two processes lock different inodes and lose mutual exclusion.
140+
94141
## Welcome Message
95142

96143
Display a message when users start a session:

‎examples/cached_responses.yaml‎

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/usr/bin/env docker agent run
2+
3+
# Demonstrates the response cache.
4+
#
5+
# The first time a question is asked, the agent calls the model normally and
6+
# stores the answer. The second time the same question is asked, the answer is
7+
# replayed verbatim and the model is not invoked at all.
8+
#
9+
# Two normalization options control what "same question" means:
10+
# - case_sensitive: when false (the default), "Hello" and "hello" match.
11+
# - trim_spaces: when true, leading and trailing whitespace is ignored.
12+
#
13+
# Storage is in-memory by default. Set `path` to persist entries to a JSON
14+
# file that is reloaded on startup so the cache survives restarts. Multiple
15+
# processes can safely share the same file: an advisory lock on
16+
# `<path>.lock` serializes writes, and Lookup reloads the in-memory map
17+
# when the file changes externally.
18+
19+
agents:
20+
root:
21+
model: openai/gpt-5-mini
22+
description: A helpful AI assistant with a response cache
23+
instruction: |
24+
You are a knowledgeable assistant that helps users with various tasks.
25+
Be helpful, accurate, and concise in your responses.
26+
cache:
27+
enabled: true
28+
case_sensitive: false # "Hello" == "hello"
29+
trim_spaces: true # " hello " == "hello"
30+
path: ./cache.json # remove this line to keep the cache in memory only

‎pkg/agent/agent.go‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"sync/atomic"
1010
"time"
1111

12+
"github.com/docker/docker-agent/pkg/cache"
1213
"github.com/docker/docker-agent/pkg/config/latest"
1314
"github.com/docker/docker-agent/pkg/config/types"
1415
"github.com/docker/docker-agent/pkg/model/provider"
@@ -41,6 +42,7 @@ type Agent struct {
4142
tools []tools.Tool
4243
commands types.Commands
4344
hooks *latest.HooksConfig
45+
cache *cache.Cache
4446

4547
// warningsMu guards pendingWarnings. addToolWarning and DrainWarnings
4648
// may be called concurrently from the runtime loop, the MCP server,
@@ -254,6 +256,12 @@ func (a *Agent) Hooks() *latest.HooksConfig {
254256
return a.hooks
255257
}
256258

259+
// Cache returns the response cache attached to this agent (e.g. via WithCache),
260+
// or nil when no cache is attached, meaning caching is disabled.
261+
func (a *Agent) Cache() *cache.Cache {
262+
return a.cache
263+
}
264+
257265
// Tools returns the tools available to this agent
258266
func (a *Agent) Tools(ctx context.Context) ([]tools.Tool, error) {
259267
a.ensureToolSetsAreStarted(ctx)

‎pkg/agent/opts.go‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package agent
33
import (
44
"time"
55

6+
"github.com/docker/docker-agent/pkg/cache"
67
"github.com/docker/docker-agent/pkg/config/latest"
78
"github.com/docker/docker-agent/pkg/config/types"
89
"github.com/docker/docker-agent/pkg/model/provider"
@@ -172,3 +173,10 @@ func WithHooks(hooks *latest.HooksConfig) Opt {
172173
a.hooks = hooks
173174
}
174175
}
176+
177+
// WithCache returns an Opt that sets the agent's response cache to c. Pass nil to leave caching disabled.
178+
func WithCache(c *cache.Cache) Opt {
179+
return func(a *Agent) {
180+
a.cache = c
181+
}
182+
}

0 commit comments

Comments
 (0)