Skip to content

Commit e0d7bb2

Browse files
mdrxy, open-swe, npentrel
authored
feat: add CI check for unresolved @[ref] cross-references (#3051)
## Description

Adds a check that validates all `@[ref]` cross-references in source MDX/MD files resolve against entries in `link_map.py`. Previously, unresolved references silently passed through the build pipeline and appeared as raw `@[ClassName]` text in the published docs.

Available via `make check-cross-refs`.

Note: I couldn't modify `.github/workflows/_check-links.yml` due to workflow permissions. To integrate into CI, add this step to `_check-links.yml` after "Install Python dependencies":

```yaml
- name: Check for unresolved cross-references
  run: make check-cross-refs
```

## Test Plan

- [ ] `make check-cross-refs` reports unresolved references with file, line number, and scope
- [ ] 12 unit tests covering: valid refs, unresolved refs, scope fences, code block skipping, escaped refs, titled refs, backtick refs, and code-samples exclusion

---------

Co-authored-by: open-swe[bot] <open-swe@users.noreply.github.com>
Co-authored-by: Naomi Pentrel <5212232+npentrel@users.noreply.github.com>
1 parent 1257451 commit e0d7bb2

5 files changed

Lines changed: 285 additions & 2 deletions

File tree

‎.github/workflows/ci.yml‎

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,18 @@ jobs:
6363
./.github/workflows/_check-links.yml
6464
with:
6565
python-version: ${{ matrix.python-version }}
66+
check-cross-refs:
67+
runs-on: ubuntu-latest
68+
timeout-minutes: 5
69+
permissions:
70+
contents: read
71+
steps:
72+
- uses: actions/checkout@v6
73+
- uses: "./.github/actions/uv_setup"
74+
with:
75+
python-version: "3.13"
76+
- run: uv sync --group test
77+
- run: make check-cross-refs
6678
check-generated-files:
6779
permissions:
6880
contents: read

‎Makefile‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: all dev build format lint test install clean lint_md lint_md_fix lint_prose broken-links broken-links-with-anchors format-check code-snippets test-code-samples
1+
.PHONY: all dev build format lint test install clean lint_md lint_md_fix lint_prose broken-links broken-links-with-anchors format-check code-snippets test-code-samples check-cross-refs
22

33
# Default target
44
all: help
@@ -149,11 +149,16 @@ test-code-samples:
149149
@if [ -f src/code-samples/package.json ]; then (cd src/code-samples && npm install --silent) || true; fi
150150
@FILES="$(FILES)" PYTHONPATH=$(CURDIR) python scripts/test_code_samples.py
151151

152+
# Check that all @[ref] cross-references in source files resolve against link_map.py
153+
check-cross-refs:
154+
@PYTHONPATH=$(CURDIR) uv run python scripts/check_cross_refs.py
155+
152156
help:
153157
@echo "Available commands:"
154158
@echo " make dev - Start development mode with file watching and mint dev"
155159
@echo " make build - Build documentation to ./build directory"
156160
@echo " make broken-links - Check for broken links in built documentation"
161+
@echo " make check-cross-refs - Check for unresolved @[ref] cross-references"
157162
@echo " make broken-links-with-anchors - Same as above, also validates anchor links"
158163
@echo " make format - Format code"
159164
@echo " make lint - Lint code"

‎pipeline/preprocessors/link_map.py‎

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,10 @@ class LinkMap(TypedDict):
281281
"SqliteSaver": "langgraph/checkpoints/#langgraph.checkpoint.sqlite.SqliteSaver",
282282
"JsonPlusSerializer": "langgraph/checkpoints/#langgraph.checkpoint.serde.jsonplus.JsonPlusSerializer",
283283
"PostgresSaver": "langgraph/checkpoints/#langgraph.checkpoint.postgres.PostgresSaver",
284+
"CosmosDBSaver": "langgraph-checkpoint-cosmosdb/",
285+
"AsyncCosmosDBSaver": "langgraph-checkpoint-cosmosdb/",
284286
"PostgresStore": "langgraph/store/#langgraph.store.postgres.PostgresStore",
287+
"AsyncSqliteStore": "langgraph/store/#langgraph.store.sqlite.AsyncSqliteStore",
285288
"create_react_agent": "langchain-classic/agents/react/agent/create_react_agent",
286289
"LastValue": "langgraph/channels/last_value/LastValue",
287290
"START": "langgraph/constants/START",
@@ -516,6 +519,7 @@ class LinkMap(TypedDict):
516519
"entrypoint": "langchain-langgraph/index/entrypoint",
517520
"entrypoint.final": "functions/_langchain_langgraph.index.entrypoint.html#final",
518521
"get_state_history": "classes/_langchain_langgraph.pregel.Pregel.html#getStateHistory",
522+
"get_state": "classes/_langchain_langgraph.pregel.Pregel.html#getState",
519523
"getStateHistory": "classes/_langchain_langgraph.pregel.Pregel.html#getStateHistory",
520524
"HumanInterrupt": "langchain-langgraph/prebuilt/HumanInterrupt",
521525
"interrupt": "langchain-langgraph/index/interrupt",
@@ -533,7 +537,6 @@ class LinkMap(TypedDict):
533537
"Runtime": "langchain/index/Runtime",
534538
"ToolNode": "langchain-langgraph/prebuilt/ToolNode",
535539
# Python-named aliases for cross-scope compatibility
536-
"get_state": "classes/_langchain_langgraph.pregel.Pregel.html#getState",
537540
"create_agent": "langchain/index/createAgent",
538541
"init_chat_model": "langchain/chat_models/universal/initChatModel",
539542
"tools_condition": "langchain-langgraph/prebuilt/toolsCondition",

‎scripts/check_cross_refs.py‎

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
"""Check for unresolved cross-references in documentation source files.
2+
3+
Scans all .mdx and .md files under src/ for @[ref] patterns and validates
4+
that each reference has a corresponding entry in link_map.py. Respects
5+
:::python/:::js scope fences so references are checked against the correct
6+
scope's link map.
7+
8+
Exit code 0 if all references resolve, 1 if any are unresolved.
9+
"""
10+
11+
import sys
12+
from pathlib import Path
13+
14+
from pipeline.preprocessors.handle_auto_links import (
15+
CODE_FENCE_PATTERN,
16+
CONDITIONAL_FENCE_PATTERN,
17+
CROSS_REFERENCE_PATTERN,
18+
)
19+
from pipeline.preprocessors.link_map import SCOPE_LINK_MAPS
20+
21+
# The documentation sources: the "src" directory that sits beside this
# script's own directory (i.e. under the grandparent of this file).
SRC_DIR = Path(__file__).resolve().parent.parent.joinpath("src")

# Maps a path prefix (relative to SRC_DIR, forward slashes) to the scope(s)
# that files under that prefix are checked against by default.
SCOPES_FOR_PATH: dict[str, list[str]] = {
    "oss/python/": ["python"],
    "oss/javascript/": ["js"],
}
27+
28+
29+
def _default_scopes_for_file(rel_path: str) -> list[str]:
    """Pick the scope list a source file is checked against by default.

    Args:
        rel_path: File path relative to the source root.

    Returns:
        The scopes registered in ``SCOPES_FOR_PATH`` for the first matching
        prefix; otherwise both scopes for other ``oss/`` files, and only
        ``python`` for everything else.
    """
    registered = next(
        (
            scopes
            for prefix, scopes in SCOPES_FOR_PATH.items()
            if rel_path.startswith(prefix)
        ),
        None,
    )
    if registered is not None:
        return registered

    # Non-OSS content (e.g., langsmith/) only generates Python-scope pages,
    # whereas the remaining oss/ files are checked against both scopes.
    return ["python", "js"] if rel_path.startswith("oss/") else ["python"]
38+
39+
40+
def _extract_refs(
    content: str,
    default_scopes: list[str],
) -> list[tuple[int, str, list[str]]]:
    """Collect every cross-reference in ``content`` with its line and scopes.

    Lines are numbered from 1. A line matching ``CODE_FENCE_PATTERN`` toggles
    the "inside a code block" state; fence lines and everything between them
    are skipped. Outside code blocks, a line matching
    ``CONDITIONAL_FENCE_PATTERN`` switches the active scopes (to ``python`` or
    ``js`` when the fence names one of them, back to ``default_scopes``
    otherwise) and is itself not scanned for references.

    Args:
        content: Full text of one source file.
        default_scopes: Scopes that apply outside any conditional fence.

    Returns:
        A list of ``(line_number, ref_name, scopes)`` tuples; each ``scopes``
        list is a fresh copy of the scopes active on that line.
    """
    found: list[tuple[int, str, list[str]]] = []
    active_scopes = default_scopes
    inside_code = False

    for lineno, raw_line in enumerate(content.splitlines(), start=1):
        trimmed = raw_line.strip()

        if CODE_FENCE_PATTERN.match(trimmed):
            inside_code = not inside_code
            continue
        if inside_code:
            continue

        conditional = CONDITIONAL_FENCE_PATTERN.match(trimmed)
        if conditional is not None:
            fence_language = conditional.group("language")
            recognised = bool(fence_language) and fence_language.lower() in (
                "python",
                "js",
            )
            active_scopes = [fence_language.lower()] if recognised else default_scopes
            continue

        # CROSS_REFERENCE_PATTERN uses (?<!\\) lookbehind to skip escaped refs,
        # so only unescaped references are yielded here. The untrimmed line is
        # scanned, exactly as it appears in the file.
        found.extend(
            (lineno, name, list(active_scopes))
            for hit in CROSS_REFERENCE_PATTERN.finditer(raw_line)
            if (name := hit.group("link_name_with_title") or hit.group("link_name"))
        )

    return found
74+
75+
76+
def check_cross_refs(src_dir: Path) -> list[tuple[str, int, str, list[str]]]:
    """Return list of (file, line, ref_name, scopes) for unresolved refs.

    Every ``.mdx`` and ``.md`` file below ``src_dir`` is scanned, except files
    under ``snippets/code-samples/`` and anything inside a ``node_modules``
    directory. Files that are not valid UTF-8 are skipped with a warning.

    Args:
        src_dir: Root directory of the documentation sources.

    Returns:
        One ``(relative_path, line_number, ref_name, scopes)`` tuple per
        reference that is missing from the link map of every scope it was
        checked against. ``relative_path`` always uses forward slashes.
    """
    errors: list[tuple[str, int, str, list[str]]] = []

    md_files = sorted([*src_dir.rglob("*.mdx"), *src_dir.rglob("*.md")])

    for file_path in md_files:
        # A directory may happen to be named like a markdown file; reading it
        # would raise, so only regular files are considered.
        if not file_path.is_file():
            continue

        relative = file_path.relative_to(src_dir)
        # Normalise to forward slashes: the prefix checks here and in
        # _default_scopes_for_file are written with "/" and would otherwise
        # never match on Windows.
        rel_path = relative.as_posix()

        if rel_path.startswith("snippets/code-samples/"):
            continue
        # Compare whole path components so that only real node_modules
        # directories are excluded, not files that merely contain the word.
        if "node_modules" in relative.parts:
            continue

        try:
            content = file_path.read_text(encoding="utf-8")
        except UnicodeDecodeError as exc:
            print(f" WARNING: skipping {rel_path} (not valid UTF-8: {exc})")
            continue

        default_scopes = _default_scopes_for_file(rel_path)

        for line_number, ref_name, scopes in _extract_refs(content, default_scopes):
            # NOTE(review): a reference counts as resolved when ANY of its
            # scopes knows it, so a ref missing from only one scope of a
            # shared page is not reported - confirm this leniency is intended.
            resolved = any(
                ref_name in SCOPE_LINK_MAPS.get(scope, {}) for scope in scopes
            )
            if not resolved:
                errors.append((rel_path, line_number, ref_name, scopes))

    return errors
108+
109+
110+
def main() -> None:
    """Run the checker over ``SRC_DIR``, print a report, and exit.

    Exits with status 0 when nothing is unresolved. Otherwise prints one line
    per unresolved reference followed by a summary and exits with status 1.
    """
    unresolved = check_cross_refs(SRC_DIR)
    total = len(unresolved)

    if total == 0:
        print("✅ All cross-references resolved")
        sys.exit(0)

    print(f"found {total} unresolved cross-reference(s):\n")
    for rel_file, lineno, name, scope_list in unresolved:
        joined_scopes = ", ".join(scope_list)
        print(f" {rel_file}:{lineno}: @[{name}] not in scope(s): {joined_scopes}")

    print(
        f"\n{total} unresolved cross-reference(s). "
        "Add entries to pipeline/preprocessors/link_map.py or fix the reference."
    )
    sys.exit(1)


# Only run the check when executed as a script, not when imported (e.g. by tests).
if __name__ == "__main__":
    main()
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
"""Tests for the cross-reference checker script."""
2+
3+
import tempfile
4+
from pathlib import Path
5+
6+
from scripts.check_cross_refs import check_cross_refs
7+
8+
9+
def _write(tmp: Path, rel_path: str, content: str) -> None:
    """Create ``rel_path`` below ``tmp`` (with parent dirs) holding ``content``."""
    target = tmp.joinpath(rel_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
13+
14+
15+
def test_valid_python_ref() -> None:
    """A known reference in a Python-scope page yields no errors."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "langsmith/page.mdx", "Use @[StateGraph] here.")
        assert check_cross_refs(root) == []
22+
23+
24+
def test_unresolved_ref() -> None:
    """A reference with no link-map entry is reported exactly once."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "langsmith/page.mdx", "Use @[NonExistentWidget] here.")
        unresolved = check_cross_refs(root)
        assert [entry[2] for entry in unresolved] == ["NonExistentWidget"]
32+
33+
34+
def test_js_scope_fence() -> None:
    """A reference wrapped in a :::js fence is validated in the js scope."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        fenced = ":::js\n@[StateGraph]\n:::\n"
        _write(root, "oss/page.mdx", fenced)
        assert check_cross_refs(root) == []
42+
43+
44+
def test_js_only_ref_in_python_scope_fails() -> None:
    """A JS-only reference in an oss/python/ page is reported as unresolved."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "oss/python/page.mdx", "@[wrapVitest]")
        unresolved = check_cross_refs(root)
        assert [entry[2] for entry in unresolved] == ["wrapVitest"]
52+
53+
54+
def test_oss_shared_file_checks_both_scopes() -> None:
    """An oss/ page outside python/ and javascript/ is checked in both scopes."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "oss/page.mdx", "@[StateGraph]")
        assert check_cross_refs(root) == []
61+
62+
63+
def test_ref_inside_code_block_ignored() -> None:
    """References that sit inside a fenced code block are never validated."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        fenced = "```python\n@[NonExistentWidget]\n```\n"
        _write(root, "langsmith/page.mdx", fenced)
        assert check_cross_refs(root) == []
71+
72+
73+
def test_escaped_ref_ignored() -> None:
    r"""A backslash-escaped reference (\@[...]) is not validated."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "langsmith/page.mdx", "\\@[NonExistentWidget]")
        assert check_cross_refs(root) == []
80+
81+
82+
def test_titled_ref_format() -> None:
    """In @[title][ref] form, the second bracket is the name that is looked up."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "langsmith/page.mdx", "@[my title][StateGraph]")
        assert check_cross_refs(root) == []
89+
90+
91+
def test_titled_ref_unresolved() -> None:
    """An unknown name in @[title][ref] form is reported by its ref name."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "langsmith/page.mdx", "@[my title][UnknownRef]")
        unresolved = check_cross_refs(root)
        assert [entry[2] for entry in unresolved] == ["UnknownRef"]
99+
100+
101+
def test_code_samples_snippets_skipped() -> None:
    """Anything below snippets/code-samples/ is excluded from the check."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "snippets/code-samples/example.mdx", "@[NonExistentWidget]")
        assert check_cross_refs(root) == []
108+
109+
110+
def test_backtick_ref() -> None:
    """A backtick-wrapped name, @[`ClassName`], still resolves."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        _write(root, "langsmith/page.mdx", "@[`StateGraph`]")
        assert check_cross_refs(root) == []
117+
118+
119+
def test_multiple_refs_on_same_line() -> None:
    """Each reference on a line is validated; only the unknown one is reported."""
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        text = "Use @[StateGraph] and @[NonExistentWidget] together."
        _write(root, "langsmith/page.mdx", text)
        unresolved = check_cross_refs(root)
        assert [entry[2] for entry in unresolved] == ["NonExistentWidget"]

0 commit comments

Comments
 (0)