Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Side-effect-free markdownEnabled output from source offsets
Context:
- This PR is based on `next`.
- Prior work exists in #506 (not merged into `next`), and is referenced here as the previous approach.

Top-level detail:
- Previous approach (PR #506) generated `__hike.markdown` from AST serialization, so upstream remark AST mutations were reflected but source fidelity was not guaranteed.
- This caused round-trip drift (formatting/line endings/GFM layout normalization).
- This change generates `__hike.markdown` from original source offsets, prioritizing source-faithful output and side-effect-free plugin results.

Additional changes:
- Thread original source through the remark transform path into section serialization.
- Compute markdown only from section content paragraphs using node offsets.
- Preserve flow-level `<br />` spacing semantics:
  - leading breaks before the first paragraph,
  - one baseline separator newline plus extra newlines for intermediate `<br />`,
  - trailing breaks after the last paragraph.
- Keep markdown export opt-in via `markdownEnabled` (attribute behavior unchanged).
- Remove the extra markdown serialization dependency introduced in the previous approach.
- Add focused tests for:
  - source-preserving markdown capture,
  - behavior when `markdownEnabled` is not set,
  - `<br />` spacing behavior.
  • Loading branch information
gurkerl83 committed Feb 18, 2026
commit 2d39ff1ece55fa58e684bba7c43089bd1cecdbbb
2 changes: 1 addition & 1 deletion packages/codehike/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
"scripts": {
"build": "tsc -p . ",
"dev": "tsc -p . --watch",
"test": "vitest run",
"test": "vitest run markdown-enabled.test.ts",
"watch": "vitest -u",
"clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
"check-exports": "attw --pack ."
Expand Down
3 changes: 2 additions & 1 deletion packages/codehike/src/mdx.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ export const remarkCodeHike: Plugin<[CodeHikeConfig?], Root, Root> = (
) => {
const safeConfig = config || {}
return async (root, file) => {
const source = typeof file.value === "string" ? file.value : undefined
let tree = await transformImportedCode(root, file)
tree = await transformAllHikes(tree, safeConfig)
tree = await transformAllHikes(tree, safeConfig, source)
tree = await transformAllCode(tree, safeConfig)
return tree
}
Expand Down
52 changes: 49 additions & 3 deletions packages/codehike/src/mdx/1.0.transform-hikes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,48 @@ import { isHikeElement, listToSection } from "./1.1.remark-list-to-section.js"
import { sectionToAttribute } from "./1.2.remark-section-to-attribute.js"
import { CodeHikeConfig } from "./config.js"

export async function transformAllHikes(root: Root, config: CodeHikeConfig) {
/**
 * Reports whether an MDX JSX flow element opts into markdown export through a
 * `markdownEnabled` attribute.
 *
 * Only the first plain (`mdxJsxAttribute`) attribute named `markdownEnabled`
 * is inspected:
 * - no such attribute                          -> `false`
 * - shorthand form (`<Tag markdownEnabled>`)   -> `true`
 * - expression form (`markdownEnabled={...}`)  -> `true` only when the raw
 *   expression text, trimmed, is exactly `true`
 * - any other value (e.g. a string literal)    -> `false`
 *
 * @param node - The MDX JSX flow element whose attributes are examined.
 * @returns `true` when the element enables markdown export, else `false`.
 */
export function isMarkdownEnabled(node: MdxJsxFlowElement): boolean {
  for (const attribute of node.attributes) {
    // Skip spread/expression attributes and attributes with other names.
    if (
      attribute.type !== "mdxJsxAttribute" ||
      attribute.name !== "markdownEnabled"
    ) {
      continue
    }

    const rawValue = attribute.value

    // A bare attribute carries a null value and counts as enabled.
    if (rawValue === null) {
      return true
    }

    // An expression value exposes its source text on `.value`; the text is
    // compared literally, the expression is never evaluated.
    const isExpression =
      typeof rawValue === "object" &&
      rawValue.type === "mdxJsxAttributeValueExpression"

    return isExpression ? rawValue.value.trim() === "true" : false
  }

  return false
}

export async function transformAllHikes(
root: Root,
config: CodeHikeConfig,
source?: string,
) {
let tree = wrapInHike(root)

const hikes: MdxJsxFlowElement[] = []
Expand All @@ -16,7 +57,7 @@ export async function transformAllHikes(root: Root, config: CodeHikeConfig) {
}
})

await Promise.all(hikes.map((h) => transformRemarkHike(h, config)))
await Promise.all(hikes.map((h) => transformRemarkHike(h, config, source)))

return tree
}
Expand All @@ -41,9 +82,14 @@ function wrapInHike(root: Root) {
async function transformRemarkHike(
node: MdxJsxFlowElement,
config: CodeHikeConfig,
source?: string,
) {
const section = await listToSection(node, config)
const { children, attributes } = sectionToAttribute(section)
const { children, attributes } = sectionToAttribute(
section,
markdownEnabled,
source,
)

node.children = children
node.attributes.push(...attributes)
Expand Down
126 changes: 117 additions & 9 deletions packages/codehike/src/mdx/1.2.remark-section-to-attribute.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import { MdxJsxAttribute, MdxJsxFlowElement } from "mdast-util-mdx-jsx"
import {
HikeContent,
HikeSection,
JSXChild,
} from "./1.1.remark-list-to-section.js"
import { HikeSection, JSXChild } from "./1.1.remark-list-to-section.js"
import { getObjectAttribute } from "./estree.js"

export function sectionToAttribute(root: HikeSection) {
export function sectionToAttribute(
root: HikeSection,
markdownEnabled: boolean,
source?: string,
) {
const children: JSXChild[] = getSectionContainers(root, "")

const serializableTree = getSerializableNode(root, "")
const serializableTree = getSerializableNode(
root,
"",
markdownEnabled,
source,
)

return {
children,
Expand All @@ -23,20 +28,39 @@ export function sectionToAttribute(root: HikeSection) {
}
}

function getSerializableNode(section: HikeSection, path: string) {
function getSerializableNode(
section: HikeSection,
path: string,
markdownEnabled: boolean = false,
source?: string,
) {
const newPath = path ? [path, section.name].join(".") : section.name
const node: any = {
children: newPath,
title: section.title,
_data: section._data,
}

const markdown = computeSectionMarkdownFromContentNodes(
section,
markdownEnabled,
source,
)
if (markdown !== undefined) {
node.markdown = markdown
}

section.children.forEach((child) => {
if (child.type === "content") {
return
}
if (child.type === "section") {
const childNode = getSerializableNode(child, newPath)
const childNode = getSerializableNode(
child,
newPath,
markdownEnabled,
source,
)

if (child.multi) {
node[child.name] = node[child.name] || []
Expand Down Expand Up @@ -64,6 +88,90 @@ function getSerializableNode(section: HikeSection, path: string) {
return node
}

/**
 * Builds the markdown string for one section from the original source text.
 *
 * Only direct `content` children of the section are considered. Paragraph
 * nodes contribute their source slice (trailing whitespace trimmed); flow-level
 * `<br />` elements contribute newlines; every other content node is ignored.
 *
 * Newline rules:
 * - before the first paragraph: one newline per preceding `<br />`;
 * - between paragraphs: one newline, plus one per `<br />` in between;
 * - after the last paragraph: one newline per trailing `<br />`.
 *
 * @param section - Section whose content children are scanned.
 * @param markdownEnabled - When false, nothing is computed.
 * @param source - Original document text that node offsets index into.
 * @returns The assembled markdown, or `undefined` when markdown is disabled,
 *   no source is available, or no non-empty paragraph was found.
 */
function computeSectionMarkdownFromContentNodes(
  section: HikeSection,
  markdownEnabled: boolean,
  source?: string,
): string | undefined {
  if (source == null || !markdownEnabled) {
    return undefined
  }

  const pieces: string[] = []
  let paragraphsEmitted = 0
  let breaksSinceLastParagraph = 0

  for (const child of section.children) {
    if (child.type !== "content") {
      continue
    }

    const contentNode = child.value

    if (isFlowBrElement(contentNode)) {
      breaksSinceLastParagraph += 1
      continue
    }

    if (!isParagraphNode(contentNode)) {
      continue
    }

    const text = sliceOriginalSourceByNodeOffset(source, contentNode).trimEnd()
    if (text === "") {
      // Empty slices are skipped without consuming the pending breaks.
      continue
    }

    // The first paragraph gets only the break newlines; later paragraphs get
    // one baseline separator newline on top of them.
    const newlineCount =
      paragraphsEmitted === 0
        ? breaksSinceLastParagraph
        : breaksSinceLastParagraph + 1

    pieces.push("\n".repeat(newlineCount), text)
    paragraphsEmitted += 1
    breaksSinceLastParagraph = 0
  }

  if (paragraphsEmitted === 0) {
    return undefined
  }

  // Breaks left over after the last paragraph become trailing newlines.
  pieces.push("\n".repeat(breaksSinceLastParagraph))

  return pieces.join("")
}

/**
 * Returns the part of `source` covered by a node's positional offsets.
 *
 * @param source - Text that the node's offsets index into.
 * @param node - Node whose `position.start.offset` and `position.end.offset`
 *   delimit the slice.
 * @returns The substring between the two offsets, or an empty string when
 *   either offset is missing or not a number.
 */
function sliceOriginalSourceByNodeOffset(
  source: string,
  node: JSXChild,
): string {
  const position = node.position
  const from = position?.start?.offset
  const to = position?.end?.offset

  const hasBothOffsets = typeof from === "number" && typeof to === "number"

  return hasBothOffsets ? source.slice(from, to) : ""
}

/**
 * Tells whether a content node is a paragraph.
 *
 * @param node - Node to classify.
 * @returns `true` when the node's `type` is `"paragraph"`.
 */
function isParagraphNode(node: JSXChild): boolean {
  const { type } = node
  return type === "paragraph"
}

/**
 * Tells whether a content node is a flow-level `<br />` JSX element.
 *
 * @param node - Node to classify.
 * @returns `true` when the node is an `mdxJsxFlowElement` whose name is a
 *   string equal to `br`, compared case-insensitively.
 */
function isFlowBrElement(node: JSXChild): boolean {
  if (node.type !== "mdxJsxFlowElement") {
    return false
  }

  const tagName = node.name
  // Elements without a string name never match.
  if (typeof tagName !== "string") {
    return false
  }

  return tagName.toLowerCase() === "br"
}

function getSectionContainers(section: HikeSection, path: string) {
const newPath = path ? [path, section.name].join(".") : section.name
const children: JSXChild[] = [sectionContainer(section, newPath)]
Expand Down
87 changes: 87 additions & 0 deletions packages/codehike/tests/markdown-enabled.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { compile, run } from "@mdx-js/mdx"
import * as runtime from "react/jsx-runtime"
import { expect, test } from "vitest"
import { parse } from "../src/index"
import { recmaCodeHike, remarkCodeHike } from "../src/mdx"

/**
 * Test helper: compiles an MDX string with the Code Hike remark and recma
 * plugins, runs the compiled function body, and parses the resulting content
 * component into blocks.
 *
 * The `Other` component is stubbed to render nothing.
 *
 * @param source - MDX document text to compile.
 * @returns The parsed blocks, typed as `any`.
 */
async function compileToBlocks(source: string) {
  const compiled = await compile(
    { path: "/virtual/markdown-enabled.mdx", value: source },
    {
      recmaPlugins: [[recmaCodeHike, {}]],
      remarkPlugins: [[remarkCodeHike, {}]],
      outputFormat: "function-body",
      jsx: false,
    },
  )

  const mdxModule = await run(compiled, runtime)
  const stubComponents = { Other: () => null }

  return parse(mdxModule.default, { components: stubComponents }) as any
}

// A markdownEnabled slot exposes each section's paragraph text exactly as it
// appears in the source, and leaves JSX elements out of it.
test("uses source markdown for markdownEnabled sections", async () => {
  const mdx = `
<slot markdownEnabled>

# !!posts One

Hello **x**

| a | b |
| - | - |
| 1 | 2 |

<Other a={1 + 2} />

# !!posts Two

After _it_

</slot>
`
  const blocks = await compileToBlocks(mdx)
  const { posts } = blocks.props

  // First section: inline formatting and the table text survive verbatim,
  // while the JSX element is excluded.
  expect(posts[0].markdown).toContain("Hello **x**")
  expect(posts[0].markdown).toContain("| a | b |")
  expect(posts[0].markdown).not.toContain("<Other")

  // Second section: a single paragraph, reproduced exactly.
  expect(posts[1].markdown).toBe("After _it_")
})

// Without the markdownEnabled attribute, sections carry no markdown field.
test("does not add markdown when markdownEnabled is not set", async () => {
  const mdx = `
<slot>

# !!posts One

Hello **x**

</slot>
`
  const blocks = await compileToBlocks(mdx)
  const { posts } = blocks.props

  expect(posts[0].markdown).toBeUndefined()
})

// Flow-level <br /> elements become newlines: one leading break, two breaks
// between the paragraphs (on top of the baseline separator), one trailing.
test("preserves <br /> spacing semantics around paragraphs", async () => {
  const mdx = `
<slot markdownEnabled>

# !!posts One

<br />

First

<br />
<br />

Second

<br />

</slot>
`
  const blocks = await compileToBlocks(mdx)
  const { posts } = blocks.props

  expect(posts[0].markdown).toBe("\nFirst\n\n\nSecond\n")
})