Side-effect-free `markdownEnabled` output from source offsets

Context:
- This PR is based on `next`.
- Prior work exists in #506 (not merged into `next`) and is referred to here as the previous approach.

Top-level detail:
- The previous approach (PR #506) generated `__hike.markdown` by serializing the AST, so upstream remark AST mutations were reflected but source fidelity was not guaranteed.
- This caused round-trip drift (normalization of formatting, line endings, and GFM layout).
- This change generates `__hike.markdown` from original source offsets, prioritizing source-faithful output and side-effect-free plugin results.

Additional changes:
- Thread the original source through the remark transform path into section serialization.
- Compute markdown only from section content paragraphs, using node offsets.
- Preserve flow-level `<br />` spacing semantics:
  - leading breaks before the first paragraph,
  - one baseline separator newline plus one extra newline per intermediate `<br />`,
  - trailing breaks after the last paragraph.
- Keep markdown export opt-in via `markdownEnabled` (attribute behavior unchanged).
- Remove the extra markdown serialization dependency introduced in the previous approach.
- Add focused tests for:
  - source-preserving markdown capture,
  - behavior when `markdownEnabled` is not set,
  - `<br />` spacing behavior.
code-hike · gurkerl83 · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026 · 2d39ff1ece55fa58e684bba7c43089bd1cecdbbb
commit 2d39ff1ece55fa58e684bba7c43089bd1cecdbbb
diff --git a/packages/codehike/package.json b/packages/codehike/package.json
@@ -47,7 +47,8 @@
   "scripts": {
     "build": "tsc -p . ",
     "dev": "tsc -p . --watch",
     "test": "vitest run",
+    "test:markdown": "vitest run markdown-enabled.test.ts",
     "watch": "vitest -u",
     "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
     "check-exports": "attw --pack ."

diff --git a/packages/codehike/src/mdx.ts b/packages/codehike/src/mdx.ts
@@ -16,8 +16,12 @@ export const remarkCodeHike: Plugin<[CodeHikeConfig?], Root, Root> = (
 ) => {
   const safeConfig = config || {}
   return async (root, file) => {
+    // Node offsets index into the text handed to the parser, i.e. String(file).
+    // Reading file.value directly would lose the source (and with it the
+    // markdown export) whenever the VFile holds bytes instead of a string.
+    const source = String(file)
     let tree = await transformImportedCode(root, file)
-    tree = await transformAllHikes(tree, safeConfig)
+    tree = await transformAllHikes(tree, safeConfig, source)
     tree = await transformAllCode(tree, safeConfig)
     return tree
   }

diff --git a/packages/codehike/src/mdx/1.0.transform-hikes.ts b/packages/codehike/src/mdx/1.0.transform-hikes.ts
@@ -5,7 +5,48 @@ import { isHikeElement, listToSection } from "./1.1.remark-list-to-section.js"
 import { sectionToAttribute } from "./1.2.remark-section-to-attribute.js"
 import { CodeHikeConfig } from "./config.js"
 
-export async function transformAllHikes(root: Root, config: CodeHikeConfig) {
+/**
+ * Reports whether an MDX JSX flow element opts in to markdown export through
+ * its `markdownEnabled` attribute.
+ *
+ * Resolution rules:
+ * - attribute absent: `false`.
+ * - bare attribute (`<SomeTag markdownEnabled>`): `true`.
+ * - expression attribute (`<SomeTag markdownEnabled={true} />`): `true` only
+ *   when the raw expression text, trimmed, is exactly `true`.
+ * - any other value (for example a string literal): `false`.
+ *
+ * @param node - The JSX flow element whose attributes are inspected.
+ * @returns Whether markdown export is enabled for `node`.
+ */
+export function isMarkdownEnabled(node: MdxJsxFlowElement): boolean {
+  const flag = node.attributes.find(
+    (candidate): candidate is MdxJsxAttribute =>
+      candidate.type === "mdxJsxAttribute" &&
+      candidate.name === "markdownEnabled",
+  )
+  if (flag === undefined) return false
+
+  const raw = flag.value
+
+  // A bare attribute is stored with a null value and counts as enabled.
+  if (raw === null) return true
+
+  // Only expression values can enable the flag from here on; they are
+  // objects tagged "mdxJsxAttributeValueExpression".
+  if (typeof raw !== "object") return false
+  if (raw.type !== "mdxJsxAttributeValueExpression") return false
+
+  // `.value` holds the expression's raw text; compare it, trimmed, to the
+  // literal `true`.
+  return raw.value.trim() === "true"
+}
+
+export async function transformAllHikes(
+  root: Root,
+  config: CodeHikeConfig,
+  source?: string,
+) {
   let tree = wrapInHike(root)
 
   const hikes: MdxJsxFlowElement[] = []
@@ -16,7 +57,7 @@ export async function transformAllHikes(root: Root, config: CodeHikeConfig) {
     }
   })
 
-  await Promise.all(hikes.map((h) => transformRemarkHike(h, config)))
+  await Promise.all(hikes.map((h) => transformRemarkHike(h, config, source)))
 
   return tree
 }
@@ -41,9 +82,17 @@ function wrapInHike(root: Root) {
 async function transformRemarkHike(
   node: MdxJsxFlowElement,
   config: CodeHikeConfig,
+  source?: string,
 ) {
+  // Markdown export is opt-in per hike element: read the `markdownEnabled`
+  // attribute from the node being transformed.
+  const markdownEnabled = isMarkdownEnabled(node)
   const section = await listToSection(node, config)
-  const { children, attributes } = sectionToAttribute(section)
+  const { children, attributes } = sectionToAttribute(
+    section,
+    markdownEnabled,
+    source,
+  )
 
   node.children = children
   node.attributes.push(...attributes)

diff --git a/packages/codehike/src/mdx/1.2.remark-section-to-attribute.ts b/packages/codehike/src/mdx/1.2.remark-section-to-attribute.ts
@@ -1,15 +1,20 @@
 import { MdxJsxAttribute, MdxJsxFlowElement } from "mdast-util-mdx-jsx"
-import {
-  HikeContent,
-  HikeSection,
-  JSXChild,
-} from "./1.1.remark-list-to-section.js"
+import { HikeSection, JSXChild } from "./1.1.remark-list-to-section.js"
 import { getObjectAttribute } from "./estree.js"
 
-export function sectionToAttribute(root: HikeSection) {
+export function sectionToAttribute(
+  root: HikeSection,
+  markdownEnabled: boolean,
+  source?: string,
+) {
   const children: JSXChild[] = getSectionContainers(root, "")
 
-  const serializableTree = getSerializableNode(root, "")
+  const serializableTree = getSerializableNode(
+    root,
+    "",
+    markdownEnabled,
+    source,
+  )
 
   return {
     children,
@@ -23,20 +28,39 @@ export function sectionToAttribute(root: HikeSection) {
   }
 }
 
-function getSerializableNode(section: HikeSection, path: string) {
+function getSerializableNode(
+  section: HikeSection,
+  path: string,
+  markdownEnabled: boolean = false,
+  source?: string,
+) {
   const newPath = path ? [path, section.name].join(".") : section.name
   const node: any = {
     children: newPath,
     title: section.title,
     _data: section._data,
   }
 
+  const markdown = computeSectionMarkdownFromContentNodes(
+    section,
+    markdownEnabled,
+    source,
+  )
+  if (markdown !== undefined) {
+    node.markdown = markdown
+  }
+
   section.children.forEach((child) => {
     if (child.type === "content") {
       return
     }
     if (child.type === "section") {
-      const childNode = getSerializableNode(child, newPath)
+      const childNode = getSerializableNode(
+        child,
+        newPath,
+        markdownEnabled,
+        source,
+      )
 
       if (child.multi) {
         node[child.name] = node[child.name] || []
@@ -64,6 +88,90 @@ function getSerializableNode(section: HikeSection, path: string) {
   return node
 }
 
+/**
+ * Builds the `markdown` string of one section from the original source text.
+ *
+ * Only the section's "content" children are read. Paragraphs contribute the
+ * source text between their start and end offsets (trailing whitespace
+ * removed); flow-level `<br />` elements contribute newlines; every other
+ * content node is ignored.
+ *
+ * @param section - Section whose children are scanned.
+ * @param markdownEnabled - When `false`, nothing is computed.
+ * @param source - Original document text that node offsets refer to.
+ * @returns The assembled markdown, or `undefined` when export is disabled,
+ * `source` is missing, or no non-empty paragraph was found.
+ */
+function computeSectionMarkdownFromContentNodes(
+  section: HikeSection,
+  markdownEnabled: boolean,
+  source?: string,
+): string | undefined {
+  // Nothing to do unless export is on and the source text is known.
+  if (!markdownEnabled) return undefined
+  if (source == null) return undefined
+
+  let output: string | undefined
+  let breaksSeen = 0
+
+  for (const child of section.children) {
+    if (child.type !== "content") continue
+
+    const current = child.value
+    if (isFlowBrElement(current)) {
+      // Held back until the next paragraph, or until the end of the
+      // section if no paragraph follows.
+      breaksSeen += 1
+      continue
+    }
+
+    if (!isParagraphNode(current)) continue
+
+    const text = sliceOriginalSourceByNodeOffset(source, current).trimEnd()
+    // An empty slice is skipped and leaves the break count untouched.
+    if (text === "") continue
+
+    // The first paragraph gets one newline per preceding break; later ones
+    // get one separator newline plus one per break seen in between.
+    const gap = output === undefined ? breaksSeen : breaksSeen + 1
+    output = (output ?? "") + "\n".repeat(gap) + text
+    breaksSeen = 0
+  }
+
+  // Breaks after the last paragraph become trailing newlines.
+  if (output !== undefined && breaksSeen > 0) {
+    output += "\n".repeat(breaksSeen)
+  }
+
+  return output
+}
+
+/**
+ * Returns the text of `source` between the node's start and end offsets, or
+ * "" when either offset is not a number.
+ */
+function sliceOriginalSourceByNodeOffset(
+  source: string,
+  node: JSXChild,
+): string {
+  const from = node.position?.start?.offset
+  const to = node.position?.end?.offset
+  const hasOffsets = typeof from === "number" && typeof to === "number"
+  return hasOffsets ? source.slice(from, to) : ""
+}
+
+/** True when `node` is a paragraph node. */
+function isParagraphNode(node: JSXChild): boolean {
+  return node.type === "paragraph"
+}
+
+/** True for a flow-level JSX `br` element; the tag name is case-insensitive. */
+function isFlowBrElement(node: JSXChild): boolean {
+  if (node.type !== "mdxJsxFlowElement") return false
+  const tagName = node.name
+  return typeof tagName === "string" && tagName.toLowerCase() === "br"
+}
+
 function getSectionContainers(section: HikeSection, path: string) {
   const newPath = path ? [path, section.name].join(".") : section.name
   const children: JSXChild[] = [sectionContainer(section, newPath)]

diff --git a/packages/codehike/tests/markdown-enabled.test.ts b/packages/codehike/tests/markdown-enabled.test.ts
@@ -0,0 +1,87 @@
+import { compile, run } from "@mdx-js/mdx"
+import * as runtime from "react/jsx-runtime"
+import { expect, test } from "vitest"
+import { parse } from "../src/index"
+import { recmaCodeHike, remarkCodeHike } from "../src/mdx"
+
+/**
+ * Compiles `source` as MDX with the Code Hike remark and recma plugins, runs
+ * the compiled module, and hands its default export to `parse`.
+ */
+async function compileToBlocks(source: string) {
+  const file = { value: source, path: "/virtual/markdown-enabled.mdx" }
+  const compiled = await compile(file, {
+    jsx: false,
+    outputFormat: "function-body",
+    remarkPlugins: [[remarkCodeHike, {}]],
+    recmaPlugins: [[recmaCodeHike, {}]],
+  })
+  const { default: Content } = await run(compiled, runtime)
+
+  const components = { Other: () => null }
+  return parse(Content, { components }) as any
+}
+
+test("uses source markdown for markdownEnabled sections", async () => {
+  const { props } = await compileToBlocks(`
+<slot markdownEnabled>
+
+# !!posts One
+
+Hello **x**
+
+| a | b |
+| - | - |
+| 1 | 2 |
+
+<Other a={1 + 2} />
+
+# !!posts Two
+
+After _it_
+
+</slot>
+`)
+  const { posts } = props
+  expect(posts[0].markdown).toContain("Hello **x**")
+  expect(posts[0].markdown).toContain("| a | b |")
+  expect(posts[0].markdown).not.toContain("<Other")
+  expect(posts[1].markdown).toBe("After _it_")
+})
+
+test("does not add markdown when markdownEnabled is not set", async () => {
+  const { props } = await compileToBlocks(`
+<slot>
+
+# !!posts One
+
+Hello **x**
+
+</slot>
+`)
+  const { posts } = props
+  expect(posts[0].markdown).toBeUndefined()
+})
+
+test("preserves <br /> spacing semantics around paragraphs", async () => {
+  const { props } = await compileToBlocks(`
+<slot markdownEnabled>
+
+# !!posts One
+
+<br />
+
+First
+
+<br />
+<br />
+
+Second
+
+<br />
+
+</slot>
+`)
+  const { posts } = props
+  expect(posts[0].markdown).toBe("\nFirst\n\n\nSecond\n")
+})