Skip to content

Commit 7d6cd12

Browse files
authored
terminal: emit non-ASCII characters as Unicode codepoints for HTML (#9427)
Fixes #9426. Because we emit partial HTML, we can't set the meta charset tag, so we use numeric codepoint entities like `&#123;` for non-ASCII characters to ensure proper rendering.
2 parents b043623 + 05d2f88 commit 7d6cd12

File tree

1 file changed

+110
-1
lines changed

1 file changed

+110
-1
lines changed

‎src/terminal/formatter.zig‎

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1263,7 +1263,18 @@ pub const PageFormatter = struct {
12631263
'&' => try writer.writeAll("&amp;"),
12641264
'"' => try writer.writeAll("&quot;"),
12651265
'\'' => try writer.writeAll("&#39;"),
1266-
else => try writer.print("{u}", .{codepoint}),
1266+
else => {
1267+
// For HTML, emit ASCII (< 0x80) directly, but encode
1268+
// all non-ASCII as numeric entities to avoid encoding
1269+
// detection issues (fixes #9426). We can't set the
1270+
// meta tag because we emit partial HTML so this ensures
1271+
// proper unicode handling.
1272+
if (codepoint < 0x80) {
1273+
try writer.print("{u}", .{codepoint});
1274+
} else {
1275+
try writer.print("&#{d};", .{codepoint});
1276+
}
1277+
},
12671278
}
12681279
},
12691280
}
@@ -5065,6 +5076,104 @@ test "Page html with escaping" {
50655076
try testing.expectEqual(Coordinate{ .x = 11, .y = 0 }, point_map.items[offset + 30]);
50665077
}
50675078

5079+
test "Page html with unicode as numeric entities" {
5080+
const testing = std.testing;
5081+
const alloc = testing.allocator;
5082+
5083+
var builder: std.Io.Writer.Allocating = .init(alloc);
5084+
defer builder.deinit();
5085+
5086+
var t = try Terminal.init(alloc, .{
5087+
.cols = 80,
5088+
.rows = 24,
5089+
});
5090+
defer t.deinit(alloc);
5091+
5092+
var s = t.vtStream();
5093+
defer s.deinit();
5094+
5095+
// Box drawing characters that caused issue #9426
5096+
try s.nextSlice("╰─ ❯");
5097+
5098+
const pages = &t.screen.pages;
5099+
const page = &pages.pages.last.?.data;
5100+
var formatter: PageFormatter = .init(page, .{ .emit = .html });
5101+
5102+
try formatter.format(&builder.writer);
5103+
const output = builder.writer.buffered();
5104+
5105+
// Expected: box drawing chars as numeric entities
5106+
// ╰ = U+2570 = 9584, ─ = U+2500 = 9472, ❯ = U+276F = 10095
5107+
try testing.expectEqualStrings(
5108+
"<div style=\"font-family: monospace; white-space: pre;\">&#9584;&#9472; &#10095;</div>",
5109+
output,
5110+
);
5111+
}
5112+
5113+
test "Page html ascii characters unchanged" {
5114+
const testing = std.testing;
5115+
const alloc = testing.allocator;
5116+
5117+
var builder: std.Io.Writer.Allocating = .init(alloc);
5118+
defer builder.deinit();
5119+
5120+
var t = try Terminal.init(alloc, .{
5121+
.cols = 80,
5122+
.rows = 24,
5123+
});
5124+
defer t.deinit(alloc);
5125+
5126+
var s = t.vtStream();
5127+
defer s.deinit();
5128+
5129+
try s.nextSlice("hello world");
5130+
5131+
const pages = &t.screen.pages;
5132+
const page = &pages.pages.last.?.data;
5133+
var formatter: PageFormatter = .init(page, .{ .emit = .html });
5134+
5135+
try formatter.format(&builder.writer);
5136+
const output = builder.writer.buffered();
5137+
5138+
// ASCII should be emitted directly
5139+
try testing.expectEqualStrings(
5140+
"<div style=\"font-family: monospace; white-space: pre;\">hello world</div>",
5141+
output,
5142+
);
5143+
}
5144+
5145+
test "Page html mixed ascii and unicode" {
5146+
const testing = std.testing;
5147+
const alloc = testing.allocator;
5148+
5149+
var builder: std.Io.Writer.Allocating = .init(alloc);
5150+
defer builder.deinit();
5151+
5152+
var t = try Terminal.init(alloc, .{
5153+
.cols = 80,
5154+
.rows = 24,
5155+
});
5156+
defer t.deinit(alloc);
5157+
5158+
var s = t.vtStream();
5159+
defer s.deinit();
5160+
5161+
try s.nextSlice("test ╰─❯ ok");
5162+
5163+
const pages = &t.screen.pages;
5164+
const page = &pages.pages.last.?.data;
5165+
var formatter: PageFormatter = .init(page, .{ .emit = .html });
5166+
5167+
try formatter.format(&builder.writer);
5168+
const output = builder.writer.buffered();
5169+
5170+
// Mix of ASCII and Unicode entities
5171+
try testing.expectEqualStrings(
5172+
"<div style=\"font-family: monospace; white-space: pre;\">test &#9584;&#9472;&#10095; ok</div>",
5173+
output,
5174+
);
5175+
}
5176+
50685177
test "Page VT with palette option emits RGB" {
50695178
const testing = std.testing;
50705179
const alloc = testing.allocator;

0 commit comments

Comments
 (0)