@@ -1263,7 +1263,18 @@ pub const PageFormatter = struct {
12631263 '&' = > try writer .writeAll ("&" ),
12641264 '"' = > try writer .writeAll (""" ),
12651265 '\' ' = > try writer .writeAll ("'" ),
1266- else = > try writer .print ("{u}" , .{codepoint }),
1266+ else = > {
1267+ // For HTML, emit ASCII (< 0x80) directly, but encode
1268+ // all non-ASCII as numeric entities to avoid encoding
1269+ // detection issues (fixes #9426). We can't set the
1270+ // meta tag because we emit partial HTML so this ensures
1271+ // proper unicode handling.
1272+ if (codepoint < 0x80 ) {
1273+ try writer .print ("{u}" , .{codepoint });
1274+ } else {
1275+ try writer .print ("&#{d};" , .{codepoint });
1276+ }
1277+ },
12671278 }
12681279 },
12691280 }
@@ -5065,6 +5076,104 @@ test "Page html with escaping" {
50655076 try testing .expectEqual (Coordinate { .x = 11 , .y = 0 }, point_map .items [offset + 30 ]);
50665077}
50675078
// Regression test for #9426: when formatting a page as HTML, every
// codepoint at or above 0x80 must be written as a decimal numeric
// character reference (`&#NNNN;`) so the output does not depend on the
// consumer guessing the document encoding.
test "Page html with unicode as numeric entities" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var builder: std.Io.Writer.Allocating = .init(alloc);
    defer builder.deinit();

    var t = try Terminal.init(alloc, .{
        .cols = 80,
        .rows = 24,
    });
    defer t.deinit(alloc);

    var s = t.vtStream();
    defer s.deinit();

    // Box drawing characters that caused issue #9426
    try s.nextSlice("╰─ ❯");

    const pages = &t.screen.pages;
    const page = &pages.pages.last.?.data;
    var formatter: PageFormatter = .init(page, .{ .emit = .html });

    try formatter.format(&builder.writer);
    const output = builder.writer.buffered();

    // Expected: box drawing chars as decimal numeric entities, with the
    // ASCII space between them emitted as-is.
    //   ╰ = U+2570 = 9584, ─ = U+2500 = 9472, ❯ = U+276F = 10095
    // The expected text must spell the entities out; raw UTF-8 here would
    // contradict what the HTML formatter writes for non-ASCII input.
    // NOTE(review): the spaces following each `\"` in the wrapper markup
    // look like copy artifacts — confirm against the formatter's output.
    try testing.expectEqualStrings(
        "<div style=\" font-family: monospace; white-space: pre;\" >&#9584;&#9472; &#10095;</div>",
        output,
    );
}
5112+
// Plain ASCII input must pass through the HTML formatter untouched:
// no numeric entities are introduced for codepoints below 0x80.
test "Page html ascii characters unchanged" {
    const testing = std.testing;
    const gpa = testing.allocator;

    // Sink that collects everything the formatter writes.
    var sink = std.Io.Writer.Allocating.init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    // Feed the text through the VT stream so it lands in the active page.
    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("hello world");

    // Format the most recent page of the screen as HTML.
    const last_page = &term.screen.pages.pages.last.?.data;
    var html = PageFormatter.init(last_page, .{ .emit = .html });
    try html.format(&sink.writer);

    // NOTE(review): the spaces following each `\"` in the wrapper markup
    // look like copy artifacts — confirm against the formatter's output.
    const expected = "<div style=\" font-family: monospace; white-space: pre;\" >hello world</div>";
    try testing.expectEqualStrings(expected, sink.writer.buffered());
}
5144+
// A line mixing ASCII and non-ASCII text: the ASCII parts are written
// verbatim while each codepoint at or above 0x80 becomes a decimal
// numeric character reference.
test "Page html mixed ascii and unicode" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var builder: std.Io.Writer.Allocating = .init(alloc);
    defer builder.deinit();

    var t = try Terminal.init(alloc, .{
        .cols = 80,
        .rows = 24,
    });
    defer t.deinit(alloc);

    var s = t.vtStream();
    defer s.deinit();

    try s.nextSlice("test ╰─❯ ok");

    const pages = &t.screen.pages;
    const page = &pages.pages.last.?.data;
    var formatter: PageFormatter = .init(page, .{ .emit = .html });

    try formatter.format(&builder.writer);
    const output = builder.writer.buffered();

    // Mix of ASCII and Unicode entities:
    //   ╰ = U+2570 = 9584, ─ = U+2500 = 9472, ❯ = U+276F = 10095
    // The expected text must spell the entities out; raw UTF-8 here would
    // contradict what the HTML formatter writes for non-ASCII input.
    // NOTE(review): the spaces following each `\"` in the wrapper markup
    // look like copy artifacts — confirm against the formatter's output.
    try testing.expectEqualStrings(
        "<div style=\" font-family: monospace; white-space: pre;\" >test &#9584;&#9472;&#10095; ok</div>",
        output,
    );
}
5176+
50685177test "Page VT with palette option emits RGB" {
50695178 const testing = std .testing ;
50705179 const alloc = testing .allocator ;
0 commit comments