@@ -2,14 +2,13 @@ package codegen
22
33import (
44 "bufio"
5+ "encoding/xml"
56 "fmt"
67 "html"
78 "io"
9+ "log"
810 "regexp"
911 "strings"
10-
11- xhtml "golang.org/x/net/html"
12- "golang.org/x/net/html/atom"
1312)
1413
1514var reNewline = regexp .MustCompile (`\r?\n` )
@@ -152,7 +151,11 @@ func getLeadingWhitespace(v string) string {
152151
153152// generateDoc will generate the proper doc string for html encoded or plain text doc entries.
154153func generateDoc (htmlSrc string ) string {
155- tokenizer := xhtml .NewTokenizer (strings .NewReader (htmlSrc ))
154+ tokenizer := xml .NewDecoder (strings .NewReader (htmlSrc ))
155+ tokenizer .Strict = false
156+ tokenizer .AutoClose = xml .HTMLAutoClose
157+ tokenizer .Entity = xml .HTMLEntity
158+
156159 var builder strings.Builder
157160 if err := encodeHTMLToText (& builder , tokenizer ); err != nil {
158161 panic (fmt .Sprintf ("failed to generated docs, %v" , err ))
@@ -168,31 +171,30 @@ type stringWriter interface {
168171 WriteString (string ) (int , error )
169172}
170173
171- func encodeHTMLToText (w stringWriter , z * xhtml. Tokenizer ) error {
174+ func encodeHTMLToText (w stringWriter , z * xml. Decoder ) error {
172175 encoder := newHTMLTokenEncoder (w )
173176 defer encoder .Flush ()
174177
175178 for {
176- tt := z .Next ()
177- if tt == xhtml .ErrorToken {
178- if err := z .Err (); err == io .EOF {
179- return nil
180- } else if err != nil {
181- return err
182- }
179+ tt , err := z .Token ()
180+ if err == io .EOF {
181+ return nil
182+ }
183+ if err != nil {
184+ return err
183185 }
184186
185- if err := encoder .Encode (z . Token () ); err != nil {
187+ if err := encoder .Encode (tt ); err != nil {
186188 return err
187189 }
188190 }
189191}
190192
// htmlTokenHandler receives the tokens that htmlTokenEncoder dispatches while
// it walks a document.
type htmlTokenHandler interface {
	// OnStartTagToken is called for a start element. A non-nil return value
	// is pushed by the encoder and becomes the handler for the tokens that
	// follow, until the element that installed it is closed.
	OnStartTagToken(xml.StartElement) htmlTokenHandler

	// OnEndTagToken is called for an end element. The bool is true when the
	// encoder's depth equals the depth at which this handler was installed,
	// i.e. the handler's own block is closing.
	OnEndTagToken(xml.Token, bool)

	// OnSelfClosingTagToken is kept as part of the handler contract.
	// NOTE(review): the Encode method visible in this change has no case that
	// calls it - confirm whether it is still needed.
	OnSelfClosingTagToken(xml.Token)

	// OnTextTagToken is called for character data.
	OnTextTagToken(xml.CharData)
}
197199
198200type htmlTokenEncoder struct {
@@ -220,44 +222,45 @@ func newHTMLTokenEncoder(w stringWriter) *htmlTokenEncoder {
220222}
221223
222224func (e * htmlTokenEncoder ) Flush () error {
223- e .baseHandler .handler .OnEndTagToken (xhtml. Token { Type : xhtml . TextToken } , true )
225+ e .baseHandler .handler .OnEndTagToken (xml . CharData ([] byte {}) , true )
224226 return nil
225227}
226228
227- func (e * htmlTokenEncoder ) Encode (token xhtml .Token ) error {
229+ func (e * htmlTokenEncoder ) Encode (token xml .Token ) error {
228230 h := e .baseHandler
229231 if len (e .handlers ) != 0 {
230232 h = e .handlers [len (e .handlers )- 1 ]
231233 }
232234
233- switch token .Type {
234- case xhtml . StartTagToken :
235+ switch v := token .( type ) {
236+ case xml. StartElement :
235237 e .depth ++
236238
237- next := h .handler .OnStartTagToken (token )
239+ next := h .handler .OnStartTagToken (v )
238240 if next != nil {
239241 e .handlers = append (e .handlers , tokenHandlerItem {
240242 handler : next ,
241243 depth : e .depth ,
242244 })
243245 }
244246
245- case xhtml . EndTagToken :
247+ case xml. EndElement :
246248 handlerBlockClosing := e .depth == h .depth
247249
248250 h .handler .OnEndTagToken (token , handlerBlockClosing )
249251
250252 // Remove all but the root handler as the handler is no longer needed.
251- if handlerBlockClosing {
253+ if handlerBlockClosing && len ( e . handlers ) != 0 {
252254 e .handlers = e .handlers [:len (e .handlers )- 1 ]
253255 }
254256 e .depth --
257+ if e .depth < 0 {
258+ log .Printf ("ignoring unexpected closing tag, %v" , token )
259+ e .depth = 0
260+ }
255261
256- case xhtml .SelfClosingTagToken :
257- h .handler .OnSelfClosingTagToken (token )
258-
259- case xhtml .TextToken :
260- h .handler .OnTextTagToken (token )
262+ case xml.CharData :
263+ h .handler .OnTextTagToken (v )
261264 }
262265
263266 return nil
@@ -267,11 +270,11 @@ type baseTokenHandler struct {
267270 w stringWriter
268271}
269272
270- func (e * baseTokenHandler ) OnStartTagToken (token xhtml. Token ) htmlTokenHandler { return nil }
271- func (e * baseTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {}
272- func (e * baseTokenHandler ) OnSelfClosingTagToken (token xhtml .Token ) {}
273- func (e * baseTokenHandler ) OnTextTagToken (token xhtml. Token ) {
274- e .w .WriteString (token . Data )
273+ func (e * baseTokenHandler ) OnStartTagToken (token xml. StartElement ) htmlTokenHandler { return nil }
274+ func (e * baseTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {}
275+ func (e * baseTokenHandler ) OnSelfClosingTagToken (token xml .Token ) {}
276+ func (e * baseTokenHandler ) OnTextTagToken (token xml. CharData ) {
277+ e .w .WriteString (string ( token ) )
275278}
276279
277280type blockTokenHandler struct {
@@ -295,27 +298,27 @@ func newBlockTokenHandler(w stringWriter) *blockTokenHandler {
295298 },
296299 }
297300}
298- func (e * blockTokenHandler ) OnStartTagToken (token xhtml. Token ) htmlTokenHandler {
301+ func (e * blockTokenHandler ) OnStartTagToken (token xml. StartElement ) htmlTokenHandler {
299302 e .started = true
300303 if e .newlineBeforeNextBlock {
301304 e .w .WriteString ("\n " )
302305 e .newlineBeforeNextBlock = false
303306 }
304307
305- switch token .DataAtom {
306- case atom . A :
308+ switch token .Name . Local {
309+ case "a" :
307310 return newLinkTokenHandler (e .w , token )
308- case atom . Ul :
311+ case "ul" :
309312 e .w .WriteString ("\n " )
310313 e .newlineBeforeNextBlock = true
311314 return newListTokenHandler (e .w )
312315
313- case atom . Div , atom . Dt , atom . P , atom . H1 , atom . H2 , atom . H3 , atom . H4 , atom . H5 , atom . H6 :
316+ case "div" , "dt" , "p" , "h1" , "h2" , "h3" , "h4" , "h5" , "h6" :
314317 e .w .WriteString ("\n " )
315318 e .newlineBeforeNextBlock = true
316319 return newBlockTokenHandler (e .w )
317320
318- case atom . Pre , atom . Code :
321+ case "pre" , "code" :
319322 if e .rootBlock {
320323 e .w .WriteString ("\n " )
321324 e .w .WriteString (indent )
@@ -326,7 +329,7 @@ func (e *blockTokenHandler) OnStartTagToken(token xhtml.Token) htmlTokenHandler
326329
327330 return nil
328331}
329- func (e * blockTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {
332+ func (e * blockTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {
330333 if ! blockClosing {
331334 return
332335 }
@@ -340,34 +343,34 @@ func (e *blockTokenHandler) OnEndTagToken(token xhtml.Token, blockClosing bool)
340343 e .strBuilder .Reset ()
341344}
342345
343- func (e * blockTokenHandler ) OnTextTagToken (token xhtml. Token ) {
346+ func (e * blockTokenHandler ) OnTextTagToken (token xml. CharData ) {
344347 if e .newlineBeforeNextBlock {
345348 e .w .WriteString ("\n " )
346349 e .newlineBeforeNextBlock = false
347350 }
348351 if ! e .started {
349- token . Data = strings .TrimLeft (token . Data , " \t \n " )
352+ token = xml . CharData ( strings .TrimLeft (string ( token ) , " \t \n " ) )
350353 }
351- if len (token . Data ) != 0 {
354+ if len (token ) != 0 {
352355 e .started = true
353356 }
354357 e .baseTokenHandler .OnTextTagToken (token )
355358}
356359
357360type linkTokenHandler struct {
358361 baseTokenHandler
359- linkToken xhtml. Token
362+ linkToken xml. StartElement
360363}
361364
362- func newLinkTokenHandler (w stringWriter , token xhtml. Token ) * linkTokenHandler {
365+ func newLinkTokenHandler (w stringWriter , token xml. StartElement ) * linkTokenHandler {
363366 return & linkTokenHandler {
364367 baseTokenHandler : baseTokenHandler {
365368 w : w ,
366369 },
367370 linkToken : token ,
368371 }
369372}
370- func (e * linkTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {
373+ func (e * linkTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {
371374 if ! blockClosing {
372375 return
373376 }
@@ -390,9 +393,9 @@ func newListTokenHandler(w stringWriter) *listTokenHandler {
390393 },
391394 }
392395}
393- func (e * listTokenHandler ) OnStartTagToken (token xhtml. Token ) htmlTokenHandler {
394- switch token .DataAtom {
395- case atom . Li :
396+ func (e * listTokenHandler ) OnStartTagToken (token xml. StartElement ) htmlTokenHandler {
397+ switch token .Name . Local {
398+ case "li" :
396399 if e .items >= 1 {
397400 e .w .WriteString ("\n \n " )
398401 }
@@ -402,7 +405,7 @@ func (e *listTokenHandler) OnStartTagToken(token xhtml.Token) htmlTokenHandler {
402405 return nil
403406}
404407
405- func (e * listTokenHandler ) OnTextTagToken (token xhtml. Token ) {
408+ func (e * listTokenHandler ) OnTextTagToken (token xml. CharData ) {
406409 // Squash whitespace between list and items
407410}
408411
@@ -423,14 +426,14 @@ func newListItemTokenHandler(w stringWriter) *listItemTokenHandler {
423426 },
424427 }
425428}
426- func (e * listItemTokenHandler ) OnStartTagToken (token xhtml. Token ) htmlTokenHandler {
427- switch token .DataAtom {
428- case atom . P :
429+ func (e * listItemTokenHandler ) OnStartTagToken (token xml. StartElement ) htmlTokenHandler {
430+ switch token .Name . Local {
431+ case "p" :
429432 return newBlockTokenHandler (e .w )
430433 }
431434 return nil
432435}
433- func (e * listItemTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {
436+ func (e * listItemTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {
434437 if ! blockClosing {
435438 return
436439 }
@@ -456,18 +459,18 @@ func newTrimSpaceTokenHandler(w stringWriter) *trimSpaceTokenHandler {
456459 },
457460 }
458461}
459- func (e * trimSpaceTokenHandler ) OnEndTagToken (token xhtml .Token , blockClosing bool ) {
462+ func (e * trimSpaceTokenHandler ) OnEndTagToken (token xml .Token , blockClosing bool ) {
460463 if ! blockClosing {
461464 return
462465 }
463466
464467 e .origWriter .WriteString (strings .TrimSpace (e .strBuilder .String ()))
465468}
466469
// getHTMLTokenAttr returns the value of the first attribute in attr whose
// local name equals name, compared case-insensitively. The boolean reports
// whether such an attribute was found; when it is false the returned string
// is empty. The attribute's namespace is not considered.
func getHTMLTokenAttr(attr []xml.Attr, name string) (string, bool) {
	for _, a := range attr {
		if strings.EqualFold(a.Name.Local, name) {
			return a.Value, true
		}
	}
	return "", false
}
0 commit comments