code.gitea.io/gitea@v1.19.3/modules/charset/htmlstream.go (about)

     1  // Copyright 2022 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package charset
     5  
     6  import (
     7  	"fmt"
     8  	"io"
     9  
    10  	"golang.org/x/net/html"
    11  )
    12  
    13  // HTMLStreamer represents a SAX-like interface for HTML
    14  type HTMLStreamer interface {
    15  	Error(err error) error
    16  	Doctype(data string) error
    17  	Comment(data string) error
    18  	StartTag(data string, attrs ...html.Attribute) error
    19  	SelfClosingTag(data string, attrs ...html.Attribute) error
    20  	EndTag(data string) error
    21  	Text(data string) error
    22  }
    23  
    24  // PassthroughHTMLStreamer is a passthrough streamer
    25  type PassthroughHTMLStreamer struct {
    26  	next HTMLStreamer
    27  }
    28  
    29  func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
    30  	return &PassthroughHTMLStreamer{next: next}
    31  }
    32  
    33  var _ (HTMLStreamer) = &PassthroughHTMLStreamer{}
    34  
    35  // Error tells the next streamer in line that there is an error
    36  func (p *PassthroughHTMLStreamer) Error(err error) error {
    37  	return p.next.Error(err)
    38  }
    39  
    40  // Doctype tells the next streamer what the doctype is
    41  func (p *PassthroughHTMLStreamer) Doctype(data string) error {
    42  	return p.next.Doctype(data)
    43  }
    44  
    45  // Comment tells the next streamer there is a comment
    46  func (p *PassthroughHTMLStreamer) Comment(data string) error {
    47  	return p.next.Comment(data)
    48  }
    49  
    50  // StartTag tells the next streamer there is a starting tag
    51  func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
    52  	return p.next.StartTag(data, attrs...)
    53  }
    54  
    55  // SelfClosingTag tells the next streamer there is a self-closing tag
    56  func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
    57  	return p.next.SelfClosingTag(data, attrs...)
    58  }
    59  
    60  // EndTag tells the next streamer there is a end tag
    61  func (p *PassthroughHTMLStreamer) EndTag(data string) error {
    62  	return p.next.EndTag(data)
    63  }
    64  
    65  // Text tells the next streamer there is a text
    66  func (p *PassthroughHTMLStreamer) Text(data string) error {
    67  	return p.next.Text(data)
    68  }
    69  
    70  // HTMLStreamWriter acts as a writing sink
    71  type HTMLStreamerWriter struct {
    72  	io.Writer
    73  	err error
    74  }
    75  
    76  // Write implements io.Writer
    77  func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
    78  	if h.err != nil {
    79  		return 0, h.err
    80  	}
    81  	return h.Writer.Write(data)
    82  }
    83  
    84  // Write implements io.StringWriter
    85  func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
    86  	if h.err != nil {
    87  		return 0, h.err
    88  	}
    89  	return h.Writer.Write([]byte(data))
    90  }
    91  
    92  // Error tells the next streamer in line that there is an error
    93  func (h *HTMLStreamerWriter) Error(err error) error {
    94  	if h.err == nil {
    95  		h.err = err
    96  	}
    97  	return h.err
    98  }
    99  
   100  // Doctype tells the next streamer what the doctype is
   101  func (h *HTMLStreamerWriter) Doctype(data string) error {
   102  	_, h.err = h.WriteString("<!DOCTYPE " + data + ">")
   103  	return h.err
   104  }
   105  
   106  // Comment tells the next streamer there is a comment
   107  func (h *HTMLStreamerWriter) Comment(data string) error {
   108  	_, h.err = h.WriteString("<!--" + data + "-->")
   109  	return h.err
   110  }
   111  
   112  // StartTag tells the next streamer there is a starting tag
   113  func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
   114  	return h.startTag(data, attrs, false)
   115  }
   116  
   117  // SelfClosingTag tells the next streamer there is a self-closing tag
   118  func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
   119  	return h.startTag(data, attrs, true)
   120  }
   121  
   122  func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
   123  	if _, h.err = h.WriteString("<" + data); h.err != nil {
   124  		return h.err
   125  	}
   126  	for _, attr := range attrs {
   127  		if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil {
   128  			return h.err
   129  		}
   130  	}
   131  	if selfclosing {
   132  		if _, h.err = h.WriteString("/>"); h.err != nil {
   133  			return h.err
   134  		}
   135  	} else {
   136  		if _, h.err = h.WriteString(">"); h.err != nil {
   137  			return h.err
   138  		}
   139  	}
   140  	return h.err
   141  }
   142  
   143  // EndTag tells the next streamer there is a end tag
   144  func (h *HTMLStreamerWriter) EndTag(data string) error {
   145  	_, h.err = h.WriteString("</" + data + ">")
   146  	return h.err
   147  }
   148  
   149  // Text tells the next streamer there is a text
   150  func (h *HTMLStreamerWriter) Text(data string) error {
   151  	_, h.err = h.WriteString(html.EscapeString(data))
   152  	return h.err
   153  }
   154  
   155  // StreamHTML streams an html to a provided streamer
   156  func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
   157  	tokenizer := html.NewTokenizer(source)
   158  	for {
   159  		tt := tokenizer.Next()
   160  		switch tt {
   161  		case html.ErrorToken:
   162  			if tokenizer.Err() != io.EOF {
   163  				return tokenizer.Err()
   164  			}
   165  			return nil
   166  		case html.DoctypeToken:
   167  			token := tokenizer.Token()
   168  			if err := streamer.Doctype(token.Data); err != nil {
   169  				return err
   170  			}
   171  		case html.CommentToken:
   172  			token := tokenizer.Token()
   173  			if err := streamer.Comment(token.Data); err != nil {
   174  				return err
   175  			}
   176  		case html.StartTagToken:
   177  			token := tokenizer.Token()
   178  			if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
   179  				return err
   180  			}
   181  		case html.SelfClosingTagToken:
   182  			token := tokenizer.Token()
   183  			if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
   184  				return err
   185  			}
   186  		case html.EndTagToken:
   187  			token := tokenizer.Token()
   188  			if err := streamer.EndTag(token.Data); err != nil {
   189  				return err
   190  			}
   191  		case html.TextToken:
   192  			token := tokenizer.Token()
   193  			if err := streamer.Text(token.Data); err != nil {
   194  				return err
   195  			}
   196  		default:
   197  			return fmt.Errorf("unknown type of token: %d", tt)
   198  		}
   199  	}
   200  }