code.gitea.io/gitea@v1.19.3/modules/charset/escape.go (about)

     1  // Copyright 2022 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  //go:generate go run invisible/generate.go -v -o ./invisible_gen.go
     5  
     6  //go:generate go run ambiguous/generate.go -v -o ./ambiguous_gen.go ambiguous/ambiguous.json
     7  
     8  package charset
     9  
    10  import (
    11  	"bufio"
    12  	"io"
    13  	"strings"
    14  
    15  	"code.gitea.io/gitea/modules/log"
    16  	"code.gitea.io/gitea/modules/translation"
    17  )
    18  
// RuneNBSP is the Unicode code point of the no-break space (U+00A0).
const RuneNBSP = 0xa0
    21  
    22  // EscapeControlHTML escapes the unicode control sequences in a provided html document
    23  func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
    24  	sb := &strings.Builder{}
    25  	outputStream := &HTMLStreamerWriter{Writer: sb}
    26  	streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
    27  
    28  	if err := StreamHTML(strings.NewReader(text), streamer); err != nil {
    29  		streamer.escaped.HasError = true
    30  		log.Error("Error whilst escaping: %v", err)
    31  	}
    32  	return streamer.escaped, sb.String()
    33  }
    34  
    35  // EscapeControlReaders escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte
    36  func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
    37  	outputStream := &HTMLStreamerWriter{Writer: writer}
    38  	streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
    39  
    40  	if err = StreamHTML(reader, streamer); err != nil {
    41  		streamer.escaped.HasError = true
    42  		log.Error("Error whilst escaping: %v", err)
    43  	}
    44  	return streamer.escaped, err
    45  }
    46  
    47  // EscapeControlStringReader escapes the unicode control sequences in a provided reader of string content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte. HTML line breaks are not inserted after every newline by this method.
    48  func EscapeControlStringReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
    49  	bufRd := bufio.NewReader(reader)
    50  	outputStream := &HTMLStreamerWriter{Writer: writer}
    51  	streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
    52  
    53  	for {
    54  		line, rdErr := bufRd.ReadString('\n')
    55  		if len(line) > 0 {
    56  			if err := streamer.Text(line); err != nil {
    57  				streamer.escaped.HasError = true
    58  				log.Error("Error whilst escaping: %v", err)
    59  				return streamer.escaped, err
    60  			}
    61  		}
    62  		if rdErr != nil {
    63  			if rdErr != io.EOF {
    64  				err = rdErr
    65  			}
    66  			break
    67  		}
    68  	}
    69  	return streamer.escaped, err
    70  }
    71  
    72  // EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string
    73  func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
    74  	sb := &strings.Builder{}
    75  	outputStream := &HTMLStreamerWriter{Writer: sb}
    76  	streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
    77  
    78  	if err := streamer.Text(text); err != nil {
    79  		streamer.escaped.HasError = true
    80  		log.Error("Error whilst escaping: %v", err)
    81  	}
    82  	return streamer.escaped, sb.String()
    83  }