github.com/whiteCcinn/protobuf-go@v1.0.9/internal/encoding/text/encode.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package text
     6  
     7  import (
     8  	"math"
     9  	"math/bits"
    10  	"strconv"
    11  	"strings"
    12  	"unicode/utf8"
    13  
    14  	"github.com/whiteCcinn/protobuf-go/internal/detrand"
    15  	"github.com/whiteCcinn/protobuf-go/internal/errors"
    16  )
    17  
    18  // encType represents an encoding type.
    19  type encType uint8
    20  
    21  const (
    22  	_ encType = (1 << iota) / 2
    23  	name
    24  	scalar
    25  	messageOpen
    26  	messageClose
    27  )
    28  
// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
//
// Create an Encoder with NewEncoder, which validates indent and delims.
type Encoder struct {
	encoderState // mutable output state; captured and restored via Snapshot/Reset

	indent      string  // per-level indentation (spaces/tabs only); empty selects single-line output
	delims      [2]byte // opening and closing message delimiters: "{}" or "<>"
	outputASCII bool    // forwarded to appendString by WriteString to escape non-ASCII runes
}
    38  
// encoderState is the mutable part of an Encoder. It is kept in its own
// struct so that Snapshot and Reset can save and restore it as a unit.
type encoderState struct {
	lastType encType // kind of the most recently written construct; updated by prepareNext
	indents  []byte  // indentation for the current nesting depth; grown and shrunk by prepareNext in multi-line mode
	out      []byte  // output accumulated so far; returned by Bytes
}
    44  
    45  // NewEncoder returns an Encoder.
    46  //
    47  // If indent is a non-empty string, it causes every entry in a List or Message
    48  // to be preceded by the indent and trailed by a newline.
    49  //
    50  // If delims is not the zero value, it controls the delimiter characters used
    51  // for messages (e.g., "{}" vs "<>").
    52  //
    53  // If outputASCII is true, strings will be serialized in such a way that
    54  // multi-byte UTF-8 sequences are escaped. This property ensures that the
    55  // overall output is ASCII (as opposed to UTF-8).
    56  func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
    57  	e := &Encoder{}
    58  	if len(indent) > 0 {
    59  		if strings.Trim(indent, " \t") != "" {
    60  			return nil, errors.New("indent may only be composed of space and tab characters")
    61  		}
    62  		e.indent = indent
    63  	}
    64  	switch delims {
    65  	case [2]byte{0, 0}:
    66  		e.delims = [2]byte{'{', '}'}
    67  	case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
    68  		e.delims = delims
    69  	default:
    70  		return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
    71  	}
    72  	e.outputASCII = outputASCII
    73  
    74  	return e, nil
    75  }
    76  
// Bytes returns the content of the written bytes.
//
// The returned slice is the encoder's own output buffer, not a copy.
func (e *Encoder) Bytes() []byte {
	return e.out
}
    81  
    82  // StartMessage writes out the '{' or '<' symbol.
    83  func (e *Encoder) StartMessage() {
    84  	e.prepareNext(messageOpen)
    85  	e.out = append(e.out, e.delims[0])
    86  }
    87  
    88  // EndMessage writes out the '}' or '>' symbol.
    89  func (e *Encoder) EndMessage() {
    90  	e.prepareNext(messageClose)
    91  	e.out = append(e.out, e.delims[1])
    92  }
    93  
    94  // WriteName writes out the field name and the separator ':'.
    95  func (e *Encoder) WriteName(s string) {
    96  	e.prepareNext(name)
    97  	e.out = append(e.out, s...)
    98  	e.out = append(e.out, ':')
    99  }
   100  
   101  // WriteBool writes out the given boolean value.
   102  func (e *Encoder) WriteBool(b bool) {
   103  	if b {
   104  		e.WriteLiteral("true")
   105  	} else {
   106  		e.WriteLiteral("false")
   107  	}
   108  }
   109  
   110  // WriteString writes out the given string value.
   111  func (e *Encoder) WriteString(s string) {
   112  	e.prepareNext(scalar)
   113  	e.out = appendString(e.out, s, e.outputASCII)
   114  }
   115  
   116  func appendString(out []byte, in string, outputASCII bool) []byte {
   117  	out = append(out, '"')
   118  	i := indexNeedEscapeInString(in)
   119  	in, out = in[i:], append(out, in[:i]...)
   120  	for len(in) > 0 {
   121  		switch r, n := utf8.DecodeRuneInString(in); {
   122  		case r == utf8.RuneError && n == 1:
   123  			// We do not report invalid UTF-8 because strings in the text format
   124  			// are used to represent both the proto string and bytes type.
   125  			r = rune(in[0])
   126  			fallthrough
   127  		case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
   128  			out = append(out, '\\')
   129  			switch r {
   130  			case '"', '\\':
   131  				out = append(out, byte(r))
   132  			case '\n':
   133  				out = append(out, 'n')
   134  			case '\r':
   135  				out = append(out, 'r')
   136  			case '\t':
   137  				out = append(out, 't')
   138  			default:
   139  				out = append(out, 'x')
   140  				out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
   141  				out = strconv.AppendUint(out, uint64(r), 16)
   142  			}
   143  			in = in[n:]
   144  		case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
   145  			out = append(out, '\\')
   146  			if r <= math.MaxUint16 {
   147  				out = append(out, 'u')
   148  				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
   149  				out = strconv.AppendUint(out, uint64(r), 16)
   150  			} else {
   151  				out = append(out, 'U')
   152  				out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
   153  				out = strconv.AppendUint(out, uint64(r), 16)
   154  			}
   155  			in = in[n:]
   156  		default:
   157  			i := indexNeedEscapeInString(in[n:])
   158  			in, out = in[n+i:], append(out, in[:n+i]...)
   159  		}
   160  	}
   161  	out = append(out, '"')
   162  	return out
   163  }
   164  
   165  // indexNeedEscapeInString returns the index of the character that needs
   166  // escaping. If no characters need escaping, this returns the input length.
   167  func indexNeedEscapeInString(s string) int {
   168  	for i := 0; i < len(s); i++ {
   169  		if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f {
   170  			return i
   171  		}
   172  	}
   173  	return len(s)
   174  }
   175  
   176  // WriteFloat writes out the given float value for given bitSize.
   177  func (e *Encoder) WriteFloat(n float64, bitSize int) {
   178  	e.prepareNext(scalar)
   179  	e.out = appendFloat(e.out, n, bitSize)
   180  }
   181  
   182  func appendFloat(out []byte, n float64, bitSize int) []byte {
   183  	switch {
   184  	case math.IsNaN(n):
   185  		return append(out, "nan"...)
   186  	case math.IsInf(n, +1):
   187  		return append(out, "inf"...)
   188  	case math.IsInf(n, -1):
   189  		return append(out, "-inf"...)
   190  	default:
   191  		return strconv.AppendFloat(out, n, 'g', -1, bitSize)
   192  	}
   193  }
   194  
   195  // WriteInt writes out the given signed integer value.
   196  func (e *Encoder) WriteInt(n int64) {
   197  	e.prepareNext(scalar)
   198  	e.out = append(e.out, strconv.FormatInt(n, 10)...)
   199  }
   200  
   201  // WriteUint writes out the given unsigned integer value.
   202  func (e *Encoder) WriteUint(n uint64) {
   203  	e.prepareNext(scalar)
   204  	e.out = append(e.out, strconv.FormatUint(n, 10)...)
   205  }
   206  
   207  // WriteLiteral writes out the given string as a literal value without quotes.
   208  // This is used for writing enum literal strings.
   209  func (e *Encoder) WriteLiteral(s string) {
   210  	e.prepareNext(scalar)
   211  	e.out = append(e.out, s...)
   212  }
   213  
   214  // prepareNext adds possible space and indentation for the next value based
   215  // on last encType and indent option. It also updates e.lastType to next.
   216  func (e *Encoder) prepareNext(next encType) {
   217  	defer func() {
   218  		e.lastType = next
   219  	}()
   220  
   221  	// Single line.
   222  	if len(e.indent) == 0 {
   223  		// Add space after each field before the next one.
   224  		if e.lastType&(scalar|messageClose) != 0 && next == name {
   225  			e.out = append(e.out, ' ')
   226  			// Add a random extra space to make output unstable.
   227  			if detrand.Bool() {
   228  				e.out = append(e.out, ' ')
   229  			}
   230  		}
   231  		return
   232  	}
   233  
   234  	// Multi-line.
   235  	switch {
   236  	case e.lastType == name:
   237  		e.out = append(e.out, ' ')
   238  		// Add a random extra space after name: to make output unstable.
   239  		if detrand.Bool() {
   240  			e.out = append(e.out, ' ')
   241  		}
   242  
   243  	case e.lastType == messageOpen && next != messageClose:
   244  		e.indents = append(e.indents, e.indent...)
   245  		e.out = append(e.out, '\n')
   246  		e.out = append(e.out, e.indents...)
   247  
   248  	case e.lastType&(scalar|messageClose) != 0:
   249  		if next == messageClose {
   250  			e.indents = e.indents[:len(e.indents)-len(e.indent)]
   251  		}
   252  		e.out = append(e.out, '\n')
   253  		e.out = append(e.out, e.indents...)
   254  	}
   255  }
   256  
// Snapshot returns the current snapshot for use in Reset.
//
// The snapshot is a copy of the state struct; its slices still refer to the
// same underlying arrays as the encoder's.
func (e *Encoder) Snapshot() encoderState {
	return e.encoderState
}
   261  
// Reset resets the Encoder to the given encoderState from a Snapshot.
//
// Only the mutable state (last written type, indentation and output) is
// replaced; the indent, delimiter and ASCII options are left untouched.
func (e *Encoder) Reset(es encoderState) {
	e.encoderState = es
}
   266  
   267  // AppendString appends the escaped form of the input string to b.
   268  func AppendString(b []byte, s string) []byte {
   269  	return appendString(b, s, false)
   270  }