github.com/zerosnake0/jzon@v0.0.9-0.20230801092939-1b135cb83f7f/iterator_str.go (about)

     1  package jzon
     2  
     3  import (
     4  	"unicode/utf16"
     5  )
     6  
     7  const (
     8  	noEscape   = 0
     9  	invalidHex = -1
    10  )
    11  
    12  var (
    13  	escapeMap [charNum]byte
    14  	hexValue  [charNum]int8
    15  )
    16  
    17  func init() {
    18  	// escaped characters
    19  	for i := 0; i < charNum; i++ {
    20  		escapeMap[i] = noEscape
    21  	}
    22  	for k, v := range map[byte]byte{
    23  		'"':  '"',
    24  		'\\': '\\',
    25  		'/':  '/',
    26  		'b':  '\b',
    27  		'f':  '\f',
    28  		'n':  '\n',
    29  		'r':  '\r',
    30  		't':  '\t',
    31  	} {
    32  		escapeMap[k] = v
    33  	}
    34  	// hex values
    35  	for i := 0; i < charNum; i++ {
    36  		hexValue[i] = invalidHex
    37  	}
    38  	for c := '0'; c <= '9'; c++ {
    39  		hexValue[c] = int8(c - '0')
    40  	}
    41  	for c := 'a'; c <= 'f'; c++ {
    42  		hexValue[c] = int8(c - 'a' + 10)
    43  	}
    44  	for c := 'A'; c <= 'F'; c++ {
    45  		hexValue[c] = int8(c - 'A' + 10)
    46  	}
    47  }
    48  
    49  func (it *Iterator) readU4() (ret rune, err error) {
    50  	remain := 4
    51  	for {
    52  		i := it.head
    53  		for ; i < it.tail; i++ {
    54  			c := it.buffer[i]
    55  			u4v := hexValue[c]
    56  			if u4v == invalidHex {
    57  				return 0, InvalidUnicodeCharError{c: c}
    58  			}
    59  			ret = ret<<4 + int32(u4v)
    60  			if remain == 1 {
    61  				it.head = i + 1
    62  				return
    63  			}
    64  			remain--
    65  		}
    66  		it.head = i
    67  		if err = it.readMore(); err != nil {
    68  			return
    69  		}
    70  	}
    71  }
    72  
    73  func (it *Iterator) readEscapedChar(b []byte) ([]byte, error) {
    74  	c, err := it.nextByte()
    75  	if err != nil {
    76  		return b, err
    77  	}
    78  	escaped := escapeMap[c]
    79  	if escaped != noEscape {
    80  		it.head++
    81  		return append(b, escaped), nil
    82  	}
    83  	if c != 'u' {
    84  		return b, InvalidEscapeCharError{c: c}
    85  	}
    86  	it.head++
    87  	r, err := it.readU4()
    88  	if err != nil {
    89  		return b, err
    90  	}
    91  Retry:
    92  	if utf16.IsSurrogate(r) {
    93  		c, err := it.nextByte()
    94  		if err != nil {
    95  			return b, err
    96  		}
    97  		if c != '\\' {
    98  			return appendRune(b, r), nil
    99  		}
   100  		it.head++
   101  		c, err = it.nextByte()
   102  		if err != nil {
   103  			return b, err
   104  		}
   105  		if c != 'u' {
   106  			b = appendRune(b, r)
   107  			escaped := escapeMap[c]
   108  			if escaped == noEscape {
   109  				return b, InvalidEscapeCharError{c: c}
   110  			}
   111  			it.head++
   112  			return append(b, escaped), nil
   113  		}
   114  		it.head++
   115  		r2, err := it.readU4()
   116  		if err != nil {
   117  			return b, err
   118  		}
   119  		combined := utf16.DecodeRune(r, r2)
   120  		if combined == runeError {
   121  			b = appendRune(b, r)
   122  			r = r2
   123  			goto Retry
   124  		}
   125  		return appendRune(b, combined), nil
   126  	}
   127  	return appendRune(b, r), nil
   128  }
   129  
   130  // internal, call only after a '"' is consumed
   131  // the result is a part of the temp buffer, should be copied if
   132  // the data needs to be saved
   133  func (it *Iterator) readStringAsSlice() (_ []byte, err error) {
   134  	for i := it.head; i < it.tail; i++ {
   135  		c := it.buffer[i]
   136  		if c < ' ' { // json.org
   137  			return nil, InvalidStringCharError{c: c}
   138  		}
   139  		if c == '"' {
   140  			it.tmpBuffer = append(it.tmpBuffer[:0], it.buffer[it.head:i]...)
   141  			it.head = i + 1
   142  			return it.tmpBuffer, nil
   143  		} else if c == '\\' {
   144  			buf := append(it.tmpBuffer[:0], it.buffer[it.head:i]...)
   145  			it.head = i + 1
   146  			buf, err = it.readEscapedChar(buf)
   147  			if err != nil {
   148  				it.tmpBuffer = buf
   149  				return nil, err
   150  			}
   151  			i = it.head
   152  			buf, err = it.readStringAsSliceSlow(buf)
   153  			it.tmpBuffer = buf
   154  			return buf, err
   155  		}
   156  	}
   157  	buf := append(it.tmpBuffer[:0], it.buffer[it.head:it.tail]...)
   158  	it.head = it.tail
   159  	if err := it.readMore(); err != nil {
   160  		it.tmpBuffer = buf
   161  		return nil, err
   162  	}
   163  	buf, err = it.readStringAsSliceSlow(buf)
   164  	it.tmpBuffer = buf
   165  	return buf, err
   166  }
   167  
   168  func (it *Iterator) readStringAsSliceSlow(buf []byte) (_ []byte, err error) {
   169  	for {
   170  		i := it.head
   171  		for i < it.tail {
   172  			c := it.buffer[i]
   173  			if c < ' ' { // json.org
   174  				return buf, InvalidStringCharError{c: c}
   175  			}
   176  			if c == '"' {
   177  				buf = append(buf, it.buffer[it.head:i]...)
   178  				it.head = i + 1
   179  				return buf, nil
   180  			} else if c == '\\' {
   181  				buf = append(buf, it.buffer[it.head:i]...)
   182  				it.head = i + 1
   183  				buf, err = it.readEscapedChar(buf)
   184  				if err != nil {
   185  					return buf, err
   186  				}
   187  				i = it.head
   188  			} else {
   189  				i++
   190  			}
   191  		}
   192  		// i == it.tail
   193  		buf = append(buf, it.buffer[it.head:i]...)
   194  		it.head = i
   195  		if err = it.readMore(); err != nil {
   196  			return buf, err
   197  		}
   198  	}
   199  }
   200  
   201  func (it *Iterator) expectQuote() error {
   202  	c, err := it.nextToken()
   203  	if err != nil {
   204  		return err
   205  	}
   206  	if c != '"' {
   207  		return UnexpectedByteError{exp: '"', got: c}
   208  	}
   209  	it.head++ // consume the leading '"'
   210  	return nil
   211  }
   212  
   213  // ReadStringAsSlice reads a string as a byte slice
   214  // The returned slice can only be used temporarily, a copy must be made
   215  // if the result needs to be saved
   216  func (it *Iterator) ReadStringAsSlice() (_ []byte, err error) {
   217  	if err = it.expectQuote(); err != nil {
   218  		return
   219  	}
   220  	return it.readStringAsSlice()
   221  }
   222  
   223  // ReadStringAndAppend reads a string and appends to a byte slice
   224  func (it *Iterator) ReadStringAndAppend(buf []byte) (_ []byte, err error) {
   225  	if err = it.expectQuote(); err != nil {
   226  		return
   227  	}
   228  	s, err := it.readStringAsSlice()
   229  	if err != nil {
   230  		return
   231  	}
   232  	return append(buf, s...), nil
   233  }
   234  
   235  // internal, call only after a '"' is consumed
   236  func (it *Iterator) readString() (ret string, err error) {
   237  	buf, err := it.readStringAsSlice()
   238  	if err == nil {
   239  		ret = string(buf)
   240  	}
   241  	return
   242  }
   243  
   244  // ReadString reads a string
   245  func (it *Iterator) ReadString() (_ string, err error) {
   246  	if err = it.expectQuote(); err != nil {
   247  		return
   248  	}
   249  	return it.readString()
   250  }
   251  
   252  // From unicode/utf8 (which is also used by jsoniter)
   253  const (
   254  	t1 = 0x00 // 0000 0000
   255  	tx = 0x80 // 1000 0000
   256  	t2 = 0xC0 // 1100 0000
   257  	t3 = 0xE0 // 1110 0000
   258  	t4 = 0xF0 // 1111 0000
   259  	t5 = 0xF8 // 1111 1000
   260  
   261  	maskx = 0x3F // 0011 1111
   262  	mask2 = 0x1F // 0001 1111
   263  	mask3 = 0x0F // 0000 1111
   264  	mask4 = 0x07 // 0000 0111
   265  
   266  	rune1Max = 1<<7 - 1
   267  	rune2Max = 1<<11 - 1
   268  	rune3Max = 1<<16 - 1
   269  
   270  	surrogateMin = 0xD800
   271  	surrogateMax = 0xDFFF
   272  
   273  	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
   274  	runeError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
   275  )
   276  
   277  func appendRune(p []byte, r rune) []byte {
   278  	// Negative values are erroneous. Making it unsigned addresses the problem.
   279  	switch i := uint32(r); {
   280  	case i <= rune1Max:
   281  		return append(p, byte(r))
   282  	case i <= rune2Max:
   283  		return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
   284  	case i > maxRune, surrogateMin <= i && i <= surrogateMax:
   285  		r = runeError
   286  		fallthrough
   287  	case i <= rune3Max:
   288  		return append(p, t3|byte(r>>12), tx|byte(r>>6)&maskx,
   289  			tx|byte(r)&maskx)
   290  	default:
   291  		return append(p, t4|byte(r>>18), tx|byte(r>>12)&maskx,
   292  			tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
   293  	}
   294  }