github.com/dgraph-io/simdjson-go@v0.3.0/parsed_json.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package simdjson
    18  
    19  import (
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"io/ioutil"
    25  	"math"
    26  	"strconv"
    27  )
    28  
// For enhanced performance, simdjson-go can point back into the original JSON buffer for strings,
// however this can lead to issues in streaming use cases, or in scenarios in which
// the underlying JSON buffer is reused. So the default behaviour is to create copies of all
// strings (not just those transformed anyway for unicode escape characters) into the separate
// Strings buffer (at the expense of using more memory and less performance).
const alwaysCopyStrings = true

// JSONVALUEMASK selects the lower 56 bits of a tape entry (the payload).
const JSONVALUEMASK = 0xffffffffffffff

// JSONTAGMASK selects the upper 8 bits of a tape entry (the tag).
const JSONTAGMASK = 0xff << 56

// STRINGBUFBIT is bit 55 of a string offset. When set, the offset refers to
// the Strings buffer; when clear, it refers to Message (see stringByteAt).
const STRINGBUFBIT = 0x80000000000000

// STRINGBUFMASK strips STRINGBUFBIT from a string offset, leaving the
// position inside the Strings buffer.
const STRINGBUFMASK = 0x7fffffffffffff

// maxdepth is presumably the maximum nesting depth handled by the parser;
// it is not referenced in this part of the file. TODO confirm against users.
const maxdepth = 128
    44  
    45  // FloatFlags are flags recorded when converting floats.
    46  type FloatFlags uint64
    47  
    48  // FloatFlag is a flag recorded when parsing floats.
    49  type FloatFlag uint64
    50  
    51  const (
    52  	// FloatOverflowedInteger is set when number in JSON was in integer notation,
    53  	// but under/overflowed both int64 and uint64 and therefore was parsed as float.
    54  	FloatOverflowedInteger FloatFlag = 1 << iota
    55  )
    56  
    57  // Contains returns whether f contains the specified flag.
    58  func (f FloatFlags) Contains(flag FloatFlag) bool {
    59  	return FloatFlag(f)&flag == flag
    60  }
    61  
    62  // Flags converts the flag to FloatFlags and optionally merges more flags.
    63  func (f FloatFlag) Flags(more ...FloatFlag) FloatFlags {
    64  	// We operate on a copy, so we can modify f.
    65  	for _, v := range more {
    66  		f |= v
    67  	}
    68  	return FloatFlags(f)
    69  }
    70  
// ParsedJson holds the result of parsing: the tape plus the buffers that
// string entries on the tape point into.
type ParsedJson struct {
	// Message is the buffer that string offsets without STRINGBUFBIT refer to
	// (presumably the original JSON input).
	Message []byte
	// Tape is the sequence of 64-bit entries; the top 8 bits of an entry hold
	// the tag and the lower 56 bits hold the payload.
	Tape []uint64
	// Strings is the buffer that string offsets with STRINGBUFBIT set refer to.
	Strings []byte

	// allows to reuse the internal structures without exposing it.
	internal *internalParsedJson
}
    79  
// indexSlots is the number of index buffers kept in internalParsedJson.buffers.
const indexSlots = 16

// indexSize is the number of uint32 entries in one index buffer.
const indexSize = 1536                            // Seems to be a good size for the index buffering
const indexSizeWithSafetyBuffer = indexSize - 128 // Make sure we never write beyond buffer
    83  
// indexChan describes one index buffer. It is the element type of
// internalParsedJson.indexChans.
type indexChan struct {
	// NOTE(review): index presumably identifies which buffer slot this is — confirm.
	index int
	// NOTE(review): length is presumably the number of valid entries in indexes — confirm.
	length int
	// indexes points at a buffer of indexSize uint32 values.
	indexes *[indexSize]uint32
}
    89  
// internalParsedJson embeds ParsedJson and adds the working state that is kept
// out of the public type.
type internalParsedJson struct {
	ParsedJson
	// NOTE(review): presumably a stack of tape offsets of the enclosing scopes — confirm.
	containingScopeOffset []uint64
	// isvalid must be true for dump_raw_tape to print anything.
	isvalid bool
	// indexChans carries indexChan values between stages (producer/consumer not visible here).
	indexChans chan indexChan
	// NOTE(review): presumably the indexChan currently being processed — confirm.
	indexesChan indexChan
	// buffers holds indexSlots index buffers of indexSize entries each.
	buffers [indexSlots][indexSize]uint32
	// NOTE(review): meaning of buffersOffset is not visible in this part of the file.
	buffersOffset uint64
	// NOTE(review): presumably non-zero when parsing newline-delimited JSON — confirm.
	ndjson uint64
}
   100  
   101  // Iter returns a new Iter.
   102  func (pj *ParsedJson) Iter() Iter {
   103  	return Iter{tape: *pj}
   104  }
   105  
   106  // stringAt returns a string at a specific offset in the stringbuffer.
   107  func (pj *ParsedJson) stringAt(offset, length uint64) (string, error) {
   108  	b, err := pj.stringByteAt(offset, length)
   109  	return string(b), err
   110  }
   111  
   112  // stringByteAt returns a string at a specific offset in the stringbuffer.
   113  func (pj *ParsedJson) stringByteAt(offset, length uint64) ([]byte, error) {
   114  	if offset&STRINGBUFBIT == 0 {
   115  		if offset+length > uint64(len(pj.Message)) {
   116  			return nil, fmt.Errorf("string message offset (%v) outside valid area (%v)", offset+length, len(pj.Message))
   117  		}
   118  		return pj.Message[offset : offset+length], nil
   119  	}
   120  
   121  	offset = offset & STRINGBUFMASK
   122  	if offset+length > uint64(len(pj.Strings)) {
   123  		return nil, fmt.Errorf("string buffer offset (%v) outside valid area (%v)", offset+length, len(pj.Strings))
   124  	}
   125  	return pj.Strings[offset : offset+length], nil
   126  }
   127  
// Iter represents a section of JSON.
// To start iterating it, use Advance() or AdvanceIter() methods
// which will queue the first element.
// If an Iter is copied, the copy will be independent.
type Iter struct {
	// The tape where this iter starts. The Tape slice may be truncated to
	// restrict the iterator to a sub-section (see AdvanceIter and Root).
	tape ParsedJson

	// offset of the next entry to be decoded. After an element has been
	// queued this points at the entry following its tag entry.
	off int

	// addNext is the number of entries to skip for the next entry.
	addNext int

	// current value, exclude tag in top bits (the lower 56 bits of the entry).
	cur uint64

	// current tag. TagEnd when nothing is queued or the iterator is exhausted.
	t Tag
}
   148  
   149  // loadTape will load the input from the supplied readers.
   150  func loadTape(tape, strings io.Reader) (*ParsedJson, error) {
   151  	b, err := ioutil.ReadAll(tape)
   152  	if err != nil {
   153  		return nil, err
   154  	}
   155  	if len(b)&7 != 0 {
   156  		return nil, errors.New("unexpected tape length, should be modulo 8 bytes")
   157  	}
   158  	dst := ParsedJson{
   159  		Tape:    make([]uint64, len(b)/8),
   160  		Strings: nil,
   161  	}
   162  	// Read tape
   163  	for i := range dst.Tape {
   164  		dst.Tape[i] = binary.LittleEndian.Uint64(b[i*8 : i*8+8])
   165  	}
   166  	// Read stringbuf
   167  	b, err = ioutil.ReadAll(strings)
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  	dst.Strings = b
   172  	return &dst, nil
   173  }
   174  
   175  // Advance will read the type of the next element
   176  // and queues up the value on the same level.
   177  func (i *Iter) Advance() Type {
   178  	i.off += i.addNext
   179  	if i.off >= len(i.tape.Tape) {
   180  		i.addNext = 0
   181  		i.t = TagEnd
   182  		return TypeNone
   183  	}
   184  
   185  	v := i.tape.Tape[i.off]
   186  	i.cur = v & JSONVALUEMASK
   187  	i.t = Tag(v >> 56)
   188  	i.off++
   189  	i.calcNext(false)
   190  	if i.addNext < 0 {
   191  		// We can't send error, so move to end.
   192  		i.moveToEnd()
   193  		return TypeNone
   194  	}
   195  	return TagToType[i.t]
   196  }
   197  
   198  // AdvanceInto will read the tag of the next element
   199  // and move into and out of arrays , objects and root elements.
   200  // This should only be used for strictly manual parsing.
   201  func (i *Iter) AdvanceInto() Tag {
   202  	i.off += i.addNext
   203  	if i.off >= len(i.tape.Tape) {
   204  		i.addNext = 0
   205  		i.t = TagEnd
   206  		return TagEnd
   207  	}
   208  
   209  	v := i.tape.Tape[i.off]
   210  	i.cur = v & JSONVALUEMASK
   211  	i.t = Tag(v >> 56)
   212  	i.off++
   213  	i.calcNext(true)
   214  	if i.addNext < 0 {
   215  		// We can't send error, so end tape.
   216  		i.moveToEnd()
   217  		return TagEnd
   218  	}
   219  	return i.t
   220  }
   221  
   222  func (i *Iter) moveToEnd() {
   223  	i.off = len(i.tape.Tape)
   224  	i.addNext = 0
   225  	i.t = TagEnd
   226  }
   227  
   228  // calcNext will populate addNext to the correct value to skip.
   229  // Specify whether to move into objects/array.
   230  func (i *Iter) calcNext(into bool) {
   231  	i.addNext = 0
   232  	switch i.t {
   233  	case TagInteger, TagUint, TagFloat, TagString:
   234  		i.addNext = 1
   235  	case TagRoot, TagObjectStart, TagArrayStart:
   236  		if !into {
   237  			i.addNext = int(i.cur) - i.off
   238  		}
   239  	}
   240  }
   241  
   242  // Type returns the queued value type from the previous call to Advance.
   243  func (i *Iter) Type() Type {
   244  	if i.off+i.addNext > len(i.tape.Tape) {
   245  		return TypeNone
   246  	}
   247  	return TagToType[i.t]
   248  }
   249  
// AdvanceIter will read the type of the next element
// and return an iterator only containing the object.
// If dst and i are the same, both will contain the value inside.
//
// At the end of the tape TypeNone is returned with a nil error.
// An error is returned if the offset is past the tape, if the element has a
// negative skip offset, or if the element extends beyond the tape.
func (i *Iter) AdvanceIter(dst *Iter) (Type, error) {
	i.off += i.addNext
	if i.off == len(i.tape.Tape) {
		// Clean end of tape: nothing queued, no error.
		i.addNext = 0
		i.t = TagEnd
		return TypeNone, nil
	}
	if i.off > len(i.tape.Tape) {
		return TypeNone, errors.New("offset bigger than tape")
	}

	// Get current value off tape.
	v := i.tape.Tape[i.off]
	i.cur = v & JSONVALUEMASK
	i.t = Tag(v >> 56)
	i.off++
	// Skip over the whole element on i, so the next advance on i continues after it.
	i.calcNext(false)
	if i.addNext < 0 {
		i.moveToEnd()
		return TypeNone, errors.New("element has negative offset")
	}

	// Calculate end of this object.
	// This must happen before dst is touched, since dst may alias i.
	iEnd := i.off + i.addNext
	typ := TagToType[i.t]

	// Copy i if different
	if i != dst {
		*dst = *i
	}
	// Move into dst
	dst.calcNext(true)
	// calcNext(true) only ever sets addNext to 0 or 1, so this is a defensive check.
	if dst.addNext < 0 {
		i.moveToEnd()
		return TypeNone, errors.New("element has negative offset")
	}

	if iEnd > len(dst.tape.Tape) {
		return TypeNone, errors.New("element extends beyond tape")
	}

	// Restrict destination.
	// Only the slice header in dst is shortened; the backing array is shared.
	dst.tape.Tape = dst.tape.Tape[:iEnd]

	return typ, nil
}
   299  
   300  // PeekNext will return the next value type.
   301  // Returns TypeNone if next ends iterator.
   302  func (i *Iter) PeekNext() Type {
   303  	if i.off+i.addNext >= len(i.tape.Tape) {
   304  		return TypeNone
   305  	}
   306  	return TagToType[Tag(i.tape.Tape[i.off+i.addNext]>>56)]
   307  }
   308  
   309  // PeekNextTag will return the tag at the current offset.
   310  // Will return TagEnd if at end of iterator.
   311  func (i *Iter) PeekNextTag() Tag {
   312  	if i.off+i.addNext >= len(i.tape.Tape) {
   313  		return TagEnd
   314  	}
   315  	return Tag(i.tape.Tape[i.off+i.addNext] >> 56)
   316  }
   317  
   318  // MarshalJSON will marshal the entire remaining scope of the iterator.
   319  func (i *Iter) MarshalJSON() ([]byte, error) {
   320  	return i.MarshalJSONBuffer(nil)
   321  }
   322  
// MarshalJSONBuffer will marshal the remaining scope of the iterator including the current value.
// An optional buffer can be provided for fewer allocations.
// Output will be appended to the destination.
//
// The iterator is advanced while marshalling. If nothing is queued yet, the
// first element is queued automatically. Multiple root elements are separated
// by newlines. On error the returned slice is either nil or the partial
// output, depending on the error, and should not be used.
func (i *Iter) MarshalJSONBuffer(dst []byte) ([]byte, error) {
	// NOTE(review): tmpBuf is only ever truncated below and never written to
	// in this function; it looks like a leftover.
	var tmpBuf []byte

	// Pre-allocate for 100 deep.
	// Deeper nesting still works; append will then move the stack to the heap.
	var stackTmp [100]uint8
	// We have a stackNone on top of the stack
	stack := stackTmp[:1]
	const (
		stackNone = iota
		stackArray
		stackObject
		stackRoot
	)

writeloop:
	for {
		// Write key names.
		// Inside an object every value is preceded by a string key on the tape.
		if stack[len(stack)-1] == stackObject && i.t != TagObjectEnd {
			sb, err := i.StringBytes()
			if err != nil {
				return nil, fmt.Errorf("expected key within object: %w", err)
			}
			dst = append(dst, '"')
			dst = escapeBytes(dst, sb)
			dst = append(dst, '"', ':')
			if i.PeekNextTag() == TagEnd {
				return nil, fmt.Errorf("unexpected end of tape within object")
			}
			// Queue the value belonging to the key.
			i.AdvanceInto()
		}
		//fmt.Println(i.t, len(stack)-1, i.off)
	tagswitch:
		switch i.t {
		case TagRoot:
			// A root entry whose payload points forward opens a root;
			// otherwise it is the closing root entry.
			isOpenRoot := int(i.cur) > i.off
			if len(stack) > 1 {
				if isOpenRoot {
					return dst, errors.New("root tag open, but not at top of stack")
				}
				l := stack[len(stack)-1]
				switch l {
				case stackRoot:
					// Closing a root: separate from a following root with a newline.
					if i.PeekNextTag() != TagEnd {
						dst = append(dst, '\n')
					}
					stack = stack[:len(stack)-1]
					break tagswitch
				case stackNone:
					break writeloop
				default:
					return dst, errors.New("root tag, but not at top of stack, got id " + strconv.Itoa(int(l)))
				}
			}

			if isOpenRoot {
				// Always move into root.
				i.addNext = 0
			}
			i.AdvanceInto()
			stack = append(stack, stackRoot)
			continue
		case TagString:
			sb, err := i.StringBytes()
			if err != nil {
				return nil, err
			}
			dst = append(dst, '"')
			dst = escapeBytes(dst, sb)
			dst = append(dst, '"')
			tmpBuf = tmpBuf[:0]
		case TagInteger:
			v, err := i.Int()
			if err != nil {
				return nil, err
			}
			dst = strconv.AppendInt(dst, v, 10)
		case TagUint:
			v, err := i.Uint()
			if err != nil {
				return nil, err
			}
			dst = strconv.AppendUint(dst, v, 10)
		case TagFloat:
			v, err := i.Float()
			if err != nil {
				return nil, err
			}
			dst, err = appendFloat(dst, v)
			if err != nil {
				return nil, err
			}
		case TagNull:
			dst = append(dst, []byte("null")...)
		case TagBoolTrue:
			dst = append(dst, []byte("true")...)
		case TagBoolFalse:
			dst = append(dst, []byte("false")...)
		case TagObjectStart:
			dst = append(dst, '{')
			stack = append(stack, stackObject)
			// We should not emit commas.
			// Continuing skips the separator logic at the bottom of the loop.
			i.AdvanceInto()
			continue
		case TagObjectEnd:
			dst = append(dst, '}')
			if stack[len(stack)-1] != stackObject {
				return dst, errors.New("end of object with no object on stack")
			}
			stack = stack[:len(stack)-1]
		case TagArrayStart:
			dst = append(dst, '[')
			stack = append(stack, stackArray)
			// As for objects: no separator directly after the opening bracket.
			i.AdvanceInto()
			continue
		case TagArrayEnd:
			dst = append(dst, ']')
			if stack[len(stack)-1] != stackArray {
				return nil, errors.New("end of array with no array on stack")
			}
			stack = stack[:len(stack)-1]
		case TagEnd:
			// Nothing queued yet: queue the first element, if there is one.
			if i.PeekNextTag() == TagEnd {
				return nil, errors.New("no content queued in iterator")
			}
			i.AdvanceInto()
			continue
		}

		// This break is outside the switch, so it leaves the write loop.
		if i.PeekNextTag() == TagEnd {
			break
		}
		i.AdvanceInto()

		// Output object separators, etc.
		// A comma is written unless the next entry closes the current container.
		switch stack[len(stack)-1] {
		case stackArray:
			switch i.t {
			case TagArrayEnd:
			default:
				dst = append(dst, ',')
			}
		case stackObject:
			switch i.t {
			case TagObjectEnd:
			default:
				dst = append(dst, ',')
			}
		}
	}
	// Anything left above the initial stackNone entry was never closed.
	if len(stack) > 1 {
		return nil, fmt.Errorf("objects or arrays not closed. left on stack: %v", stack[1:])
	}
	return dst, nil
}
   480  
   481  // Float returns the float value of the next element.
   482  // Integers are automatically converted to float.
   483  func (i *Iter) Float() (float64, error) {
   484  	switch i.t {
   485  	case TagFloat:
   486  		if i.off >= len(i.tape.Tape) {
   487  			return 0, errors.New("corrupt input: expected float, but no more values on tape")
   488  		}
   489  		v := math.Float64frombits(i.tape.Tape[i.off])
   490  		return v, nil
   491  	case TagInteger:
   492  		if i.off >= len(i.tape.Tape) {
   493  			return 0, errors.New("corrupt input: expected integer, but no more values on tape")
   494  		}
   495  		v := int64(i.tape.Tape[i.off])
   496  		return float64(v), nil
   497  	case TagUint:
   498  		if i.off >= len(i.tape.Tape) {
   499  			return 0, errors.New("corrupt input: expected integer, but no more values on tape")
   500  		}
   501  		v := i.tape.Tape[i.off]
   502  		return float64(v), nil
   503  	default:
   504  		return 0, fmt.Errorf("unable to convert type %v to float", i.t)
   505  	}
   506  }
   507  
   508  // FloatFlags returns the float value of the next element.
   509  // This will include flags from parsing.
   510  // Integers are automatically converted to float.
   511  func (i *Iter) FloatFlags() (float64, FloatFlags, error) {
   512  	switch i.t {
   513  	case TagFloat:
   514  		if i.off >= len(i.tape.Tape) {
   515  			return 0, 0, errors.New("corrupt input: expected float, but no more values on tape")
   516  		}
   517  		v := math.Float64frombits(i.tape.Tape[i.off])
   518  		return v, 0, nil
   519  	case TagInteger:
   520  		if i.off >= len(i.tape.Tape) {
   521  			return 0, 0, errors.New("corrupt input: expected integer, but no more values on tape")
   522  		}
   523  		v := int64(i.tape.Tape[i.off])
   524  		return float64(v), 0, nil
   525  	case TagUint:
   526  		if i.off >= len(i.tape.Tape) {
   527  			return 0, 0, errors.New("corrupt input: expected integer, but no more values on tape")
   528  		}
   529  		v := i.tape.Tape[i.off]
   530  		return float64(v), FloatFlags(i.cur), nil
   531  	default:
   532  		return 0, 0, fmt.Errorf("unable to convert type %v to float", i.t)
   533  	}
   534  }
   535  
   536  // Int returns the integer value of the next element.
   537  // Integers and floats within range are automatically converted.
   538  func (i *Iter) Int() (int64, error) {
   539  	switch i.t {
   540  	case TagFloat:
   541  		if i.off >= len(i.tape.Tape) {
   542  			return 0, errors.New("corrupt input: expected float, but no more values on tape")
   543  		}
   544  		v := math.Float64frombits(i.tape.Tape[i.off])
   545  		if v > math.MaxInt64 {
   546  			return 0, errors.New("float value overflows int64")
   547  		}
   548  		if v < math.MinInt64 {
   549  			return 0, errors.New("float value underflows int64")
   550  		}
   551  		return int64(v), nil
   552  	case TagInteger:
   553  		if i.off >= len(i.tape.Tape) {
   554  			return 0, errors.New("corrupt input: expected integer, but no more values on tape")
   555  		}
   556  		v := int64(i.tape.Tape[i.off])
   557  		return v, nil
   558  	case TagUint:
   559  		if i.off >= len(i.tape.Tape) {
   560  			return 0, errors.New("corrupt input: expected integer, but no more values on tape")
   561  		}
   562  		v := i.tape.Tape[i.off]
   563  		if v > math.MaxInt64 {
   564  			return 0, errors.New("unsigned integer value overflows int64")
   565  		}
   566  		return int64(v), nil
   567  	default:
   568  		return 0, fmt.Errorf("unable to convert type %v to float", i.t)
   569  	}
   570  }
   571  
   572  // Uint returns the unsigned integer value of the next element.
   573  // Positive integers and floats within range are automatically converted.
   574  func (i *Iter) Uint() (uint64, error) {
   575  	switch i.t {
   576  	case TagFloat:
   577  		if i.off >= len(i.tape.Tape) {
   578  			return 0, errors.New("corrupt input: expected float, but no more values on tape")
   579  		}
   580  		v := math.Float64frombits(i.tape.Tape[i.off])
   581  		if v > math.MaxUint64 {
   582  			return 0, errors.New("float value overflows uint64")
   583  		}
   584  		if v < 0 {
   585  			return 0, errors.New("float value is negative. cannot convert to uint")
   586  		}
   587  		return uint64(v), nil
   588  	case TagInteger:
   589  		if i.off >= len(i.tape.Tape) {
   590  			return 0, errors.New("corrupt input: expected integer, but no more values on tape")
   591  		}
   592  		v := int64(i.tape.Tape[i.off])
   593  		if v < 0 {
   594  			return 0, errors.New("integer value is negative. cannot convert to uint")
   595  		}
   596  
   597  		return uint64(v), nil
   598  	case TagUint:
   599  		if i.off >= len(i.tape.Tape) {
   600  			return 0, errors.New("corrupt input: expected integer, but no more values on tape")
   601  		}
   602  		v := i.tape.Tape[i.off]
   603  		return v, nil
   604  	default:
   605  		return 0, fmt.Errorf("unable to convert type %v to float", i.t)
   606  	}
   607  }
   608  
   609  // String() returns a string value.
   610  func (i *Iter) String() (string, error) {
   611  	if i.t != TagString {
   612  		return "", errors.New("value is not string")
   613  	}
   614  	if i.off >= len(i.tape.Tape) {
   615  		return "", errors.New("corrupt input: no string offset")
   616  	}
   617  
   618  	return i.tape.stringAt(i.cur, i.tape.Tape[i.off])
   619  }
   620  
   621  // StringBytes() returns a byte array.
   622  func (i *Iter) StringBytes() ([]byte, error) {
   623  	if i.t != TagString {
   624  		return nil, errors.New("value is not string")
   625  	}
   626  	if i.off >= len(i.tape.Tape) {
   627  		return nil, errors.New("corrupt input: no string offset on tape")
   628  	}
   629  	return i.tape.stringByteAt(i.cur, i.tape.Tape[i.off])
   630  }
   631  
   632  // StringCvt() returns a string representation of the value.
   633  // Root, Object and Arrays are not supported.
   634  func (i *Iter) StringCvt() (string, error) {
   635  	switch i.t {
   636  	case TagString:
   637  		return i.String()
   638  	case TagInteger:
   639  		v, err := i.Int()
   640  		return strconv.FormatInt(v, 10), err
   641  	case TagUint:
   642  		v, err := i.Uint()
   643  		return strconv.FormatUint(v, 10), err
   644  	case TagFloat:
   645  		v, err := i.Float()
   646  		if err != nil {
   647  			return "", err
   648  		}
   649  		return floatToString(v)
   650  	case TagBoolFalse:
   651  		return "false", nil
   652  	case TagBoolTrue:
   653  		return "true", nil
   654  	case TagNull:
   655  		return "null", nil
   656  	}
   657  	return "", fmt.Errorf("cannot convert type %s to string", TagToType[i.t])
   658  }
   659  
   660  // Root() returns the object embedded in root as an iterator
   661  // along with the type of the content of the first element of the iterator.
   662  // An optional destination can be supplied to avoid allocations.
   663  func (i *Iter) Root(dst *Iter) (Type, *Iter, error) {
   664  	if i.t != TagRoot {
   665  		return TypeNone, dst, errors.New("value is not root")
   666  	}
   667  	if i.cur > uint64(len(i.tape.Tape)) {
   668  		return TypeNone, dst, errors.New("root element extends beyond tape")
   669  	}
   670  	if dst == nil {
   671  		c := *i
   672  		dst = &c
   673  	} else {
   674  		dst.cur = i.cur
   675  		dst.off = i.off
   676  		dst.t = i.t
   677  		dst.tape.Strings = i.tape.Strings
   678  		dst.tape.Message = i.tape.Message
   679  	}
   680  	dst.addNext = 0
   681  	dst.tape.Tape = i.tape.Tape[:i.cur-1]
   682  	return dst.AdvanceInto().Type(), dst, nil
   683  }
   684  
   685  // Bool() returns the bool value.
   686  func (i *Iter) Bool() (bool, error) {
   687  	switch i.t {
   688  	case TagBoolTrue:
   689  		return true, nil
   690  	case TagBoolFalse:
   691  		return false, nil
   692  	}
   693  	return false, fmt.Errorf("value is not bool, but %v", i.t)
   694  }
   695  
   696  // Interface returns the value as an interface.
   697  // Objects are returned as map[string]interface{}.
   698  // Arrays are returned as []interface{}.
   699  // Float values are returned as float64.
   700  // Integer values are returned as int64 or uint64.
   701  // String values are returned as string.
   702  // Boolean values are returned as bool.
   703  // Null values are returned as nil.
   704  // Root objects are returned as []interface{}.
   705  func (i *Iter) Interface() (interface{}, error) {
   706  	switch i.t.Type() {
   707  	case TypeUint:
   708  		return i.Uint()
   709  	case TypeInt:
   710  		return i.Int()
   711  	case TypeFloat:
   712  		return i.Float()
   713  	case TypeNull:
   714  		return nil, nil
   715  	case TypeArray:
   716  		arr, err := i.Array(nil)
   717  		if err != nil {
   718  			return nil, err
   719  		}
   720  		return arr.Interface()
   721  	case TypeString:
   722  		return i.String()
   723  	case TypeObject:
   724  		obj, err := i.Object(nil)
   725  		if err != nil {
   726  			return nil, err
   727  		}
   728  		return obj.Map(nil)
   729  	case TypeBool:
   730  		return i.t == TagBoolTrue, nil
   731  	case TypeRoot:
   732  		var dst []interface{}
   733  		var tmp Iter
   734  		for {
   735  			typ, obj, err := i.Root(&tmp)
   736  			if err != nil {
   737  				return nil, err
   738  			}
   739  			if typ == TypeNone {
   740  				break
   741  			}
   742  			elem, err := obj.Interface()
   743  			if err != nil {
   744  				return nil, err
   745  			}
   746  			dst = append(dst, elem)
   747  			typ = i.Advance()
   748  			if typ != TypeRoot {
   749  				break
   750  			}
   751  		}
   752  		return dst, nil
   753  	case TypeNone:
   754  		if i.PeekNextTag() == TagEnd {
   755  			return nil, errors.New("no content in iterator")
   756  		}
   757  		i.Advance()
   758  		return i.Interface()
   759  	default:
   760  	}
   761  	return nil, fmt.Errorf("unknown tag type: %v", i.t)
   762  }
   763  
   764  // Object will return the next element as an object.
   765  // An optional destination can be given.
   766  func (i *Iter) Object(dst *Object) (*Object, error) {
   767  	if i.t != TagObjectStart {
   768  		return nil, errors.New("next item is not object")
   769  	}
   770  	end := i.cur
   771  	if end < uint64(i.off) {
   772  		return nil, errors.New("corrupt input: object ends at index before start")
   773  	}
   774  	if uint64(len(i.tape.Tape)) < end {
   775  		return nil, errors.New("corrupt input: object extended beyond tape")
   776  	}
   777  	if dst == nil {
   778  		dst = &Object{}
   779  	}
   780  	dst.tape.Tape = i.tape.Tape[:end]
   781  	dst.tape.Strings = i.tape.Strings
   782  	dst.tape.Message = i.tape.Message
   783  	dst.off = i.off
   784  
   785  	return dst, nil
   786  }
   787  
   788  // Array will return the next element as an array.
   789  // An optional destination can be given.
   790  func (i *Iter) Array(dst *Array) (*Array, error) {
   791  	if i.t != TagArrayStart {
   792  		return nil, errors.New("next item is not object")
   793  	}
   794  	end := i.cur
   795  	if uint64(len(i.tape.Tape)) < end {
   796  		return nil, errors.New("corrupt input: object extended beyond tape")
   797  	}
   798  	if dst == nil {
   799  		dst = &Array{}
   800  	}
   801  	dst.tape.Tape = i.tape.Tape[:end]
   802  	dst.tape.Strings = i.tape.Strings
   803  	dst.tape.Message = i.tape.Message
   804  	dst.off = i.off
   805  
   806  	return dst, nil
   807  }
   808  
   809  func (pj *ParsedJson) Reset() {
   810  	pj.Tape = pj.Tape[:0]
   811  	pj.Strings = pj.Strings[:0]
   812  	pj.Message = pj.Message[:0]
   813  }
   814  
   815  func (pj *ParsedJson) get_current_loc() uint64 {
   816  	return uint64(len(pj.Tape))
   817  }
   818  
   819  func (pj *ParsedJson) write_tape(val uint64, c byte) {
   820  	pj.Tape = append(pj.Tape, val|(uint64(c)<<56))
   821  }
   822  
   823  // writeTapeTagVal will write a tag with no embedded value and a value to the tape.
   824  func (pj *ParsedJson) writeTapeTagVal(tag Tag, val uint64) {
   825  	pj.Tape = append(pj.Tape, uint64(tag)<<56, val)
   826  }
   827  
   828  func (pj *ParsedJson) writeTapeTagValFlags(tag Tag, val, flags uint64) {
   829  	pj.Tape = append(pj.Tape, uint64(tag)<<56|flags, val)
   830  }
   831  
   832  func (pj *ParsedJson) write_tape_s64(val int64) {
   833  	pj.writeTapeTagVal(TagInteger, uint64(val))
   834  }
   835  
   836  func (pj *ParsedJson) write_tape_double(d float64) {
   837  	pj.writeTapeTagVal(TagFloat, math.Float64bits(d))
   838  }
   839  
   840  func (pj *ParsedJson) annotate_previousloc(saved_loc uint64, val uint64) {
   841  	pj.Tape[saved_loc] |= val
   842  }
   843  
   844  // Tag indicates the data type of a tape entry
   845  type Tag uint8
   846  
   847  const (
   848  	TagString      = Tag('"')
   849  	TagInteger     = Tag('l')
   850  	TagUint        = Tag('u')
   851  	TagFloat       = Tag('d')
   852  	TagNull        = Tag('n')
   853  	TagBoolTrue    = Tag('t')
   854  	TagBoolFalse   = Tag('f')
   855  	TagObjectStart = Tag('{')
   856  	TagObjectEnd   = Tag('}')
   857  	TagArrayStart  = Tag('[')
   858  	TagArrayEnd    = Tag(']')
   859  	TagRoot        = Tag('r')
   860  	TagEnd         = Tag(0)
   861  )
   862  
   863  var tagOpenToClose = [256]Tag{
   864  	TagObjectStart: TagObjectEnd,
   865  	TagArrayStart:  TagArrayEnd,
   866  	TagRoot:        TagRoot,
   867  }
   868  
   869  func (t Tag) String() string {
   870  	return string([]byte{byte(t)})
   871  }
   872  
   873  // Type is a JSON value type.
   874  type Type uint8
   875  
   876  const (
   877  	TypeNone Type = iota
   878  	TypeNull
   879  	TypeString
   880  	TypeInt
   881  	TypeUint
   882  	TypeFloat
   883  	TypeBool
   884  	TypeObject
   885  	TypeArray
   886  	TypeRoot
   887  )
   888  
   889  // String returns the type as a string.
   890  func (t Type) String() string {
   891  	switch t {
   892  	case TypeNone:
   893  		return "(no type)"
   894  	case TypeNull:
   895  		return "null"
   896  	case TypeString:
   897  		return "string"
   898  	case TypeInt:
   899  		return "int"
   900  	case TypeUint:
   901  		return "uint"
   902  	case TypeFloat:
   903  		return "float"
   904  	case TypeBool:
   905  		return "bool"
   906  	case TypeObject:
   907  		return "object"
   908  	case TypeArray:
   909  		return "array"
   910  	case TypeRoot:
   911  		return "root"
   912  	}
   913  	return "(invalid)"
   914  }
   915  
   916  // TagToType converts a tag to type.
   917  // For arrays and objects only the start tag will return types.
   918  // All non-existing tags returns TypeNone.
   919  var TagToType = [256]Type{
   920  	TagString:      TypeString,
   921  	TagInteger:     TypeInt,
   922  	TagUint:        TypeUint,
   923  	TagFloat:       TypeFloat,
   924  	TagNull:        TypeNull,
   925  	TagBoolTrue:    TypeBool,
   926  	TagBoolFalse:   TypeBool,
   927  	TagObjectStart: TypeObject,
   928  	TagArrayStart:  TypeArray,
   929  	TagRoot:        TypeRoot,
   930  }
   931  
   932  // Type converts a tag to a type.
   933  // Only basic types and array+object start match a type.
   934  func (t Tag) Type() Type {
   935  	return TagToType[t]
   936  }
   937  
   938  func (pj *internalParsedJson) dump_raw_tape() bool {
   939  
   940  	if !pj.isvalid {
   941  		return false
   942  	}
   943  
   944  	for tapeidx := uint64(0); tapeidx < uint64(len(pj.Tape)); tapeidx++ {
   945  		howmany := uint64(0)
   946  		tape_val := pj.Tape[tapeidx]
   947  		ntype := byte(tape_val >> 56)
   948  		fmt.Printf("%d : %c", tapeidx, ntype)
   949  
   950  		if ntype == 'r' {
   951  			howmany = tape_val & JSONVALUEMASK
   952  		} else {
   953  			fmt.Errorf("Error: no starting root node?\n")
   954  			return false
   955  		}
   956  		fmt.Printf("\t// pointing to %d (right after last node)\n", howmany)
   957  
   958  		// Decrement howmany (since we're adding one now for the ndjson support)
   959  		howmany -= 1
   960  
   961  		tapeidx++
   962  		for ; tapeidx < howmany; tapeidx++ {
   963  			tape_val = pj.Tape[tapeidx]
   964  			fmt.Printf("%d : ", tapeidx)
   965  			ntype := Tag(tape_val >> 56)
   966  			payload := tape_val & JSONVALUEMASK
   967  			switch ntype {
   968  			case TagString: // we have a string
   969  				if tapeidx+1 >= howmany {
   970  					return false
   971  				}
   972  				fmt.Printf("string \"")
   973  				tapeidx++
   974  				string_length := pj.Tape[tapeidx]
   975  				str, err := pj.stringAt(payload, string_length)
   976  				if err != nil {
   977  					fmt.Printf("string err:%v\n", err)
   978  					return false
   979  				}
   980  				fmt.Printf("%s (o:%d, l:%d)", print_with_escapes([]byte(str)), payload, string_length)
   981  				fmt.Println("\"")
   982  
   983  			case TagInteger: // we have a long int
   984  				if tapeidx+1 >= howmany {
   985  					return false
   986  				}
   987  				tapeidx++
   988  				fmt.Printf("integer %d\n", int64(pj.Tape[tapeidx]))
   989  
   990  			case TagFloat: // we have a double
   991  				if tapeidx+1 >= howmany {
   992  					return false
   993  				}
   994  				tapeidx++
   995  				fmt.Printf("float %f\n", math.Float64frombits(pj.Tape[tapeidx]))
   996  
   997  			case TagNull: // we have a null
   998  				fmt.Printf("null\n")
   999  
  1000  			case TagBoolTrue: // we have a true
  1001  				fmt.Printf("true\n")
  1002  
  1003  			case TagBoolFalse: // we have a false
  1004  				fmt.Printf("false\n")
  1005  
  1006  			case TagObjectStart: // we have an object
  1007  				fmt.Printf("{\t// pointing to next Tape location %d (first node after the scope) \n", payload)
  1008  
  1009  			case TagObjectEnd: // we end an object
  1010  				fmt.Printf("}\t// pointing to previous Tape location %d (start of the scope) \n", payload)
  1011  
  1012  			case TagArrayStart: // we start an array
  1013  				fmt.Printf("\t// pointing to next Tape location %d (first node after the scope) \n", payload)
  1014  
  1015  			case TagArrayEnd: // we end an array
  1016  				fmt.Printf("]\t// pointing to previous Tape location %d (start of the scope) \n", payload)
  1017  
  1018  			case TagRoot: // we start and end with the root node
  1019  				fmt.Printf("end of root\n")
  1020  				return false
  1021  
  1022  			default:
  1023  				return false
  1024  			}
  1025  		}
  1026  
  1027  		tape_val = pj.Tape[tapeidx]
  1028  		payload := tape_val & JSONVALUEMASK
  1029  		ntype = byte(tape_val >> 56)
  1030  		fmt.Printf("%d : %c\t// pointing to %d (start root)\n", tapeidx, ntype, payload)
  1031  	}
  1032  
  1033  	return true
  1034  }
  1035  
  1036  func print_with_escapes(src []byte) string {
  1037  	return string(escapeBytes(make([]byte, 0, len(src)+len(src)>>4), src))
  1038  }
  1039  
  1040  // escapeBytes will escape JSON bytes.
  1041  // Output is appended to dst.
  1042  func escapeBytes(dst, src []byte) []byte {
  1043  	for _, s := range src {
  1044  		switch s {
  1045  		case '\b':
  1046  			dst = append(dst, '\\', 'b')
  1047  
  1048  		case '\f':
  1049  			dst = append(dst, '\\', 'f')
  1050  
  1051  		case '\n':
  1052  			dst = append(dst, '\\', 'n')
  1053  
  1054  		case '\r':
  1055  			dst = append(dst, '\\', 'r')
  1056  
  1057  		case '"':
  1058  			dst = append(dst, '\\', '"')
  1059  
  1060  		case '\t':
  1061  			dst = append(dst, '\\', 't')
  1062  
  1063  		case '\\':
  1064  			dst = append(dst, '\\', '\\')
  1065  
  1066  		default:
  1067  			if s <= 0x1f {
  1068  				dst = append(dst, '\\', 'u', '0', '0', valToHex[s>>4], valToHex[s&0xf])
  1069  			} else {
  1070  				dst = append(dst, s)
  1071  			}
  1072  		}
  1073  	}
  1074  
  1075  	return dst
  1076  }
  1077  
  1078  var valToHex = [16]byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}
  1079  
  1080  // floatToString converts a float to string similar to Go stdlib.
  1081  func floatToString(f float64) (string, error) {
  1082  	var tmp [32]byte
  1083  	v, err := appendFloat(tmp[:0], f)
  1084  	return string(v), err
  1085  }
  1086  
  1087  // appendFloat converts a float to string similar to Go stdlib and appends it to dst.
  1088  func appendFloat(dst []byte, f float64) ([]byte, error) {
  1089  	if math.IsInf(f, 0) || math.IsNaN(f) {
  1090  		return nil, errors.New("INF or NaN number found")
  1091  	}
  1092  
  1093  	// Convert as if by ES6 number to string conversion.
  1094  	// This matches most other JSON generators.
  1095  	// See golang.org/issue/6384 and golang.org/issue/14135.
  1096  	// Like fmt %g, but the exponent cutoffs are different
  1097  	// and exponents themselves are not padded to two digits.
  1098  	abs := math.Abs(f)
  1099  	fmt := byte('f')
  1100  	if abs != 0 {
  1101  		if abs < 1e-6 || abs >= 1e21 {
  1102  			fmt = 'e'
  1103  		}
  1104  	}
  1105  	dst = strconv.AppendFloat(dst, f, fmt, -1, 64)
  1106  	if fmt == 'e' {
  1107  		// clean up e-09 to e-9
  1108  		n := len(dst)
  1109  		if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
  1110  			dst[n-2] = dst[n-1]
  1111  			dst = dst[:n-1]
  1112  		}
  1113  	}
  1114  	return dst, nil
  1115  }