git.lukeshu.com/go/lowmemjson@v0.3.9-0.20230723050957-72f6d13f6fb2/decode_scan.go (about)

     1  // Copyright (C) 2022-2023  Luke Shumaker <lukeshu@lukeshu.com>
     2  //
     3  // SPDX-License-Identifier: GPL-2.0-or-later
     4  
     5  package lowmemjson
     6  
     7  import (
     8  	"io"
     9  	"unicode/utf8"
    10  
    11  	"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
    12  )
    13  
    14  type runeTypeScanner struct {
    15  	// everything that is not "initialized by constructor" starts
    16  	// out as the zero value.
    17  
    18  	inner io.RuneScanner // initialized by constructor
    19  
    20  	parser jsonparse.Parser // initialized by constructor
    21  	offset int64
    22  
    23  	rTypeOK bool
    24  	repeat  bool
    25  
    26  	rRune   rune
    27  	rSize   int
    28  	rIsRune bool
    29  	rType   jsonparse.RuneType
    30  	rErr    error
    31  }
    32  
    33  // The returned error is a *ReadError, a *SyntaxError, or nil.
    34  // An EOF condition is represented as one of:
    35  //
    36  //	end of value:                   (_, 0, RuneTypeEOF, nil)
    37  //	end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF})
    38  //	end of file at start of value:  (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF})
    39  func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) {
    40  	switch {
    41  	case sc.rTypeOK && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF):
    42  		// do nothing
    43  	case sc.repeat:
    44  		sc.offset += int64(sc.rSize)
    45  		_, _, _ = sc.inner.ReadRune()
    46  	case sc.parser.IsAtBarrier():
    47  		sc.rTypeOK = true
    48  		sc.rType = jsonparse.RuneTypeEOF
    49  		sc.rRune = 0
    50  		sc.rSize = 0
    51  		sc.rErr = nil
    52  	default:
    53  		sc.rTypeOK = true
    54  	again:
    55  		var err error
    56  		sc.rRune, sc.rSize, err = sc.inner.ReadRune()
    57  		sc.offset += int64(sc.rSize)
    58  		switch err {
    59  		case nil:
    60  			sc.rIsRune = true
    61  			if sc.rRune == utf8.RuneError && sc.rSize == 1 {
    62  				if bs, ok := sc.inner.(io.ByteScanner); ok {
    63  					_ = bs.UnreadByte() // UnreadRune doesn't back up the ReadByte-pos
    64  					b, _ := bs.ReadByte()
    65  					_ = bs.UnreadByte()
    66  					_, _, _ = sc.inner.ReadRune()
    67  					sc.rRune = rune(b)
    68  					sc.rIsRune = false
    69  				}
    70  			}
    71  			sc.rType, err = sc.parser.HandleRune(sc.rRune, sc.rIsRune)
    72  			if err != nil {
    73  				sc.rErr = &DecodeSyntaxError{
    74  					Offset: sc.offset - int64(sc.rSize),
    75  					Err:    err,
    76  				}
    77  			} else {
    78  				sc.rErr = nil
    79  			}
    80  			switch sc.rType {
    81  			case jsonparse.RuneTypeSpace:
    82  				goto again
    83  			case jsonparse.RuneTypeEOF:
    84  				sc.offset -= int64(sc.rSize)
    85  				sc.rRune = 0
    86  				sc.rSize = 0
    87  				_ = sc.inner.UnreadRune()
    88  			}
    89  		case io.EOF:
    90  			sc.rType, err = sc.parser.HandleEOF()
    91  			if err != nil {
    92  				sc.rErr = &DecodeSyntaxError{
    93  					Offset: sc.offset,
    94  					Err:    err,
    95  				}
    96  			} else {
    97  				sc.rErr = nil
    98  			}
    99  		default:
   100  			sc.rType = 0
   101  			sc.rErr = &DecodeReadError{
   102  				Offset: sc.offset,
   103  				Err:    err,
   104  			}
   105  		}
   106  	}
   107  	sc.repeat = false
   108  	if sc.rSize > 0 && !sc.rIsRune {
   109  		return utf8.RuneError, sc.rSize, sc.rType, sc.rErr
   110  	}
   111  	return sc.rRune, sc.rSize, sc.rType, sc.rErr
   112  }
   113  
   114  // UnreadRune undoes a call to .ReadRuneType().
   115  //
   116  // If the last call to .ReadRuneType() has already been unread, or if
   117  // that call returned a rune with size 0, then ErrInvalidUnreadRune is
   118  // returned.  Otherwise, nil is returned.
   119  func (sc *runeTypeScanner) UnreadRune() error {
   120  	if sc.repeat || sc.rSize == 0 {
   121  		return ErrInvalidUnreadRune
   122  	}
   123  	sc.repeat = true
   124  	sc.offset -= int64(sc.rSize)
   125  	_ = sc.inner.UnreadRune()
   126  	return nil
   127  }
   128  
   129  func (sc *runeTypeScanner) InputOffset() int64 {
   130  	return sc.offset
   131  }
   132  
   133  func (sc *runeTypeScanner) PushReadBarrier() {
   134  	sc.parser.PushReadBarrier()
   135  }
   136  
   137  func (sc *runeTypeScanner) PopReadBarrier() {
   138  	sc.parser.PopBarrier()
   139  	switch {
   140  	case sc.repeat:
   141  		// re-figure the rType and rErr
   142  		var err error
   143  		sc.rType, err = sc.parser.HandleRune(sc.rRune, sc.rIsRune)
   144  		if err != nil {
   145  			sc.rErr = &DecodeSyntaxError{
   146  				Offset: sc.offset - int64(sc.rSize),
   147  				Err:    err,
   148  			}
   149  		} else {
   150  			sc.rErr = nil
   151  		}
   152  	case sc.rTypeOK && sc.rType == jsonparse.RuneTypeEOF:
   153  		sc.rTypeOK = false // forget the sticky EOF
   154  	}
   155  }
   156  
   157  func (sc *runeTypeScanner) Reset() {
   158  	sc.parser.Reset()
   159  	sc.rTypeOK = false // forget any sticky errors/EOF
   160  	sc.repeat = false  // feed the rune (if any) through the parser again
   161  }