github.com/urso/go-structform@v0.0.2/cborl/parse.go (about)

     1  package cborl
     2  
     3  import (
     4  	"encoding/binary"
     5  	"io"
     6  	"math"
     7  
     8  	structform "github.com/urso/go-structform"
     9  )
    10  
// Parser is an incremental (push-style) CBOR parser. Input is fed in chunks
// via Write, Parse or ParseString; decoded values are reported to the
// configured structform.Visitor.
type Parser struct {
	// visitor receives all parsed values and structure events.
	visitor structform.Visitor
	// strVisitor is derived from visitor (structform.MakeStringRefVisitor) and
	// is used to report text strings and map keys as byte-slice references.
	strVisitor structform.StringRefVisitor

	// last fail state
	// (set by Write to the result of the most recent feed; returned by
	// execStep when the current state is stFail)
	err error

	// parser state machine
	state stateStack

	// length holds the number of outstanding bytes/elements of the
	// length-prefixed items currently being parsed.
	length lengthStack

	// buffer accumulates bytes of an item that is split across input chunks
	// (see collect). It initially aliases buffer0.
	buffer  []byte
	buffer0 [64]byte
}
    26  
    27  type state struct {
    28  	major uint8
    29  	minor uint8
    30  }
    31  
// additional parser state 'major' types
//
// Plain CBOR major types (majorUint, majorArr, ...) are used as state codes
// as well; the values below extend them.
const (
	// stFail: execStep returns the stored parser error.
	stFail uint8 = 1
	// stValue: the next byte is the head of a value (see stepValue).
	stValue uint8 = 2
	// stLen: an explicit length argument is being read; the state's minor
	// holds the length code (see stepLen).
	stLen uint8 = 3
	// stStartX is a flag bit OR-ed into a major type to mark an item whose
	// start has not been processed yet.
	stStartX uint8 = 4
	// stIndef is a flag bit OR-ed into majorArr/majorMap to mark containers
	// of indefinite length. NOTE(review): it shares the numeric value 1 with
	// stFail; stIndef is only ever used in combination with a major type.
	stIndef         uint8 = 1
	stStartArr      uint8 = majorArr | stStartX
	stStartMap      uint8 = majorMap | stStartX
	stStartIndefArr uint8 = majorArr | stStartX | stIndef
	stStartIndefMap uint8 = majorMap | stStartX | stIndef
	// stKey: the bytes of a map key are being collected (see stepKey).
	stKey uint8 = majorMap | 8
	// stElem: a map key has been reported, the matching value follows.
	stElem uint8 = majorMap | 9
)
    46  
    47  const (
    48  	stStart uint8 = iota + 1
    49  	stCont
    50  )
    51  
    52  func NewParser(vs structform.Visitor) *Parser {
    53  	p := &Parser{}
    54  	p.init(vs)
    55  	return p
    56  }
    57  
    58  func ParseReader(in io.Reader, vs structform.Visitor) (int64, error) {
    59  	p := NewParser(vs)
    60  	i, err := io.Copy(p, in)
    61  	return i, err
    62  }
    63  
    64  func Parse(b []byte, vs structform.Visitor) error {
    65  	return NewParser(vs).Parse(b)
    66  }
    67  
    68  func ParseString(str string, vs structform.Visitor) error {
    69  	return NewParser(vs).ParseString(str)
    70  }
    71  
    72  func (p *Parser) init(vs structform.Visitor) {
    73  	*p = Parser{
    74  		visitor:    vs,
    75  		strVisitor: structform.MakeStringRefVisitor(vs),
    76  	}
    77  	p.buffer = p.buffer0[:0]
    78  	p.length.init()
    79  	p.state.init(state{stValue, stStart})
    80  }
    81  
    82  func (p *Parser) Write(b []byte) (int, error) {
    83  	p.err = p.feed(b)
    84  	if p.err != nil {
    85  		return 0, p.err
    86  	}
    87  	return len(b), nil
    88  }
    89  
    90  func (p *Parser) ParseString(str string) error {
    91  	return p.Parse(str2Bytes(str))
    92  }
    93  
    94  func (p *Parser) Parse(b []byte) error {
    95  	return p.feed(b)
    96  }
    97  
    98  func (p *Parser) feed(b []byte) error {
    99  	for len(b) > 0 {
   100  		n, _, err := p.feedUntil(b)
   101  		if err != nil {
   102  			return err
   103  		}
   104  
   105  		b = b[n:]
   106  	}
   107  
   108  	return nil
   109  }
   110  
   111  func (p *Parser) feedUntil(b []byte) (int, bool, error) {
   112  	var (
   113  		orig = b
   114  		done bool
   115  		err  error
   116  	)
   117  
   118  	for {
   119  		b, done, err = p.execStep(b)
   120  		if done || err != nil {
   121  			break
   122  		}
   123  
   124  		// continue parsing if input buffer is not empty, or structure with length
   125  		// fields must be initialized
   126  		// -> structures with length 0 will be reported immediately
   127  		contParse := len(b) != 0 ||
   128  			(p.state.current.major&(stStartX|stIndef)) == stStartX
   129  		if !contParse {
   130  			break
   131  		}
   132  	}
   133  	return len(orig) - len(b), done, err
   134  }
   135  
// execStep runs a single step of the state machine, selected by the major
// code of the current state. It returns the unconsumed rest of b, whether a
// step handler reported done, and any error raised by the visitor or by the
// parser itself.
func (p *Parser) execStep(b []byte) ([]byte, bool, error) {
	var (
		err  error
		done bool
	)

	switch p.state.current.major {
	case stFail:
		return b, false, p.err
	case stValue:
		b, done, err = p.stepValue(b)

	case stLen:
		// read the explicit length argument; stepLen pops the stLen state
		// once the length is complete, exposing the state pushed beneath it
		b = p.stepLen(b)
	case majorUint:
		b, done, err = p.stepUint(b)
	case majorNeg:
		b, done, err = p.stepNeg(b)
	case codeSingleFloat:
		b, done, err = p.stepSingleFloat(b)
	case codeDoubleFloat:
		b, done, err = p.stepDoubleFloat(b)

	case majorBytes | stStartX:
		// empty byte string: report start and end right away
		if p.length.current == 0 {
			err = p.visitor.OnArrayStart(0, structform.ByteType)
			if err == nil {
				err = p.visitor.OnArrayFinished()
				p.length.pop()
				if err == nil {
					done, err = p.popState()
				}
			}

			break
		}

		// clear the start flag, so follow-up steps enter 'case majorBytes'
		p.state.current.major &= ^stStartX
		if len(b) == 0 {
			break
		}
		fallthrough
	case majorBytes:
		b, done, err = p.stepBytes(b)

	case majorText | stStartX:
		// empty text string: report it right away
		if p.length.current == 0 {
			p.length.pop()
			err = p.visitor.OnString("")
			if err == nil {
				done, err = p.popState()
			}
			break
		}

		// clear the start flag, so follow-up steps enter 'case majorText'
		p.state.current.major &= ^stStartX
		if len(b) == 0 {
			break
		}
		fallthrough
	case majorText:
		b, done, err = p.stepText(b)

	case stStartArr:
		// announce the array, then drop the start state; the majorArr state
		// pushed beneath it by initSub becomes current
		err = p.visitor.OnArrayStart(int(p.length.current), structform.AnyType)
		if err != nil {
			break
		}
		p.state.pop()
		fallthrough
	case majorArr:
		b, done, err = p.stepArray(b)

	case stStartIndefArr:
		// indefinite-length array: the length is reported as -1
		err = p.visitor.OnArrayStart(-1, structform.AnyType)
		if err != nil {
			break
		}
		p.state.pop()
		fallthrough
	case majorArr | stIndef:
		// NOTE: b[0] is read without a length check; feedUntil only re-enters
		// a state carrying the stIndef bit while input bytes remain.
		if b[0] == codeBreak {
			b = b[1:]
			err = p.visitor.OnArrayFinished()
			if err == nil {
				done, err = p.popState()
			}
		} else {
			b, done, err = p.stepValue(b)
		}

	case stStartMap:
		// announce the map, then drop the start state; the majorMap state
		// pushed beneath it by initSub becomes current
		err = p.visitor.OnObjectStart(int(p.length.current), structform.AnyType)
		if err != nil {
			break
		}
		p.state.pop()
		fallthrough
	case majorMap:
		b, done, err = p.stepMap(b)
	case stStartIndefMap:
		// indefinite-length map: the length is reported as -1
		err = p.visitor.OnObjectStart(-1, structform.AnyType)
		if err != nil {
			break
		}
		p.state.pop()
		fallthrough
	case majorMap | stIndef:
		// NOTE: b[0] is read without a length check, see 'majorArr | stIndef'.
		if b[0] == codeBreak {
			err = p.visitor.OnObjectFinished()
			b = b[1:]
			if err == nil {
				done, err = p.popState()
			}
		} else {
			b, done, err = p.initMapKey(b)
		}
	case stKey | stStartX:
		// keys of length 0 are rejected
		if p.length.current == 0 {
			err = errEmptyKey
			break
		}

		p.state.current.major &= (^stStartX)
		fallthrough
	case stKey:
		b, done, err = p.stepKey(b)
	case stElem:
		// the key has been reported (stepKey turned the key state into
		// stElem): drop that state and parse the value belonging to the key
		p.state.pop()
		b, done, err = p.stepValue(b)

	default:
		err = errTODO()
	}

	return b, done, err
}
   273  
   274  func (p *Parser) popState() (bool, error) {
   275  	p.state.pop()
   276  	return p.onValue()
   277  }
   278  
   279  func (p *Parser) onValue() (bool, error) {
   280  	switch p.state.current.major {
   281  	case majorArr:
   282  		p.length.current--
   283  		_, done, err := p.arrayHandleLen()
   284  		return done, err
   285  
   286  	case majorMap:
   287  		p.length.current--
   288  		_, done, err := p.mapHandleLen()
   289  		return done, err
   290  
   291  	case majorArr | stIndef, majorMap | stIndef:
   292  		return false, nil
   293  	}
   294  	return true, nil
   295  }
   296  
   297  func (p *Parser) stepValue(b []byte) ([]byte, bool, error) {
   298  	if len(b) == 0 {
   299  		return b, false, nil
   300  	}
   301  
   302  	major := b[0] & majorMask
   303  	switch major {
   304  	case majorUint:
   305  		if b[0] < len8b {
   306  			err := p.visitor.OnUint8(b[0])
   307  			done := false
   308  			if err == nil {
   309  				done, err = p.onValue()
   310  			}
   311  			return b[1:], done, err
   312  		}
   313  
   314  		p.state.push(state{major, b[0] & minorMask})
   315  		return b[1:], false, nil
   316  
   317  	case majorNeg:
   318  		minor := b[0] & minorMask
   319  		if v := minor; v < len8b {
   320  			err := p.visitor.OnInt8(int8(^v))
   321  			done := false
   322  			if err == nil {
   323  				done, err = p.onValue()
   324  			}
   325  			return b[1:], done, err
   326  		}
   327  
   328  		p.state.push(state{major, minor})
   329  		return b[1:], false, nil
   330  
   331  	case majorBytes, majorText:
   332  		minor := b[0] & minorMask
   333  		if minor == lenIndef {
   334  			return nil, false, errIndefByteSeq
   335  		} else {
   336  			return p.initByteSeq(major, minor, b[1:])
   337  		}
   338  
   339  	case majorArr, majorMap:
   340  		minor := b[0] & minorMask
   341  		return p.initSub(major, minor, b[1:])
   342  
   343  	case majorTag:
   344  		return nil, false, errTODO()
   345  
   346  	default:
   347  		var (
   348  			err  error
   349  			done bool
   350  		)
   351  
   352  		switch b[0] {
   353  		case codeFalse:
   354  			err = p.visitor.OnBool(false)
   355  			if err == nil {
   356  				done, err = p.onValue()
   357  			}
   358  			return b[1:], done, err
   359  		case codeTrue:
   360  			err = p.visitor.OnBool(true)
   361  			if err == nil {
   362  				done, err = p.onValue()
   363  			}
   364  			return b[1:], done, err
   365  		case codeNull, codeUndef:
   366  			err = p.visitor.OnNil()
   367  			if err == nil {
   368  				done, err = p.onValue()
   369  			}
   370  			return b[1:], done, err
   371  		case codeHalfFloat:
   372  			return b[1:], false, errTODO()
   373  		case codeSingleFloat, codeDoubleFloat:
   374  			p.state.push(state{b[0], stStart})
   375  			return b[1:], false, nil
   376  		}
   377  	}
   378  	return nil, false, errInvalidCode
   379  }
   380  
   381  func (p *Parser) stepUint(in []byte) (b []byte, done bool, err error) {
   382  	b = in
   383  	switch p.state.current.minor {
   384  	case len8b:
   385  		b, done, err = b[1:], true, p.visitor.OnUint8(b[0])
   386  	case len16b:
   387  		var v uint16
   388  		if b, done, v = p.getUint16(b); done {
   389  			err = p.visitor.OnUint16(v)
   390  		}
   391  	case len32b:
   392  		var v uint32
   393  		if b, done, v = p.getUint32(b); done {
   394  			err = p.visitor.OnUint32(v)
   395  		}
   396  	case len64b:
   397  		var v uint64
   398  		if b, done, v = p.getUint64(b); done {
   399  			err = p.visitor.OnUint64(v)
   400  		}
   401  	}
   402  
   403  	if done && err == nil {
   404  		done, err = p.popState()
   405  	}
   406  
   407  	return
   408  }
   409  
   410  func (p *Parser) stepBytes(b []byte) ([]byte, bool, error) {
   411  	// stream raw bytes via array visitor
   412  
   413  	var (
   414  		st  = &p.state.current
   415  		err error
   416  	)
   417  
   418  	if st.minor == stStart {
   419  		err = p.visitor.OnArrayStart(int(p.length.current), structform.ByteType)
   420  		if err != nil {
   421  			return nil, false, err
   422  		}
   423  		st.minor = stCont
   424  	}
   425  
   426  	L := int(p.length.current)
   427  	done := len(b) >= L
   428  	if !done {
   429  		L = len(b)
   430  		p.length.current -= int64(L)
   431  	}
   432  
   433  	for _, c := range b[:L] {
   434  		if err := p.visitor.OnByte(c); err != nil {
   435  			return nil, false, err
   436  		}
   437  	}
   438  
   439  	b = b[L:]
   440  	if done {
   441  		err = p.visitor.OnArrayFinished()
   442  		p.length.pop()
   443  		if err == nil {
   444  			done, err = p.popState()
   445  		}
   446  	}
   447  	return b, done, err
   448  }
   449  
   450  func (p *Parser) stepText(b []byte) ([]byte, bool, error) {
   451  	b, tmp := p.collect(b, int(p.length.current))
   452  	if tmp == nil {
   453  		return nil, false, nil
   454  	}
   455  
   456  	p.length.pop()
   457  
   458  	done := true
   459  	err := p.strVisitor.OnStringRef(tmp)
   460  	if err == nil {
   461  		done, err = p.popState()
   462  	}
   463  	return b, done, err
   464  }
   465  
   466  func (p *Parser) stepArray(b []byte) ([]byte, bool, error) {
   467  	val, done, err := p.arrayHandleLen()
   468  	if val {
   469  		b, done, err = p.stepValue(b)
   470  	}
   471  	return b, done, err
   472  }
   473  
   474  func (p *Parser) arrayHandleLen() (value, done bool, err error) {
   475  	if p.length.current > 0 {
   476  		return true, false, nil
   477  	}
   478  
   479  	err = p.visitor.OnArrayFinished()
   480  	if err == nil {
   481  		p.length.pop()
   482  		done, err = p.popState()
   483  	}
   484  
   485  	return false, done, err
   486  }
   487  
   488  func (p *Parser) stepMap(b []byte) ([]byte, bool, error) {
   489  	kv, done, err := p.mapHandleLen()
   490  	if kv && len(b) > 0 {
   491  		b, done, err = p.initMapKey(b)
   492  	}
   493  	return b, done, err
   494  }
   495  
   496  func (p *Parser) mapHandleLen() (kv, done bool, err error) {
   497  	if p.length.current > 0 {
   498  		return true, false, nil
   499  	}
   500  
   501  	err = p.visitor.OnObjectFinished()
   502  	if err == nil {
   503  		p.length.pop()
   504  		done, err = p.popState()
   505  	}
   506  	return false, done, err
   507  }
   508  
   509  func (p *Parser) initMapKey(b []byte) ([]byte, bool, error) {
   510  	// parse key:
   511  	major := b[0] & majorMask
   512  	if major != majorText {
   513  		return nil, false, errTextKeyRequired
   514  	}
   515  
   516  	minor := b[0] & minorMask
   517  	if minor == lenIndef {
   518  		return nil, false, errIndefByteSeq
   519  	}
   520  
   521  	return p.initByteSeq(stKey, minor, b[1:])
   522  }
   523  
   524  func (p *Parser) stepKey(b []byte) ([]byte, bool, error) {
   525  	b, tmp := p.collect(b, int(p.length.current))
   526  	if tmp == nil {
   527  		return nil, false, nil
   528  	}
   529  
   530  	err := p.strVisitor.OnKeyRef(tmp)
   531  	if err == nil {
   532  		p.length.pop()
   533  		p.state.current.major = stElem
   534  	}
   535  	return b, false, err
   536  }
   537  
   538  func (p *Parser) initByteSeq(major, minor uint8, b []byte) ([]byte, bool, error) {
   539  	if v := minor; v < len8b {
   540  		p.state.push(state{major | stStartX, stStart})
   541  		p.length.push(int64(v))
   542  		return b, false, nil
   543  	}
   544  
   545  	p.state.push(state{major | stStartX, stStart})
   546  	p.state.push(state{stLen, minor})
   547  	return b, false, nil
   548  }
   549  
   550  func (p *Parser) initSub(major, minor uint8, b []byte) ([]byte, bool, error) {
   551  	if minor == lenIndef {
   552  		// TODO: replace 2 state pushes with 1 state push + mask removing startX from current state
   553  		p.state.push(state{major | stIndef, stStart})
   554  		p.state.push(state{major | stStartX | stIndef, stStart})
   555  		return b, false, nil
   556  	}
   557  
   558  	if v := minor; v < len8b {
   559  		p.state.push(state{major, stStart})
   560  		p.state.push(state{major | stStartX, stStart})
   561  		p.length.push(int64(v))
   562  		return b, false, nil
   563  	}
   564  
   565  	p.state.push(state{major, stStart})
   566  	p.state.push(state{major | stStartX, stStart})
   567  	p.state.push(state{stLen, minor})
   568  	return b, false, nil
   569  }
   570  
   571  func (p *Parser) stepLen(b []byte) []byte {
   572  	var done bool
   573  
   574  	switch p.state.current.minor {
   575  	case len8b:
   576  		p.length.push(int64(b[0]))
   577  		b, done = b[1:], true
   578  	case len16b:
   579  		var v uint16
   580  		if b, done, v = p.getUint16(b); done {
   581  			p.length.push(int64(v))
   582  		}
   583  	case len32b:
   584  		var v uint32
   585  		if b, done, v = p.getUint32(b); done {
   586  			p.length.push(int64(v))
   587  		}
   588  
   589  	case len64b:
   590  		var v uint64
   591  		if b, done, v = p.getUint64(b); done {
   592  			p.length.push(int64(v))
   593  		}
   594  	}
   595  
   596  	if done {
   597  		p.state.pop()
   598  	}
   599  	return b
   600  }
   601  
   602  func (p *Parser) stepNeg(in []byte) (b []byte, done bool, err error) {
   603  	b = in
   604  	switch p.state.current.minor {
   605  	case len8b:
   606  		b, done, err = b[1:], true, p.visitor.OnInt8(int8(^b[0]))
   607  	case len16b:
   608  		var v uint16
   609  		if b, done, v = p.getUint16(b); done {
   610  			err = p.visitor.OnInt16(int16(^v))
   611  		}
   612  	case len32b:
   613  		var v uint32
   614  		if b, done, v = p.getUint32(b); done {
   615  			err = p.visitor.OnInt32(int32(^v))
   616  		}
   617  	case len64b:
   618  		var v uint64
   619  		if b, done, v = p.getUint64(b); done {
   620  			err = p.visitor.OnInt64(int64(^v))
   621  		}
   622  	}
   623  
   624  	if done && err == nil {
   625  		done, err = p.popState()
   626  	}
   627  	return
   628  }
   629  
   630  func (p *Parser) stepSingleFloat(in []byte) (b []byte, done bool, err error) {
   631  	var tmp uint32
   632  	if b, done, tmp = p.getUint32(in); done {
   633  		err = p.visitor.OnFloat32(math.Float32frombits(tmp))
   634  		if err == nil {
   635  			done, err = p.popState()
   636  		}
   637  	}
   638  	return
   639  }
   640  
   641  func (p *Parser) stepDoubleFloat(in []byte) (b []byte, done bool, err error) {
   642  	var tmp uint64
   643  	if b, done, tmp = p.getUint64(in); done {
   644  		err = p.visitor.OnFloat64(math.Float64frombits(tmp))
   645  		if err == nil {
   646  			done, err = p.popState()
   647  		}
   648  	}
   649  	return
   650  }
   651  
   652  func (p *Parser) getUint8(b []byte) ([]byte, bool, uint8) {
   653  	return b[1:], true, b[0]
   654  }
   655  
   656  func (p *Parser) getUint16(b []byte) ([]byte, bool, uint16) {
   657  	b, tmp := p.collect(b, 2)
   658  	if tmp == nil {
   659  		return nil, false, 0
   660  	}
   661  	return b, true, binary.BigEndian.Uint16(tmp)
   662  }
   663  
   664  func (p *Parser) getUint32(b []byte) ([]byte, bool, uint32) {
   665  	b, tmp := p.collect(b, 4)
   666  	if tmp == nil {
   667  		return b, false, 0
   668  	}
   669  
   670  	return b, true, binary.BigEndian.Uint32(tmp)
   671  }
   672  
   673  func (p *Parser) getUint64(b []byte) ([]byte, bool, uint64) {
   674  	b, tmp := p.collect(b, 8)
   675  	if tmp == nil {
   676  		return nil, false, 0
   677  	}
   678  	return b, true, binary.BigEndian.Uint64(tmp)
   679  }
   680  
   681  func (p *Parser) collect(b []byte, count int) ([]byte, []byte) {
   682  	if len(p.buffer) > 0 {
   683  		delta := count - len(p.buffer)
   684  		if delta > 0 {
   685  			N := delta
   686  			complete := true
   687  			if N > len(b) {
   688  				complete = false
   689  				N = len(b)
   690  			}
   691  
   692  			p.buffer = append(p.buffer, b[:N]...)
   693  			if !complete {
   694  				return nil, nil
   695  			}
   696  
   697  			// advance read buffer
   698  			b = b[N:]
   699  		}
   700  
   701  		if len(p.buffer) >= count {
   702  			tmp := p.buffer[:count]
   703  			if len(p.buffer) == count {
   704  				p.buffer = p.buffer0[:0]
   705  			} else {
   706  				p.buffer = p.buffer[count:]
   707  			}
   708  			return b, tmp
   709  		}
   710  	}
   711  
   712  	if len(b) >= count {
   713  		return b[count:], b[:count]
   714  	}
   715  
   716  	p.buffer = append(p.buffer, b...)
   717  	return nil, nil
   718  }
   719  
   720  func numBytes(code uint8) uint8 {
   721  	return 1 << ((code & minorMask) - len8b)
   722  }
   723  
   724  func readInt16(b []byte) int16 { return int16(^readUint16(b)) }
   725  func readInt32(b []byte) int32 { return int32(^readUint32(b)) }
   726  func readInt64(b []byte) int64 { return int64(^readUint64(b)) }
   727  
   728  func readUint16(b []byte) uint16 { return binary.BigEndian.Uint16(b) }
   729  func readUint32(b []byte) uint32 { return binary.BigEndian.Uint32(b) }
   730  func readUint64(b []byte) uint64 { return binary.BigEndian.Uint64(b) }