github.com/lbryio/lbcd@v0.22.119/txscript/tokenizer.go (about)

     1  // Copyright (c) 2019 The Decred developers
     2  // Use of this source code is governed by an ISC
     3  // license that can be found in the LICENSE file.
     4  
     5  package txscript
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  )
    11  
// opcodeArrayRef is used to break initialization cycles.  It is deliberately
// declared without an initializer and only pointed at opcodeArray from init
// below; the tokenizer performs all of its opcode lookups through this pointer
// rather than referring to opcodeArray directly.
var opcodeArrayRef *[256]opcode

// init points opcodeArrayRef at the package-level opcodeArray table.
func init() {
	opcodeArrayRef = &opcodeArray
}
    18  
// ScriptTokenizer provides a facility for easily and efficiently tokenizing
// transaction scripts without creating allocations.  Each successive opcode is
// parsed with the Next function, which returns false when iteration is
// complete, either due to successfully tokenizing the entire script or
// encountering a parse error.  In the case of failure, the Err function may be
// used to obtain the specific parse error.
//
// Upon successfully parsing an opcode, the opcode and data associated with it
// may be obtained via the Opcode and Data functions, respectively.
//
// The ByteIndex function may be used to obtain the tokenizer's current offset
// into the raw script.
//
// Instances are normally created with MakeScriptTokenizer, which also
// associates an error with the tokenizer up front when the requested script
// version is not supported.
type ScriptTokenizer struct {
	// script is the raw script being tokenized.
	script  []byte
	// version is the script version the tokenizer was created with.
	version uint16
	// offset is the index into script of the next opcode to parse.
	offset  int32
	// op is the most recently parsed opcode.  It is only assigned by a
	// successful call to Next.
	op      *opcode
	// data is the data associated with op.  It is either nil or a sub-slice
	// of script.
	data    []byte
	// err is the error associated with the tokenizer, if any.  Once it is
	// non-nil, Done reports true and Next stops parsing.
	err     error
}
    39  
    40  // Done returns true when either all opcodes have been exhausted or a parse
    41  // failure was encountered and therefore the state has an associated error.
    42  func (t *ScriptTokenizer) Done() bool {
    43  	return t.err != nil || t.offset >= int32(len(t.script))
    44  }
    45  
    46  // Next attempts to parse the next opcode and returns whether or not it was
    47  // successful.  It will not be successful if invoked when already at the end of
    48  // the script, a parse failure is encountered, or an associated error already
    49  // exists due to a previous parse failure.
    50  //
    51  // In the case of a true return, the parsed opcode and data can be obtained with
    52  // the associated functions and the offset into the script will either point to
    53  // the next opcode or the end of the script if the final opcode was parsed.
    54  //
    55  // In the case of a false return, the parsed opcode and data will be the last
    56  // successfully parsed values (if any) and the offset into the script will
    57  // either point to the failing opcode or the end of the script if the function
    58  // was invoked when already at the end of the script.
    59  //
    60  // Invoking this function when already at the end of the script is not
    61  // considered an error and will simply return false.
    62  func (t *ScriptTokenizer) Next() bool {
    63  	if t.Done() {
    64  		return false
    65  	}
    66  
    67  	op := &opcodeArrayRef[t.script[t.offset]]
    68  	switch {
    69  	// No additional data.  Note that some of the opcodes, notably OP_1NEGATE,
    70  	// OP_0, and OP_[1-16] represent the data themselves.
    71  	case op.length == 1:
    72  		t.offset++
    73  		t.op = op
    74  		t.data = nil
    75  		return true
    76  
    77  	// Data pushes of specific lengths -- OP_DATA_[1-75].
    78  	case op.length > 1:
    79  		script := t.script[t.offset:]
    80  		if len(script) < op.length {
    81  			str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+
    82  				"has %d remaining", op.name, op.length, len(script))
    83  			t.err = scriptError(ErrMalformedPush, str)
    84  			return false
    85  		}
    86  
    87  		// Move the offset forward and set the opcode and data accordingly.
    88  		t.offset += int32(op.length)
    89  		t.op = op
    90  		t.data = script[1:op.length]
    91  		return true
    92  
    93  	// Data pushes with parsed lengths -- OP_PUSHDATA{1,2,4}.
    94  	case op.length < 0:
    95  		script := t.script[t.offset+1:]
    96  		if len(script) < -op.length {
    97  			str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+
    98  				"has %d remaining", op.name, -op.length, len(script))
    99  			t.err = scriptError(ErrMalformedPush, str)
   100  			return false
   101  		}
   102  
   103  		// Next -length bytes are little endian length of data.
   104  		var dataLen int32
   105  		switch op.length {
   106  		case -1:
   107  			dataLen = int32(script[0])
   108  		case -2:
   109  			dataLen = int32(binary.LittleEndian.Uint16(script[:2]))
   110  		case -4:
   111  			dataLen = int32(binary.LittleEndian.Uint32(script[:4]))
   112  		default:
   113  			// In practice it should be impossible to hit this
   114  			// check as each op code is predefined, and only uses
   115  			// the specified lengths.
   116  			str := fmt.Sprintf("invalid opcode length %d", op.length)
   117  			t.err = scriptError(ErrMalformedPush, str)
   118  			return false
   119  		}
   120  
   121  		// Move to the beginning of the data.
   122  		script = script[-op.length:]
   123  
   124  		// Disallow entries that do not fit script or were sign extended.
   125  		if dataLen > int32(len(script)) || dataLen < 0 {
   126  			str := fmt.Sprintf("opcode %s pushes %d bytes, but script only "+
   127  				"has %d remaining", op.name, dataLen, len(script))
   128  			t.err = scriptError(ErrMalformedPush, str)
   129  			return false
   130  		}
   131  
   132  		// Move the offset forward and set the opcode and data accordingly.
   133  		t.offset += 1 + int32(-op.length) + dataLen
   134  		t.op = op
   135  		t.data = script[:dataLen]
   136  		return true
   137  	}
   138  
   139  	// The only remaining case is an opcode with length zero which is
   140  	// impossible.
   141  	panic("unreachable")
   142  }
   143  
// Script returns the full script associated with the tokenizer.  The returned
// slice is the tokenizer's own script slice rather than a copy, so it shares
// its backing array with the script being parsed.
func (t *ScriptTokenizer) Script() []byte {
	return t.script
}
   148  
// ByteIndex returns the current offset into the full script that will be parsed
// next and therefore also implies everything before it has already been parsed.
// Next leaves the offset untouched when it fails, so after a parse error this
// is the offset of the opcode that could not be parsed.
func (t *ScriptTokenizer) ByteIndex() int32 {
	return t.offset
}
   154  
   155  // Opcode returns the current opcode associated with the tokenizer.
   156  func (t *ScriptTokenizer) Opcode() byte {
   157  	return t.op.value
   158  }
   159  
// Data returns the data associated with the most recently successfully parsed
// opcode.  It is nil when that opcode carries no additional data.  Otherwise
// it is a sub-slice of the script being tokenized rather than a copy, so it
// shares its backing array with the script.
func (t *ScriptTokenizer) Data() []byte {
	return t.data
}
   165  
// Err returns any error currently associated with the tokenizer.  It is
// non-nil when Next encountered a malformed opcode, or when the tokenizer was
// created by MakeScriptTokenizer with an unsupported script version.
func (t *ScriptTokenizer) Err() error {
	return t.err
}
   171  
   172  // MakeScriptTokenizer returns a new instance of a script tokenizer.  Passing
   173  // an unsupported script version will result in the returned tokenizer
   174  // immediately having an err set accordingly.
   175  //
   176  // See the docs for ScriptTokenizer for more details.
   177  func MakeScriptTokenizer(scriptVersion uint16, script []byte) ScriptTokenizer {
   178  	// Only version 0 scripts are currently supported.
   179  	var err error
   180  	if scriptVersion != 0 {
   181  		str := fmt.Sprintf("script version %d is not supported", scriptVersion)
   182  		err = scriptError(ErrUnsupportedScriptVersion, str)
   183  
   184  	}
   185  	return ScriptTokenizer{version: scriptVersion, script: script, err: err}
   186  }