github.com/btcsuite/btcd@v0.24.0/txscript/tokenizer.go (about)

     1  // Copyright (c) 2019 The Decred developers
     2  // Use of this source code is governed by an ISC
     3  // license that can be found in the LICENSE file.
     4  
     5  package txscript
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  )
    11  
    12  // opcodeArrayRef points at the package's opcode table (opcodeArray, declared outside this file); Next looks opcodes up through it instead of naming opcodeArray directly, which is used to break initialization cycles.
    13  var opcodeArrayRef *[256]opcode
    14  
    15  func init() {
    16  	opcodeArrayRef = &opcodeArray // assigned in init rather than in the var initializer so the declaration itself does not depend on opcodeArray
    17  }
    18  
    19  // ScriptTokenizer provides a facility for easily and efficiently tokenizing
    20  // transaction scripts without creating allocations.  Each successive opcode is
    21  // parsed with the Next function, which returns false when iteration is
    22  // complete, either due to successfully tokenizing the entire script or
    23  // encountering a parse error.  In the case of failure, the Err function may be
    24  // used to obtain the specific parse error.
    25  //
    26  // Upon successfully parsing an opcode, the opcode and data associated with it
    27  // may be obtained via the Opcode and Data functions, respectively.  The data is
    28  // a sub-slice of the script being tokenized rather than a copy of it.
    29  //
    30  // ByteIndex reports the current byte offset; OpcodePosition the opcode counter.
    31  type ScriptTokenizer struct {
    32  	script    []byte  // raw script being tokenized
    33  	version   uint16  // script version handed to MakeScriptTokenizer
    34  	offset    int32   // byte offset into script of the next opcode to parse
    35  	opcodePos int32   // bumped on every parse attempt in Next; MakeScriptTokenizer starts it at -1
    36  	op        *opcode // last successfully parsed opcode; nil until Next has succeeded once
    37  	data      []byte  // data of op, aliasing script; nil when op carries no additional data
    38  	err       error   // unsupported-version or parse error; once set, Done is true and Next returns false
    39  }
    40  
    41  // Done returns true when either all opcodes have been exhausted or a parse
    42  // failure was encountered and therefore the state has an associated error.
    43  func (t *ScriptTokenizer) Done() bool {
    44  	return t.err != nil || t.offset >= int32(len(t.script))
    45  }
    46  
    47  // Next attempts to parse the next opcode and returns whether or not it was
    48  // successful.  It will not be successful if invoked when already at the end of
    49  // the script, a parse failure is encountered, or an associated error already
    50  // exists due to a previous parse failure.
    51  //
    52  // In the case of a true return, the parsed opcode and data can be obtained with
    53  // the associated functions and the offset into the script will either point to
    54  // the next opcode or the end of the script if the final opcode was parsed.
    55  //
    56  // In the case of a false return, the parsed opcode and data will be the last
    57  // successfully parsed values (if any) and the offset into the script will
    58  // either point to the failing opcode or the end of the script if the function
    59  // was invoked when already at the end of the script.
    60  //
    61  // Invoking this function when already at the end of the script is not
    62  // considered an error and will simply return false.
    63  func (t *ScriptTokenizer) Next() bool {
    64  	if t.Done() {
    65  		return false
    66  	}
    67  
    68  	// Increment the op code position each time we attempt to parse the
    69  	// next op code. Note that since the starting value is -1 (no op codes
    70  	// parsed), by incrementing here, we start at 0, then 1, and so on for
    71  	// the other op codes.
    72  	t.opcodePos++
    73  
    74  	op := &opcodeArrayRef[t.script[t.offset]]
    75  	switch {
    76  	// No additional data.  Note that some of the opcodes, notably OP_1NEGATE,
    77  	// OP_0, and OP_[1-16] represent the data themselves.
    78  	case op.length == 1:
    79  		t.offset++
    80  		t.op = op
    81  		t.data = nil
    82  		return true
    83  
    84  	// Data pushes of specific lengths -- OP_DATA_[1-75].
    85  	case op.length > 1:
    86  		script := t.script[t.offset:]
    87  		if len(script) < op.length {
    88  			str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+
    89  				"has %d remaining", op.name, op.length, len(script))
    90  			t.err = scriptError(ErrMalformedPush, str)
    91  			return false
    92  		}
    93  
    94  		// Move the offset forward and set the opcode and data accordingly.
    95  		t.offset += int32(op.length)
    96  		t.op = op
    97  		t.data = script[1:op.length]
    98  		return true
    99  
   100  	// Data pushes with parsed lengths -- OP_PUSHDATA{1,2,4}.
   101  	case op.length < 0:
   102  		script := t.script[t.offset+1:]
   103  		if len(script) < -op.length {
   104  			str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+
   105  				"has %d remaining", op.name, -op.length, len(script))
   106  			t.err = scriptError(ErrMalformedPush, str)
   107  			return false
   108  		}
   109  
   110  		// Next -length bytes are little endian length of data.
   111  		var dataLen int32
   112  		switch op.length {
   113  		case -1:
   114  			dataLen = int32(script[0])
   115  		case -2:
   116  			dataLen = int32(binary.LittleEndian.Uint16(script[:2]))
   117  		case -4:
   118  			dataLen = int32(binary.LittleEndian.Uint32(script[:4]))
   119  		default:
   120  			// In practice it should be impossible to hit this
   121  			// check as each op code is predefined, and only uses
   122  			// the specified lengths.
   123  			str := fmt.Sprintf("invalid opcode length %d", op.length)
   124  			t.err = scriptError(ErrMalformedPush, str)
   125  			return false
   126  		}
   127  
   128  		// Move to the beginning of the data.
   129  		script = script[-op.length:]
   130  
   131  		// Disallow entries that do not fit script or were sign extended.
   132  		if dataLen > int32(len(script)) || dataLen < 0 {
   133  			str := fmt.Sprintf("opcode %s pushes %d bytes, but script only "+
   134  				"has %d remaining", op.name, dataLen, len(script))
   135  			t.err = scriptError(ErrMalformedPush, str)
   136  			return false
   137  		}
   138  
   139  		// Move the offset forward and set the opcode and data accordingly.
   140  		t.offset += 1 + int32(-op.length) + dataLen
   141  		t.op = op
   142  		t.data = script[:dataLen]
   143  		return true
   144  	}
   145  
   146  	// The only remaining case is an opcode with length zero which is
   147  	// impossible.
   148  	panic("unreachable")
   149  }
   150  
   151  // Script returns the full script associated with the tokenizer, regardless of how much of it has been parsed.
   152  func (t *ScriptTokenizer) Script() []byte {
   153  	return t.script // the stored slice itself, not a copy
   154  }
   155  
   156  // ByteIndex returns the byte offset into the full script of the opcode that will
   157  // be parsed next.  After a parse failure it stays at the opcode that failed.
   158  func (t *ScriptTokenizer) ByteIndex() int32 {
   159  	return t.offset
   160  }
   161  
   162  // OpcodePosition returns the current op code counter. Unlike the ByteIndex
   163  // above (referred to as the program counter or pc at times), this goes up by
   164  // exactly one each time Next attempts to parse an op code, including an attempt
   165  // that fails, no matter how many bytes the op code and its push data occupy.
   166  //
   167  // NOTE: A tokenizer from MakeScriptTokenizer returns -1 until the first attempt.
   168  func (t *ScriptTokenizer) OpcodePosition() int32 {
   169  	return t.opcodePos
   170  }
   171  
   172  // Opcode returns the current opcode associated with the tokenizer.
   173  func (t *ScriptTokenizer) Opcode() byte {
   174  	return t.op.value
   175  }
   176  
   177  // Data returns the data associated with the most recently successfully parsed
   178  // opcode, as a sub-slice of the script (not a copy), or nil if it carries none.
   179  func (t *ScriptTokenizer) Data() []byte {
   180  	return t.data
   181  }
   182  
   183  // Err returns the error currently associated with the tokenizer, if any.  It is
   184  // non-nil after a parse error in Next or if MakeScriptTokenizer rejected the version.
   185  func (t *ScriptTokenizer) Err() error {
   186  	return t.err
   187  }
   188  
   189  // MakeScriptTokenizer returns a new instance of a script tokenizer.  Passing
   190  // an unsupported script version will result in the returned tokenizer
   191  // immediately having an err set accordingly.
   192  //
   193  // See the docs for ScriptTokenizer for more details.
   194  func MakeScriptTokenizer(scriptVersion uint16, script []byte) ScriptTokenizer {
   195  	// Only version 0 scripts are currently supported.
   196  	var err error
   197  	if scriptVersion != 0 {
   198  		str := fmt.Sprintf("script version %d is not supported", scriptVersion)
   199  		err = scriptError(ErrUnsupportedScriptVersion, str)
   200  
   201  	}
   202  	return ScriptTokenizer{
   203  		version: scriptVersion,
   204  		script:  script,
   205  		err:     err,
   206  		// We use a value of negative 1 here so the first op code has a value of 0.
   207  		opcodePos: -1,
   208  	}
   209  }