github.com/btcsuite/btcd@v0.24.0/txscript/tokenizer.go (about) 1 // Copyright (c) 2019 The Decred developers 2 // Use of this source code is governed by an ISC 3 // license that can be found in the LICENSE file. 4 5 package txscript 6 7 import ( 8 "encoding/binary" 9 "fmt" 10 ) 11 12 // opcodeArrayRef is used to break initialization cycles. 13 var opcodeArrayRef *[256]opcode 14 15 func init() { 16 opcodeArrayRef = &opcodeArray 17 } 18 19 // ScriptTokenizer provides a facility for easily and efficiently tokenizing 20 // transaction scripts without creating allocations. Each successive opcode is 21 // parsed with the Next function, which returns false when iteration is 22 // complete, either due to successfully tokenizing the entire script or 23 // encountering a parse error. In the case of failure, the Err function may be 24 // used to obtain the specific parse error. 25 // 26 // Upon successfully parsing an opcode, the opcode and data associated with it 27 // may be obtained via the Opcode and Data functions, respectively. 28 // 29 // The ByteIndex function may be used to obtain the tokenizer's current offset 30 // into the raw script. 31 type ScriptTokenizer struct { 32 script []byte 33 version uint16 34 offset int32 35 opcodePos int32 36 op *opcode 37 data []byte 38 err error 39 } 40 41 // Done returns true when either all opcodes have been exhausted or a parse 42 // failure was encountered and therefore the state has an associated error. 43 func (t *ScriptTokenizer) Done() bool { 44 return t.err != nil || t.offset >= int32(len(t.script)) 45 } 46 47 // Next attempts to parse the next opcode and returns whether or not it was 48 // successful. It will not be successful if invoked when already at the end of 49 // the script, a parse failure is encountered, or an associated error already 50 // exists due to a previous parse failure. 51 // 52 // In the case of a true return, the parsed opcode and data can be obtained with 53 // the associated functions and the offset into the script will either point to 54 // the next opcode or the end of the script if the final opcode was parsed. 55 // 56 // In the case of a false return, the parsed opcode and data will be the last 57 // successfully parsed values (if any) and the offset into the script will 58 // either point to the failing opcode or the end of the script if the function 59 // was invoked when already at the end of the script. 60 // 61 // Invoking this function when already at the end of the script is not 62 // considered an error and will simply return false. 63 func (t *ScriptTokenizer) Next() bool { 64 if t.Done() { 65 return false 66 } 67 68 // Increment the op code position each time we attempt to parse the 69 // next op code. Note that since the starting value is -1 (no op codes 70 // parsed), by incrementing here, we start at 0, then 1, and so on for 71 // the other op codes. 72 t.opcodePos++ 73 74 op := &opcodeArrayRef[t.script[t.offset]] 75 switch { 76 // No additional data. Note that some of the opcodes, notably OP_1NEGATE, 77 // OP_0, and OP_[1-16] represent the data themselves. 78 case op.length == 1: 79 t.offset++ 80 t.op = op 81 t.data = nil 82 return true 83 84 // Data pushes of specific lengths -- OP_DATA_[1-75]. 85 case op.length > 1: 86 script := t.script[t.offset:] 87 if len(script) < op.length { 88 str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+ 89 "has %d remaining", op.name, op.length, len(script)) 90 t.err = scriptError(ErrMalformedPush, str) 91 return false 92 } 93 94 // Move the offset forward and set the opcode and data accordingly. 95 t.offset += int32(op.length) 96 t.op = op 97 t.data = script[1:op.length] 98 return true 99 100 // Data pushes with parsed lengths -- OP_PUSHDATA{1,2,4}. 101 case op.length < 0: 102 script := t.script[t.offset+1:] 103 if len(script) < -op.length { 104 str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+ 105 "has %d remaining", op.name, -op.length, len(script)) 106 t.err = scriptError(ErrMalformedPush, str) 107 return false 108 } 109 110 // Next -length bytes are little endian length of data. 111 var dataLen int32 112 switch op.length { 113 case -1: 114 dataLen = int32(script[0]) 115 case -2: 116 dataLen = int32(binary.LittleEndian.Uint16(script[:2])) 117 case -4: 118 dataLen = int32(binary.LittleEndian.Uint32(script[:4])) 119 default: 120 // In practice it should be impossible to hit this 121 // check as each op code is predefined, and only uses 122 // the specified lengths. 123 str := fmt.Sprintf("invalid opcode length %d", op.length) 124 t.err = scriptError(ErrMalformedPush, str) 125 return false 126 } 127 128 // Move to the beginning of the data. 129 script = script[-op.length:] 130 131 // Disallow entries that do not fit script or were sign extended. 132 if dataLen > int32(len(script)) || dataLen < 0 { 133 str := fmt.Sprintf("opcode %s pushes %d bytes, but script only "+ 134 "has %d remaining", op.name, dataLen, len(script)) 135 t.err = scriptError(ErrMalformedPush, str) 136 return false 137 } 138 139 // Move the offset forward and set the opcode and data accordingly. 140 t.offset += 1 + int32(-op.length) + dataLen 141 t.op = op 142 t.data = script[:dataLen] 143 return true 144 } 145 146 // The only remaining case is an opcode with length zero which is 147 // impossible. 148 panic("unreachable") 149 } 150 151 // Script returns the full script associated with the tokenizer. 152 func (t *ScriptTokenizer) Script() []byte { 153 return t.script 154 } 155 156 // ByteIndex returns the current offset into the full script that will be parsed 157 // next and therefore also implies everything before it has already been parsed. 158 func (t *ScriptTokenizer) ByteIndex() int32 { 159 return t.offset 160 } 161 162 // OpcodePosition returns the current op code counter. Unlike the ByteIndex 163 // above (referred to as the program counter or pc at times), this is 164 // incremented with each node op code, and isn't incremented more than once for 165 // push datas. 166 // 167 // NOTE: If no op codes have been parsed, this returns -1. 168 func (t *ScriptTokenizer) OpcodePosition() int32 { 169 return t.opcodePos 170 } 171 172 // Opcode returns the current opcode associated with the tokenizer. 173 func (t *ScriptTokenizer) Opcode() byte { 174 return t.op.value 175 } 176 177 // Data returns the data associated with the most recently successfully parsed 178 // opcode. 179 func (t *ScriptTokenizer) Data() []byte { 180 return t.data 181 } 182 183 // Err returns any errors currently associated with the tokenizer. This will 184 // only be non-nil in the case a parsing error was encountered. 185 func (t *ScriptTokenizer) Err() error { 186 return t.err 187 } 188 189 // MakeScriptTokenizer returns a new instance of a script tokenizer. Passing 190 // an unsupported script version will result in the returned tokenizer 191 // immediately having an err set accordingly. 192 // 193 // See the docs for ScriptTokenizer for more details. 194 func MakeScriptTokenizer(scriptVersion uint16, script []byte) ScriptTokenizer { 195 // Only version 0 scripts are currently supported. 196 var err error 197 if scriptVersion != 0 { 198 str := fmt.Sprintf("script version %d is not supported", scriptVersion) 199 err = scriptError(ErrUnsupportedScriptVersion, str) 200 201 } 202 return ScriptTokenizer{ 203 version: scriptVersion, 204 script: script, 205 err: err, 206 // We use a value of negative 1 here so the first op code has a value of 0. 207 opcodePos: -1, 208 } 209 }