github.com/btcsuite/btcd@v0.24.0/blockchain/compress.go (about) 1 // Copyright (c) 2015-2016 The btcsuite developers 2 // Use of this source code is governed by an ISC 3 // license that can be found in the LICENSE file. 4 5 package blockchain 6 7 import ( 8 "github.com/btcsuite/btcd/btcec/v2" 9 "github.com/btcsuite/btcd/txscript" 10 ) 11 12 // ----------------------------------------------------------------------------- 13 // A variable length quantity (VLQ) is an encoding that uses an arbitrary number 14 // of binary octets to represent an arbitrarily large integer. The scheme 15 // employs a most significant byte (MSB) base-128 encoding where the high bit in 16 // each byte indicates whether or not the byte is the final one. In addition, 17 // to ensure there are no redundant encodings, an offset is subtracted every 18 // time a group of 7 bits is shifted out. Therefore each integer can be 19 // represented in exactly one way, and each representation stands for exactly 20 // one integer. 21 // 22 // Another nice property of this encoding is that it provides a compact 23 // representation of values that are typically used to indicate sizes. For 24 // example, the values 0 - 127 are represented with a single byte, 128 - 16511 25 // with two bytes, and 16512 - 2113663 with three bytes. 26 // 27 // While the encoding allows arbitrarily large integers, it is artificially 28 // limited in this code to an unsigned 64-bit integer for efficiency purposes. 29 // 30 // Example encodings: 31 // 0 -> [0x00] 32 // 127 -> [0x7f] * Max 1-byte value 33 // 128 -> [0x80 0x00] 34 // 129 -> [0x80 0x01] 35 // 255 -> [0x80 0x7f] 36 // 256 -> [0x81 0x00] 37 // 16511 -> [0xff 0x7f] * Max 2-byte value 38 // 16512 -> [0x80 0x80 0x00] 39 // 32895 -> [0x80 0xff 0x7f] 40 // 2113663 -> [0xff 0xff 0x7f] * Max 3-byte value 41 // 270549119 -> [0xff 0xff 0xff 0x7f] * Max 4-byte value 42 // 2^64-1 -> [0x80 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0x7f] 43 // 44 // References: 45 // https://en.wikipedia.org/wiki/Variable-length_quantity 46 // http://www.codecodex.com/wiki/Variable-Length_Integers 47 // ----------------------------------------------------------------------------- 48 49 // serializeSizeVLQ returns the number of bytes it would take to serialize the 50 // passed number as a variable-length quantity according to the format described 51 // above. 52 func serializeSizeVLQ(n uint64) int { 53 size := 1 54 for ; n > 0x7f; n = (n >> 7) - 1 { 55 size++ 56 } 57 58 return size 59 } 60 61 // putVLQ serializes the provided number to a variable-length quantity according 62 // to the format described above and returns the number of bytes of the encoded 63 // value. The result is placed directly into the passed byte slice which must 64 // be at least large enough to handle the number of bytes returned by the 65 // serializeSizeVLQ function or it will panic. 66 func putVLQ(target []byte, n uint64) int { 67 offset := 0 68 for ; ; offset++ { 69 // The high bit is set when another byte follows. 70 highBitMask := byte(0x80) 71 if offset == 0 { 72 highBitMask = 0x00 73 } 74 75 target[offset] = byte(n&0x7f) | highBitMask 76 if n <= 0x7f { 77 break 78 } 79 n = (n >> 7) - 1 80 } 81 82 // Reverse the bytes so it is MSB-encoded. 83 for i, j := 0, offset; i < j; i, j = i+1, j-1 { 84 target[i], target[j] = target[j], target[i] 85 } 86 87 return offset + 1 88 } 89 90 // deserializeVLQ deserializes the provided variable-length quantity according 91 // to the format described above. It also returns the number of bytes 92 // deserialized. 93 func deserializeVLQ(serialized []byte) (uint64, int) { 94 var n uint64 95 var size int 96 for _, val := range serialized { 97 size++ 98 n = (n << 7) | uint64(val&0x7f) 99 if val&0x80 != 0x80 { 100 break 101 } 102 n++ 103 } 104 105 return n, size 106 } 107 108 // ----------------------------------------------------------------------------- 109 // In order to reduce the size of stored scripts, a domain specific compression 110 // algorithm is used which recognizes standard scripts and stores them using 111 // less bytes than the original script. The compression algorithm used here was 112 // obtained from Bitcoin Core, so all credits for the algorithm go to it. 113 // 114 // The general serialized format is: 115 // 116 // <script size or type><script data> 117 // 118 // Field Type Size 119 // script size or type VLQ variable 120 // script data []byte variable 121 // 122 // The specific serialized format for each recognized standard script is: 123 // 124 // - Pay-to-pubkey-hash: (21 bytes) - <0><20-byte pubkey hash> 125 // - Pay-to-script-hash: (21 bytes) - <1><20-byte script hash> 126 // - Pay-to-pubkey**: (33 bytes) - <2, 3, 4, or 5><32-byte pubkey X value> 127 // 2, 3 = compressed pubkey with bit 0 specifying the y coordinate to use 128 // 4, 5 = uncompressed pubkey with bit 0 specifying the y coordinate to use 129 // ** Only valid public keys starting with 0x02, 0x03, and 0x04 are supported. 130 // 131 // Any scripts which are not recognized as one of the aforementioned standard 132 // scripts are encoded using the general serialized format and encode the script 133 // size as the sum of the actual size of the script and the number of special 134 // cases. 135 // ----------------------------------------------------------------------------- 136 137 // The following constants specify the special constants used to identify a 138 // special script type in the domain-specific compressed script encoding. 139 // 140 // NOTE: This section specifically does not use iota since these values are 141 // serialized and must be stable for long-term storage. 142 const ( 143 // cstPayToPubKeyHash identifies a compressed pay-to-pubkey-hash script. 144 cstPayToPubKeyHash = 0 145 146 // cstPayToScriptHash identifies a compressed pay-to-script-hash script. 147 cstPayToScriptHash = 1 148 149 // cstPayToPubKeyComp2 identifies a compressed pay-to-pubkey script to 150 // a compressed pubkey. Bit 0 specifies which y-coordinate to use 151 // to reconstruct the full uncompressed pubkey. 152 cstPayToPubKeyComp2 = 2 153 154 // cstPayToPubKeyComp3 identifies a compressed pay-to-pubkey script to 155 // a compressed pubkey. Bit 0 specifies which y-coordinate to use 156 // to reconstruct the full uncompressed pubkey. 157 cstPayToPubKeyComp3 = 3 158 159 // cstPayToPubKeyUncomp4 identifies a compressed pay-to-pubkey script to 160 // an uncompressed pubkey. Bit 0 specifies which y-coordinate to use 161 // to reconstruct the full uncompressed pubkey. 162 cstPayToPubKeyUncomp4 = 4 163 164 // cstPayToPubKeyUncomp5 identifies a compressed pay-to-pubkey script to 165 // an uncompressed pubkey. Bit 0 specifies which y-coordinate to use 166 // to reconstruct the full uncompressed pubkey. 167 cstPayToPubKeyUncomp5 = 5 168 169 // numSpecialScripts is the number of special scripts recognized by the 170 // domain-specific script compression algorithm. 171 numSpecialScripts = 6 172 ) 173 174 // isPubKeyHash returns whether or not the passed public key script is a 175 // standard pay-to-pubkey-hash script along with the pubkey hash it is paying to 176 // if it is. 177 func isPubKeyHash(script []byte) (bool, []byte) { 178 if len(script) == 25 && script[0] == txscript.OP_DUP && 179 script[1] == txscript.OP_HASH160 && 180 script[2] == txscript.OP_DATA_20 && 181 script[23] == txscript.OP_EQUALVERIFY && 182 script[24] == txscript.OP_CHECKSIG { 183 184 return true, script[3:23] 185 } 186 187 return false, nil 188 } 189 190 // isScriptHash returns whether or not the passed public key script is a 191 // standard pay-to-script-hash script along with the script hash it is paying to 192 // if it is. 193 func isScriptHash(script []byte) (bool, []byte) { 194 if len(script) == 23 && script[0] == txscript.OP_HASH160 && 195 script[1] == txscript.OP_DATA_20 && 196 script[22] == txscript.OP_EQUAL { 197 198 return true, script[2:22] 199 } 200 201 return false, nil 202 } 203 204 // isPubKey returns whether or not the passed public key script is a standard 205 // pay-to-pubkey script that pays to a valid compressed or uncompressed public 206 // key along with the serialized pubkey it is paying to if it is. 207 // 208 // NOTE: This function ensures the public key is actually valid since the 209 // compression algorithm requires valid pubkeys. It does not support hybrid 210 // pubkeys. This means that even if the script has the correct form for a 211 // pay-to-pubkey script, this function will only return true when it is paying 212 // to a valid compressed or uncompressed pubkey. 213 func isPubKey(script []byte) (bool, []byte) { 214 // Pay-to-compressed-pubkey script. 215 if len(script) == 35 && script[0] == txscript.OP_DATA_33 && 216 script[34] == txscript.OP_CHECKSIG && (script[1] == 0x02 || 217 script[1] == 0x03) { 218 219 // Ensure the public key is valid. 220 serializedPubKey := script[1:34] 221 _, err := btcec.ParsePubKey(serializedPubKey) 222 if err == nil { 223 return true, serializedPubKey 224 } 225 } 226 227 // Pay-to-uncompressed-pubkey script. 228 if len(script) == 67 && script[0] == txscript.OP_DATA_65 && 229 script[66] == txscript.OP_CHECKSIG && script[1] == 0x04 { 230 231 // Ensure the public key is valid. 232 serializedPubKey := script[1:66] 233 _, err := btcec.ParsePubKey(serializedPubKey) 234 if err == nil { 235 return true, serializedPubKey 236 } 237 } 238 239 return false, nil 240 } 241 242 // compressedScriptSize returns the number of bytes the passed script would take 243 // when encoded with the domain specific compression algorithm described above. 244 func compressedScriptSize(pkScript []byte) int { 245 // Pay-to-pubkey-hash script. 246 if valid, _ := isPubKeyHash(pkScript); valid { 247 return 21 248 } 249 250 // Pay-to-script-hash script. 251 if valid, _ := isScriptHash(pkScript); valid { 252 return 21 253 } 254 255 // Pay-to-pubkey (compressed or uncompressed) script. 256 if valid, _ := isPubKey(pkScript); valid { 257 return 33 258 } 259 260 // When none of the above special cases apply, encode the script as is 261 // preceded by the sum of its size and the number of special cases 262 // encoded as a variable length quantity. 263 return serializeSizeVLQ(uint64(len(pkScript)+numSpecialScripts)) + 264 len(pkScript) 265 } 266 267 // decodeCompressedScriptSize treats the passed serialized bytes as a compressed 268 // script, possibly followed by other data, and returns the number of bytes it 269 // occupies taking into account the special encoding of the script size by the 270 // domain specific compression algorithm described above. 271 func decodeCompressedScriptSize(serialized []byte) int { 272 scriptSize, bytesRead := deserializeVLQ(serialized) 273 if bytesRead == 0 { 274 return 0 275 } 276 277 switch scriptSize { 278 case cstPayToPubKeyHash: 279 return 21 280 281 case cstPayToScriptHash: 282 return 21 283 284 case cstPayToPubKeyComp2, cstPayToPubKeyComp3, cstPayToPubKeyUncomp4, 285 cstPayToPubKeyUncomp5: 286 return 33 287 } 288 289 scriptSize -= numSpecialScripts 290 scriptSize += uint64(bytesRead) 291 return int(scriptSize) 292 } 293 294 // putCompressedScript compresses the passed script according to the domain 295 // specific compression algorithm described above directly into the passed 296 // target byte slice. The target byte slice must be at least large enough to 297 // handle the number of bytes returned by the compressedScriptSize function or 298 // it will panic. 299 func putCompressedScript(target, pkScript []byte) int { 300 // Pay-to-pubkey-hash script. 301 if valid, hash := isPubKeyHash(pkScript); valid { 302 target[0] = cstPayToPubKeyHash 303 copy(target[1:21], hash) 304 return 21 305 } 306 307 // Pay-to-script-hash script. 308 if valid, hash := isScriptHash(pkScript); valid { 309 target[0] = cstPayToScriptHash 310 copy(target[1:21], hash) 311 return 21 312 } 313 314 // Pay-to-pubkey (compressed or uncompressed) script. 315 if valid, serializedPubKey := isPubKey(pkScript); valid { 316 pubKeyFormat := serializedPubKey[0] 317 switch pubKeyFormat { 318 case 0x02, 0x03: 319 target[0] = pubKeyFormat 320 copy(target[1:33], serializedPubKey[1:33]) 321 return 33 322 case 0x04: 323 // Encode the oddness of the serialized pubkey into the 324 // compressed script type. 325 target[0] = pubKeyFormat | (serializedPubKey[64] & 0x01) 326 copy(target[1:33], serializedPubKey[1:33]) 327 return 33 328 } 329 } 330 331 // When none of the above special cases apply, encode the unmodified 332 // script preceded by the sum of its size and the number of special 333 // cases encoded as a variable length quantity. 334 encodedSize := uint64(len(pkScript) + numSpecialScripts) 335 vlqSizeLen := putVLQ(target, encodedSize) 336 copy(target[vlqSizeLen:], pkScript) 337 return vlqSizeLen + len(pkScript) 338 } 339 340 // decompressScript returns the original script obtained by decompressing the 341 // passed compressed script according to the domain specific compression 342 // algorithm described above. 343 // 344 // NOTE: The script parameter must already have been proven to be long enough 345 // to contain the number of bytes returned by decodeCompressedScriptSize or it 346 // will panic. This is acceptable since it is only an internal function. 347 func decompressScript(compressedPkScript []byte) []byte { 348 // In practice this function will not be called with a zero-length or 349 // nil script since the nil script encoding includes the length, however 350 // the code below assumes the length exists, so just return nil now if 351 // the function ever ends up being called with a nil script in the 352 // future. 353 if len(compressedPkScript) == 0 { 354 return nil 355 } 356 357 // Decode the script size and examine it for the special cases. 358 encodedScriptSize, bytesRead := deserializeVLQ(compressedPkScript) 359 switch encodedScriptSize { 360 // Pay-to-pubkey-hash script. The resulting script is: 361 // <OP_DUP><OP_HASH160><20 byte hash><OP_EQUALVERIFY><OP_CHECKSIG> 362 case cstPayToPubKeyHash: 363 pkScript := make([]byte, 25) 364 pkScript[0] = txscript.OP_DUP 365 pkScript[1] = txscript.OP_HASH160 366 pkScript[2] = txscript.OP_DATA_20 367 copy(pkScript[3:], compressedPkScript[bytesRead:bytesRead+20]) 368 pkScript[23] = txscript.OP_EQUALVERIFY 369 pkScript[24] = txscript.OP_CHECKSIG 370 return pkScript 371 372 // Pay-to-script-hash script. The resulting script is: 373 // <OP_HASH160><20 byte script hash><OP_EQUAL> 374 case cstPayToScriptHash: 375 pkScript := make([]byte, 23) 376 pkScript[0] = txscript.OP_HASH160 377 pkScript[1] = txscript.OP_DATA_20 378 copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+20]) 379 pkScript[22] = txscript.OP_EQUAL 380 return pkScript 381 382 // Pay-to-compressed-pubkey script. The resulting script is: 383 // <OP_DATA_33><33 byte compressed pubkey><OP_CHECKSIG> 384 case cstPayToPubKeyComp2, cstPayToPubKeyComp3: 385 pkScript := make([]byte, 35) 386 pkScript[0] = txscript.OP_DATA_33 387 pkScript[1] = byte(encodedScriptSize) 388 copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+32]) 389 pkScript[34] = txscript.OP_CHECKSIG 390 return pkScript 391 392 // Pay-to-uncompressed-pubkey script. The resulting script is: 393 // <OP_DATA_65><65 byte uncompressed pubkey><OP_CHECKSIG> 394 case cstPayToPubKeyUncomp4, cstPayToPubKeyUncomp5: 395 // Change the leading byte to the appropriate compressed pubkey 396 // identifier (0x02 or 0x03) so it can be decoded as a 397 // compressed pubkey. This really should never fail since the 398 // encoding ensures it is valid before compressing to this type. 399 compressedKey := make([]byte, 33) 400 compressedKey[0] = byte(encodedScriptSize - 2) 401 copy(compressedKey[1:], compressedPkScript[1:]) 402 key, err := btcec.ParsePubKey(compressedKey) 403 if err != nil { 404 return nil 405 } 406 407 pkScript := make([]byte, 67) 408 pkScript[0] = txscript.OP_DATA_65 409 copy(pkScript[1:], key.SerializeUncompressed()) 410 pkScript[66] = txscript.OP_CHECKSIG 411 return pkScript 412 } 413 414 // When none of the special cases apply, the script was encoded using 415 // the general format, so reduce the script size by the number of 416 // special cases and return the unmodified script. 417 scriptSize := int(encodedScriptSize - numSpecialScripts) 418 pkScript := make([]byte, scriptSize) 419 copy(pkScript, compressedPkScript[bytesRead:bytesRead+scriptSize]) 420 return pkScript 421 } 422 423 // ----------------------------------------------------------------------------- 424 // In order to reduce the size of stored amounts, a domain specific compression 425 // algorithm is used which relies on there typically being a lot of zeroes at 426 // end of the amounts. The compression algorithm used here was obtained from 427 // Bitcoin Core, so all credits for the algorithm go to it. 428 // 429 // While this is simply exchanging one uint64 for another, the resulting value 430 // for typical amounts has a much smaller magnitude which results in fewer bytes 431 // when encoded as variable length quantity. For example, consider the amount 432 // of 0.1 BTC which is 10000000 satoshi. Encoding 10000000 as a VLQ would take 433 // 4 bytes while encoding the compressed value of 8 as a VLQ only takes 1 byte. 434 // 435 // Essentially the compression is achieved by splitting the value into an 436 // exponent in the range [0-9] and a digit in the range [1-9], when possible, 437 // and encoding them in a way that can be decoded. More specifically, the 438 // encoding is as follows: 439 // - 0 is 0 440 // - Find the exponent, e, as the largest power of 10 that evenly divides the 441 // value up to a maximum of 9 442 // - When e < 9, the final digit can't be 0 so store it as d and remove it by 443 // dividing the value by 10 (call the result n). The encoded value is thus: 444 // 1 + 10*(9*n + d-1) + e 445 // - When e==9, the only thing known is the amount is not 0. The encoded value 446 // is thus: 447 // 1 + 10*(n-1) + e == 10 + 10*(n-1) 448 // 449 // Example encodings: 450 // (The numbers in parenthesis are the number of bytes when serialized as a VLQ) 451 // 0 (1) -> 0 (1) * 0.00000000 BTC 452 // 1000 (2) -> 4 (1) * 0.00001000 BTC 453 // 10000 (2) -> 5 (1) * 0.00010000 BTC 454 // 12345678 (4) -> 111111101(4) * 0.12345678 BTC 455 // 50000000 (4) -> 47 (1) * 0.50000000 BTC 456 // 100000000 (4) -> 9 (1) * 1.00000000 BTC 457 // 500000000 (5) -> 49 (1) * 5.00000000 BTC 458 // 1000000000 (5) -> 10 (1) * 10.00000000 BTC 459 // ----------------------------------------------------------------------------- 460 461 // compressTxOutAmount compresses the passed amount according to the domain 462 // specific compression algorithm described above. 463 func compressTxOutAmount(amount uint64) uint64 { 464 // No need to do any work if it's zero. 465 if amount == 0 { 466 return 0 467 } 468 469 // Find the largest power of 10 (max of 9) that evenly divides the 470 // value. 471 exponent := uint64(0) 472 for amount%10 == 0 && exponent < 9 { 473 amount /= 10 474 exponent++ 475 } 476 477 // The compressed result for exponents less than 9 is: 478 // 1 + 10*(9*n + d-1) + e 479 if exponent < 9 { 480 lastDigit := amount % 10 481 amount /= 10 482 return 1 + 10*(9*amount+lastDigit-1) + exponent 483 } 484 485 // The compressed result for an exponent of 9 is: 486 // 1 + 10*(n-1) + e == 10 + 10*(n-1) 487 return 10 + 10*(amount-1) 488 } 489 490 // decompressTxOutAmount returns the original amount the passed compressed 491 // amount represents according to the domain specific compression algorithm 492 // described above. 493 func decompressTxOutAmount(amount uint64) uint64 { 494 // No need to do any work if it's zero. 495 if amount == 0 { 496 return 0 497 } 498 499 // The decompressed amount is either of the following two equations: 500 // x = 1 + 10*(9*n + d - 1) + e 501 // x = 1 + 10*(n - 1) + 9 502 amount-- 503 504 // The decompressed amount is now one of the following two equations: 505 // x = 10*(9*n + d - 1) + e 506 // x = 10*(n - 1) + 9 507 exponent := amount % 10 508 amount /= 10 509 510 // The decompressed amount is now one of the following two equations: 511 // x = 9*n + d - 1 | where e < 9 512 // x = n - 1 | where e = 9 513 n := uint64(0) 514 if exponent < 9 { 515 lastDigit := amount%9 + 1 516 amount /= 9 517 n = amount*10 + lastDigit 518 } else { 519 n = amount + 1 520 } 521 522 // Apply the exponent. 523 for ; exponent > 0; exponent-- { 524 n *= 10 525 } 526 527 return n 528 } 529 530 // ----------------------------------------------------------------------------- 531 // Compressed transaction outputs consist of an amount and a public key script 532 // both compressed using the domain specific compression algorithms previously 533 // described. 534 // 535 // The serialized format is: 536 // 537 // <compressed amount><compressed script> 538 // 539 // Field Type Size 540 // compressed amount VLQ variable 541 // compressed script []byte variable 542 // ----------------------------------------------------------------------------- 543 544 // compressedTxOutSize returns the number of bytes the passed transaction output 545 // fields would take when encoded with the format described above. 546 func compressedTxOutSize(amount uint64, pkScript []byte) int { 547 return serializeSizeVLQ(compressTxOutAmount(amount)) + 548 compressedScriptSize(pkScript) 549 } 550 551 // putCompressedTxOut compresses the passed amount and script according to their 552 // domain specific compression algorithms and encodes them directly into the 553 // passed target byte slice with the format described above. The target byte 554 // slice must be at least large enough to handle the number of bytes returned by 555 // the compressedTxOutSize function or it will panic. 556 func putCompressedTxOut(target []byte, amount uint64, pkScript []byte) int { 557 offset := putVLQ(target, compressTxOutAmount(amount)) 558 offset += putCompressedScript(target[offset:], pkScript) 559 return offset 560 } 561 562 // decodeCompressedTxOut decodes the passed compressed txout, possibly followed 563 // by other data, into its uncompressed amount and script and returns them along 564 // with the number of bytes they occupied prior to decompression. 565 func decodeCompressedTxOut(serialized []byte) (uint64, []byte, int, error) { 566 // Deserialize the compressed amount and ensure there are bytes 567 // remaining for the compressed script. 568 compressedAmount, bytesRead := deserializeVLQ(serialized) 569 if bytesRead >= len(serialized) { 570 return 0, nil, bytesRead, errDeserialize("unexpected end of " + 571 "data after compressed amount") 572 } 573 574 // Decode the compressed script size and ensure there are enough bytes 575 // left in the slice for it. 576 scriptSize := decodeCompressedScriptSize(serialized[bytesRead:]) 577 if len(serialized[bytesRead:]) < scriptSize { 578 return 0, nil, bytesRead, errDeserialize("unexpected end of " + 579 "data after script size") 580 } 581 582 // Decompress and return the amount and script. 583 amount := decompressTxOutAmount(compressedAmount) 584 script := decompressScript(serialized[bytesRead : bytesRead+scriptSize]) 585 return amount, script, bytesRead + scriptSize, nil 586 }