github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/cmd/noms/noms_cat.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package main 16 17 import ( 18 "context" 19 "encoding/base32" 20 "encoding/binary" 21 "encoding/hex" 22 "fmt" 23 "hash/crc32" 24 "io" 25 "os" 26 "path/filepath" 27 "strconv" 28 29 "github.com/golang/snappy" 30 flag "github.com/juju/gnuflag" 31 32 "github.com/dolthub/dolt/go/store/chunks" 33 "github.com/dolthub/dolt/go/store/cmd/noms/util" 34 "github.com/dolthub/dolt/go/store/d" 35 "github.com/dolthub/dolt/go/store/hash" 36 "github.com/dolthub/dolt/go/store/spec" 37 "github.com/dolthub/dolt/go/store/types" 38 ) 39 40 const ( 41 u64Size = 8 42 u32Size = 4 43 crcSize = u32Size 44 prefixSize = u64Size 45 ordinalSize = u32Size 46 chunkSizeSize = u32Size 47 suffixSize = 12 48 chunkCntSize = u32Size 49 totalUncmpSize = u64Size 50 magicSize = u64Size 51 52 magicNumber uint64 = 0xffb5d8c22463ee50 53 ) 54 55 var ( 56 catRaw = false 57 catDecomp = false 58 catNoShow = false 59 catNoRefs = false 60 catHashesOnly = false 61 ) 62 63 var nomsCat = &util.Command{ 64 Run: runCat, 65 UsageLine: "cat <file>", 66 Short: "Print the contents of a chunk file", 67 Long: "Print the contents of a chunk file", 68 Flags: setupCatFlags, 69 Nargs: 1, 70 } 71 72 func setupCatFlags() *flag.FlagSet { 73 catFlagSet := flag.NewFlagSet("cat", flag.ExitOnError) 74 catFlagSet.BoolVar(&catRaw, "raw", false, "If true, includes the raw binary version of each chunk in the nbs file") 75 catFlagSet.BoolVar(&catNoShow, "no-show", false, "If true, skips printing of the value") 76 catFlagSet.BoolVar(&catNoRefs, "no-refs", false, "If true, skips printing of the refs") 77 catFlagSet.BoolVar(&catHashesOnly, "hashes-only", false, "If true, only prints the b32 hashes") 78 catFlagSet.BoolVar(&catDecomp, "decompressed", false, "If true, includes the decompressed binary version of each chunk in the nbs file") 79 return catFlagSet 80 } 81 82 type footer struct { 83 chunkCnt uint32 84 uncompSize uint64 85 magicMatch bool 86 } 87 88 type prefixIndex struct { 89 hashPrefix []byte 90 chunkIndex uint32 91 } 92 93 type chunkData struct { 94 compressed []byte 95 uncompressed []byte 96 dataOffset uint64 97 crc uint32 98 decompSuccess bool 99 } 100 101 func runCat(ctx context.Context, args []string) int { 102 if len(args) < 1 { 103 fmt.Fprintln(os.Stderr, "Not enough arguments") 104 return 0 105 } 106 107 chunkFile := args[0] 108 _, err := os.Stat(chunkFile) 109 110 if err != nil { 111 fmt.Fprintln(os.Stderr, chunkFile+" does not exist") 112 return 1 113 } 114 115 fileBytes, err := os.ReadFile(chunkFile) 116 117 if err != nil { 118 fmt.Fprintln(os.Stderr, "Failed to read "+chunkFile, err) 119 return 1 120 } 121 122 //read the file backwards 123 pos := len(fileBytes) 124 pos, footer := parseFooter(fileBytes, pos) 125 pos, suffixes := parseChunkSuffixes(fileBytes, pos, int(footer.chunkCnt)) 126 pos, sizes := parseChunkSizes(fileBytes, pos, int(footer.chunkCnt)) 127 pos, pi := parsePrefixIndices(fileBytes, pos, int(footer.chunkCnt)) 128 pos, cd := parseChunks(fileBytes, pos, sizes) 129 130 fmt.Println("Info for file", chunkFile+":") 131 fmt.Printf(" chunk count: %d\n", footer.chunkCnt) 132 fmt.Printf(" total uncompressed chunk size: %d\n", footer.uncompSize) 133 fmt.Printf(" magic number matches: %t\n", footer.magicMatch) 134 fmt.Println() 135 136 fmt.Println("Prefix Indices:") 137 for i, currPI := range pi { 138 var hashData [20]byte 139 140 cidx := currPI.chunkIndex 141 copy(hashData[:], currPI.hashPrefix) 142 copy(hashData[prefixSize:], suffixes[cidx]) 143 b32Hash := b32Str(hashData[:]) 144 145 currCD := cd[cidx] 146 147 if catHashesOnly { 148 fmt.Println("hash:", b32Hash, "offset:", currCD.dataOffset, "size:", len(currCD.compressed)) 149 continue 150 } 151 152 fmt.Printf(" prefixIndex[%d].hash: (HEX) %s (B32) %s\n", i, hexStr(hashData[:]), b32Hash) 153 fmt.Printf(" prefixIndex[%d].hash.prefix: (HEX) %s\n", i, hexStr(currPI.hashPrefix)) 154 fmt.Printf(" prefixIndex[%d].hash.suffix: (HEX) %s\n", i, hexStr(suffixes[cidx])) 155 fmt.Println() 156 157 fmt.Printf(" prefixIndex[%d] references chunk[%d]:\n", i, cidx) 158 159 chunk := chunks.NewChunkWithHash(hashData, currCD.uncompressed) 160 161 //Want a clean db every loop 162 sp, _ := spec.ForDatabase("mem") 163 vrw := sp.GetVRW(ctx) 164 waf := types.WalkAddrsForNBF(vrw.Format(), nil) 165 166 fmt.Printf(" chunk[%d].raw.len: %d\n", cidx, len(currCD.compressed)) 167 168 if catRaw { 169 fmt.Printf(" chunk[%d].raw.crc: %08x\n", cidx, currCD.crc) 170 fmt.Printf(" chunk[%d].raw.data:\n", cidx) 171 fmt.Println(hexView(currCD.compressed, " ")) 172 } 173 174 fmt.Printf(" chunk[%d].decomp.len: %d\n", cidx, len(currCD.uncompressed)) 175 176 if catDecomp { 177 fmt.Printf(" chunk[%d].decomp.data:\n", cidx) 178 fmt.Println(hexView(currCD.uncompressed, " ")) 179 } 180 181 if !catNoShow { 182 value, err := types.DecodeValue(chunk, vrw) 183 184 if err != nil { 185 fmt.Println(" error reading value (Could be a format issue).") 186 continue 187 } 188 189 fmt.Printf(" chunk[%d].value.kind: %s\n", cidx, value.Kind()) 190 fmt.Printf(" chunk[%d].value:\n\n", cidx) 191 printValue(ctx, os.Stdout, value, filepath.Dir(chunkFile)+"::#"+b32Hash) 192 fmt.Println() 193 } 194 195 if !catNoRefs { 196 refIdx := 0 197 err = waf(chunk, func(addr hash.Hash, _ bool) error { 198 if refIdx == 0 { 199 fmt.Printf(" chunk[%d] references chunks:\n", cidx) 200 } 201 202 fmt.Printf(" Ref Hash: %s\n", addr.String()) 203 refIdx++ 204 205 return nil 206 }) 207 } 208 209 d.PanicIfError(err) 210 fmt.Println() 211 } 212 213 if pos != 0 { 214 panic("Didn't read the whole file") 215 } 216 217 return 0 218 } 219 220 func parseFooter(bytes []byte, pos int) (int, footer) { 221 magicBytes := bytes[pos-magicSize : pos] 222 pos -= magicSize 223 224 totalSizeBytes := bytes[pos-totalUncmpSize : pos] 225 pos -= totalUncmpSize 226 227 chunkCntBytes := bytes[pos-chunkCntSize : pos] 228 pos -= chunkCntSize 229 230 return pos, footer{ 231 chunkCnt: binary.BigEndian.Uint32(chunkCntBytes), 232 uncompSize: binary.BigEndian.Uint64(totalSizeBytes), 233 magicMatch: binary.BigEndian.Uint64(magicBytes) == magicNumber, 234 } 235 } 236 237 func parsePrefixIndices(bytes []byte, pos, numChunks int) (int, []prefixIndex) { 238 var hashPrefixes [][]byte 239 var ordinals []uint32 240 for i := 0; i < numChunks; i++ { 241 ordinalBytes := bytes[pos-ordinalSize : pos] 242 pos -= ordinalSize 243 244 hashPrefixBytes := bytes[pos-prefixSize : pos] 245 pos -= prefixSize 246 247 hashPrefixes = append(hashPrefixes, hashPrefixBytes) 248 ordinals = append(ordinals, binary.BigEndian.Uint32(ordinalBytes)) 249 } 250 251 var indices []prefixIndex 252 for i := numChunks - 1; i >= 0; i-- { 253 indices = append(indices, prefixIndex{ 254 hashPrefix: hashPrefixes[i], 255 chunkIndex: ordinals[i], 256 }) 257 } 258 259 return pos, indices 260 } 261 262 func parseChunkSuffixes(bytes []byte, pos, numChunks int) (int, [][]byte) { 263 pos -= suffixSize * numChunks 264 265 var suffixes [][]byte 266 for i := 0; i < numChunks; i++ { 267 start := pos + (i * suffixSize) 268 suffixes = append(suffixes, bytes[start:start+suffixSize]) 269 } 270 271 return pos, suffixes 272 } 273 274 func parseChunkSizes(bytes []byte, pos, numChunks int) (int, []int) { 275 pos -= chunkSizeSize * numChunks 276 277 var sizes []int 278 for i := 0; i < numChunks; i++ { 279 start := pos + (i * chunkSizeSize) 280 sizeBytes := bytes[start : start+chunkSizeSize] 281 282 sizes = append(sizes, int(binary.BigEndian.Uint32(sizeBytes))) 283 } 284 285 return pos, sizes 286 } 287 288 func parseChunks(bytes []byte, pos int, sizes []int) (int, []chunkData) { 289 var crcs []uint32 290 var offsets []uint64 291 var chunkBytes [][]byte 292 for i := 0; i < len(sizes); i++ { 293 size := sizes[len(sizes)-i-1] 294 crcBytes := bytes[pos-crcSize : pos] 295 offset := uint64(pos - size) 296 dataBytes := bytes[offset : pos-crcSize] 297 pos -= size 298 299 crcValInFile := binary.BigEndian.Uint32(crcBytes) 300 crcOfData := crc(dataBytes) 301 302 if crcValInFile != crcOfData { 303 panic("CRC MISMATCH!!!") 304 } 305 306 chunkBytes = append(chunkBytes, dataBytes) 307 crcs = append(crcs, crcValInFile) 308 offsets = append(offsets, offset) 309 } 310 311 var cd []chunkData 312 for i := len(sizes) - 1; i >= 0; i-- { 313 uncompressed, err := snappy.Decode(nil, chunkBytes[i]) 314 d.PanicIfError(err) 315 316 cd = append(cd, chunkData{ 317 compressed: chunkBytes[i], 318 uncompressed: uncompressed, 319 crc: crcs[i], 320 dataOffset: offsets[i], 321 decompSuccess: err == nil, 322 }) 323 } 324 325 return pos, cd 326 } 327 328 func printValue(ctx context.Context, w io.Writer, v types.Value, valSpec string) { 329 defer func() { 330 if r := recover(); r != nil { 331 msg := " Failed to write the value " + valSpec + "\n" 332 io.WriteString(w, msg) 333 } 334 }() 335 336 types.WriteEncodedValue(ctx, w, v) 337 } 338 339 func hexStr(bytes []byte) string { 340 return hex.EncodeToString(bytes) 341 } 342 343 const bytesPerRow = 16 344 345 func max(i, j int) int { 346 if i > j { 347 return i 348 } 349 return j 350 } 351 352 func min(i, j int) int { 353 if i < j { 354 return i 355 } 356 return j 357 } 358 359 func hexView(bytes []byte, indent string) string { 360 str := "" 361 for i := 0; i < len(bytes); i += bytesPerRow { 362 rowLen := min(16, len(bytes)-i) 363 rowBytes := bytes[i : i+rowLen] 364 str += indent + hexViewRow(i, rowBytes) + "\n" 365 } 366 367 return str 368 } 369 370 func hexViewRow(firstByteIndex int, rowBytes []byte) string { 371 addr := fmt.Sprintf("%04x", firstByteIndex) 372 373 hexWords := "" 374 for i, b := range rowBytes { 375 hexWords += fmt.Sprintf("%02x", b) 376 377 if i%2 == 1 { 378 hexWords += " " 379 } 380 381 if i%8 == 7 { 382 hexWords += " " 383 } 384 } 385 hexWidth := (bytesPerRow * 2) + (bytesPerRow)/2 + (bytesPerRow)/8 386 387 var charRep []byte 388 for _, b := range rowBytes { 389 if b < 32 || b > 126 { 390 charRep = append(charRep, byte('.')) 391 } else { 392 charRep = append(charRep, b) 393 } 394 } 395 396 formatStr := `%s: %-` + strconv.Itoa(hexWidth) + `s %s` 397 return fmt.Sprintf(formatStr, addr, hexWords, charRep) 398 } 399 400 var b32encoder = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv") 401 402 func b32Str(bytes []byte) string { 403 return b32encoder.EncodeToString(bytes) 404 } 405 406 var crcTable = crc32.MakeTable(crc32.Castagnoli) 407 408 func crc(b []byte) uint32 { 409 return crc32.Update(0, crcTable, b) 410 }