github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/cmd/noms/noms_cat.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"encoding/base32"
	"encoding/binary"
	"encoding/hex"
	"fmt"
	"hash/crc32"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"

	"github.com/golang/snappy"
	flag "github.com/juju/gnuflag"

	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/cmd/noms/util"
	"github.com/dolthub/dolt/go/store/d"
	"github.com/dolthub/dolt/go/store/spec"
	"github.com/dolthub/dolt/go/store/types"
)

const (
	u64Size        = 8
	u32Size        = 4
	crcSize        = u32Size
	prefixSize     = u64Size
	ordinalSize    = u32Size
	chunkSizeSize  = u32Size
	suffixSize     = 12
	chunkCntSize   = u32Size
	totalUncmpSize = u64Size
	magicSize      = u64Size

	magicNumber uint64 = 0xffb5d8c22463ee50
)

var (
	catRaw        = false
	catDecomp     = false
	catNoShow     = false
	catHashesOnly = false
)

var nomsCat = &util.Command{
	Run:       runCat,
	UsageLine: "cat <file>",
	Short:     "Print the contents of a chunk file",
	Long:      "Print the contents of a chunk file",
	Flags:     setupCatFlags,
	Nargs:     1,
}

func setupCatFlags() *flag.FlagSet {
	catFlagSet := flag.NewFlagSet("cat", flag.ExitOnError)
	catFlagSet.BoolVar(&catRaw, "raw", false, "If true, includes the raw binary version of each chunk in the nbs file")
	catFlagSet.BoolVar(&catNoShow, "no-show", false, "If true, skips printing of the value")
	catFlagSet.BoolVar(&catHashesOnly, "hashes-only", false, "If true, only prints the b32 hashes")
	catFlagSet.BoolVar(&catDecomp, "decompressed", false, "If true, includes the decompressed binary version of each chunk in the nbs file")
	return catFlagSet
}

type footer struct {
	chunkCnt   uint32
	uncompSize uint64
	magicMatch bool
}

type prefixIndex struct {
	hashPrefix []byte
	chunkIndex uint32
}

type chunkData struct {
	compressed    []byte
	uncompressed  []byte
	dataOffset    uint64
	crc           uint32
	decompSuccess bool
}

func runCat(ctx context.Context, args []string) int {
	if len(args) < 1 {
		fmt.Fprintln(os.Stderr, "Not enough arguments")
		return 0
	}

	chunkFile := args[0]
	_, err := os.Stat(chunkFile)

	if err != nil {
		fmt.Fprintln(os.Stderr, chunkFile+" does not exist")
		return 1
	}

	fileBytes, err := ioutil.ReadFile(chunkFile)

	if err != nil {
		fmt.Fprintln(os.Stderr, "Failed to read "+chunkFile, err)
		return 1
	}

	// read the file backwards
	pos := len(fileBytes)
	pos, footer := parseFooter(fileBytes, pos)
	pos, suffixes := parseChunkSuffixes(fileBytes, pos, int(footer.chunkCnt))
	pos, sizes := parseChunkSizes(fileBytes, pos, int(footer.chunkCnt))
	pos, pi := parsePrefixIndices(fileBytes, pos, int(footer.chunkCnt))
	pos, cd := parseChunks(fileBytes, pos, sizes)

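	// Summarize the footer, then walk each prefix index entry and dump the chunk it references.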
	fmt.Println("Info for file", chunkFile+":")
	fmt.Printf(" chunk count: %d\n", footer.chunkCnt)
	fmt.Printf(" total uncompressed chunk size: %d\n", footer.uncompSize)
	fmt.Printf(" magic number matches: %t\n", footer.magicMatch)
	fmt.Println()

	fmt.Println("Prefix Indices:")
	for i, currPI := range pi {
		var hashData [20]byte

		cidx := currPI.chunkIndex
		copy(hashData[:], currPI.hashPrefix)
		copy(hashData[prefixSize:], suffixes[cidx])
		b32Hash := b32Str(hashData[:])

		currCD := cd[cidx]

		if catHashesOnly {
			fmt.Println("hash:", b32Hash, "offset:", currCD.dataOffset, "size:", len(currCD.compressed))
			continue
		}

		fmt.Printf(" prefixIndex[%d].hash: (HEX) %s (B32) %s\n", i, hexStr(hashData[:]), b32Hash)
		fmt.Printf(" prefixIndex[%d].hash.prefix: (HEX) %s\n", i, hexStr(currPI.hashPrefix))
		fmt.Printf(" prefixIndex[%d].hash.suffix: (HEX) %s\n", i, hexStr(suffixes[cidx]))
		fmt.Println()

		fmt.Printf(" prefixIndex[%d] references chunk[%d]:\n", i, cidx)

		chunk := chunks.NewChunkWithHash(hashData, currCD.uncompressed)

		// Want a clean db every loop
		sp, _ := spec.ForDatabase("mem")
		db := sp.GetDatabase(ctx)

		fmt.Printf(" chunk[%d].raw.len: %d\n", cidx, len(currCD.compressed))

		if catRaw {
			fmt.Printf(" chunk[%d].raw.crc: %08x\n", cidx, currCD.crc)
			fmt.Printf(" chunk[%d].raw.data:\n", cidx)
			fmt.Println(hexView(currCD.compressed, " "))
		}

		fmt.Printf(" chunk[%d].decomp.len: %d\n", cidx, len(currCD.uncompressed))

		if catDecomp {
			fmt.Printf(" chunk[%d].decomp.data:\n", cidx)
			fmt.Println(hexView(currCD.uncompressed, " "))
		}

		if !catNoShow {
			value, err := types.DecodeValue(chunk, db)

			if err != nil {
				fmt.Println(" error reading value (Could be a format issue).")
				continue
			}

			fmt.Printf(" chunk[%d].value.kind: %s\n", cidx, value.Kind())
			fmt.Printf(" chunk[%d].value:\n\n", cidx)
			printValue(ctx, os.Stdout, value, filepath.Dir(chunkFile)+"::#"+b32Hash)
			fmt.Println()
		}

		refIdx := 0
		err = types.WalkRefs(chunk, db.Format(), func(ref types.Ref) error {
			if refIdx == 0 {
				fmt.Printf(" chunk[%d] references chunks:\n", cidx)
			}

			fmt.Printf(" Ref Hash: %s\n", ref.TargetHash().String())
			refIdx++

			return nil
		})

		d.PanicIfError(err)
		fmt.Println()
	}

	if pos != 0 {
		panic("Didn't read the whole file")
	}

	return 0
}

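// parseFooter and the helpers below walk the table file from the end toward the start.
// As runCat reads it, the layout (back to front) is: an 8-byte magic number, an 8-byte
// total uncompressed size, and a 4-byte chunk count (the footer); then chunkCnt 12-byte
// hash suffixes; then chunkCnt 4-byte chunk record lengths; then chunkCnt prefix index
// entries, each an 8-byte hash prefix followed by a 4-byte ordinal; and finally the
// chunk records themselves, each a snappy-compressed payload followed by its 4-byte CRC32C.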
func parseFooter(bytes []byte, pos int) (int, footer) {
	magicBytes := bytes[pos-magicSize : pos]
	pos -= magicSize

	totalSizeBytes := bytes[pos-totalUncmpSize : pos]
	pos -= totalUncmpSize

	chunkCntBytes := bytes[pos-chunkCntSize : pos]
	pos -= chunkCntSize

	return pos, footer{
		chunkCnt:   binary.BigEndian.Uint32(chunkCntBytes),
		uncompSize: binary.BigEndian.Uint64(totalSizeBytes),
		magicMatch: binary.BigEndian.Uint64(magicBytes) == magicNumber,
	}
}

func parsePrefixIndices(bytes []byte, pos, numChunks int) (int, []prefixIndex) {
	var hashPrefixes [][]byte
	var ordinals []uint32
	for i := 0; i < numChunks; i++ {
		ordinalBytes := bytes[pos-ordinalSize : pos]
		pos -= ordinalSize

		hashPrefixBytes := bytes[pos-prefixSize : pos]
		pos -= prefixSize

		hashPrefixes = append(hashPrefixes, hashPrefixBytes)
		ordinals = append(ordinals, binary.BigEndian.Uint32(ordinalBytes))
	}

	// the entries above were collected back to front; rebuild them in file order
	var indices []prefixIndex
	for i := numChunks - 1; i >= 0; i-- {
		indices = append(indices, prefixIndex{
			hashPrefix: hashPrefixes[i],
			chunkIndex: ordinals[i],
		})
	}

	return pos, indices
}

func parseChunkSuffixes(bytes []byte, pos, numChunks int) (int, [][]byte) {
	pos -= suffixSize * numChunks

	var suffixes [][]byte
	for i := 0; i < numChunks; i++ {
		start := pos + (i * suffixSize)
		suffixes = append(suffixes, bytes[start:start+suffixSize])
	}

	return pos, suffixes
}

func parseChunkSizes(bytes []byte, pos, numChunks int) (int, []int) {
	pos -= chunkSizeSize * numChunks

	var sizes []int
	for i := 0; i < numChunks; i++ {
		start := pos + (i * chunkSizeSize)
		sizeBytes := bytes[start : start+chunkSizeSize]

		sizes = append(sizes, int(binary.BigEndian.Uint32(sizeBytes)))
	}

	return pos, sizes
}

func parseChunks(bytes []byte, pos int, sizes []int) (int, []chunkData) {
	var crcs []uint32
	var offsets []uint64
	var chunkBytes [][]byte
	// records are read back to front; each record is size bytes: compressed data followed by a 4-byte crc
	for i := 0; i < len(sizes); i++ {
		size := sizes[len(sizes)-i-1]
		crcBytes := bytes[pos-crcSize : pos]
		offset := uint64(pos - size)
		dataBytes := bytes[offset : pos-crcSize]
		pos -= size

		crcValInFile := binary.BigEndian.Uint32(crcBytes)
		crcOfData := crc(dataBytes)

		if crcValInFile != crcOfData {
			panic("CRC MISMATCH!!!")
		}

		chunkBytes = append(chunkBytes, dataBytes)
		crcs = append(crcs, crcValInFile)
		offsets = append(offsets, offset)
	}

	var cd []chunkData
	for i := len(sizes) - 1; i >= 0; i-- {
		uncompressed, err := snappy.Decode(nil, chunkBytes[i])
		d.PanicIfError(err)

		cd = append(cd, chunkData{
			compressed:    chunkBytes[i],
			uncompressed:  uncompressed,
			crc:           crcs[i],
			dataOffset:    offsets[i],
			decompSuccess: err == nil,
		})
	}

	return pos, cd
}

func printValue(ctx context.Context, w io.Writer, v types.Value, valSpec string) {
	defer func() {
		if r := recover(); r != nil {
			msg := " Failed to write the value " + valSpec + "\n"
			io.WriteString(w, msg)
		}
	}()

	types.WriteEncodedValue(ctx, w, v)
}

func hexStr(bytes []byte) string {
	return hex.EncodeToString(bytes)
}

const bytesPerRow = 16

func hexView(bytes []byte, indent string) string {
	str := ""
	for i := 0; i < len(bytes); i += bytesPerRow {
		rowLen := min(16, len(bytes)-i)
		rowBytes := bytes[i : i+rowLen]
		str += indent + hexViewRow(i, rowBytes) + "\n"
	}

	return str
}

func hexViewRow(firstByteIndex int, rowBytes []byte) string {
	addr := fmt.Sprintf("%04x", firstByteIndex)

	hexWords := ""
	for i, b := range rowBytes {
		hexWords += fmt.Sprintf("%02x", b)

		if i%2 == 1 {
			hexWords += " "
		}

		if i%8 == 7 {
			hexWords += " "
		}
	}
	hexWidth := (bytesPerRow * 2) + (bytesPerRow)/2 + (bytesPerRow)/8

	var charRep []byte
	for _, b := range rowBytes {
		if b < 32 || b > 126 {
			charRep = append(charRep, byte('.'))
		} else {
			charRep = append(charRep, b)
		}
	}

	formatStr := `%s: %-` + strconv.Itoa(hexWidth) + `s %s`
	return fmt.Sprintf(formatStr, addr, hexWords, charRep)
}

var b32encoder = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv")

func b32Str(bytes []byte) string {
	return b32encoder.EncodeToString(bytes)
}

var crcTable = crc32.MakeTable(crc32.Castagnoli)

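// crc computes the CRC32C (Castagnoli) checksum that parseChunks compares against the
// 4 bytes stored after each compressed chunk.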
func crc(b []byte) uint32 {
	return crc32.Update(0, crcTable, b)
}