github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/cmd/noms/noms_cat.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package main
    16  
    17  import (
    18  	"context"
    19  	"encoding/base32"
    20  	"encoding/binary"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"hash/crc32"
    24  	"io"
    25  	"os"
    26  	"path/filepath"
    27  	"strconv"
    28  
    29  	"github.com/golang/snappy"
    30  	flag "github.com/juju/gnuflag"
    31  
    32  	"github.com/dolthub/dolt/go/store/chunks"
    33  	"github.com/dolthub/dolt/go/store/cmd/noms/util"
    34  	"github.com/dolthub/dolt/go/store/d"
    35  	"github.com/dolthub/dolt/go/store/hash"
    36  	"github.com/dolthub/dolt/go/store/spec"
    37  	"github.com/dolthub/dolt/go/store/types"
    38  )
    39  
// Sizes (in bytes) of the fixed-width fields of an nbs chunk file, which
// runCat parses back-to-front.
const (
	u64Size        = 8
	u32Size        = 4
	crcSize        = u32Size
	prefixSize     = u64Size
	ordinalSize    = u32Size
	chunkSizeSize  = u32Size
	suffixSize     = 12 // remaining hash bytes after the 8-byte prefix (20-byte hashes)
	chunkCntSize   = u32Size
	totalUncmpSize = u64Size
	magicSize      = u64Size

	// magicNumber is the sentinel expected in the final magicSize bytes of the file.
	magicNumber uint64 = 0xffb5d8c22463ee50
)
    54  
// Command-line flags for `noms cat`, bound in setupCatFlags.
var (
	catRaw        = false // include the raw (compressed) bytes of each chunk
	catDecomp     = false // include the decompressed bytes of each chunk
	catNoShow     = false // skip printing the decoded value
	catNoRefs     = false // skip printing outgoing refs
	catHashesOnly = false // print only hash/offset/size per chunk
)
    62  
// nomsCat is the `noms cat` subcommand descriptor: it prints the contents
// of a single chunk (nbs table) file.
var nomsCat = &util.Command{
	Run:       runCat,
	UsageLine: "cat <file>",
	Short:     "Print the contents of a chunk file",
	Long:      "Print the contents of a chunk file",
	Flags:     setupCatFlags,
	Nargs:     1,
}
    71  
    72  func setupCatFlags() *flag.FlagSet {
    73  	catFlagSet := flag.NewFlagSet("cat", flag.ExitOnError)
    74  	catFlagSet.BoolVar(&catRaw, "raw", false, "If true, includes the raw binary version of each chunk in the nbs file")
    75  	catFlagSet.BoolVar(&catNoShow, "no-show", false, "If true, skips printing of the value")
    76  	catFlagSet.BoolVar(&catNoRefs, "no-refs", false, "If true, skips printing of the refs")
    77  	catFlagSet.BoolVar(&catHashesOnly, "hashes-only", false, "If true, only prints the b32 hashes")
    78  	catFlagSet.BoolVar(&catDecomp, "decompressed", false, "If true, includes the decompressed binary version of each chunk in the nbs file")
    79  	return catFlagSet
    80  }
    81  
// footer holds the decoded fields of the fixed-size trailer at the end of
// an nbs chunk file.
type footer struct {
	chunkCnt   uint32 // number of chunks in the file
	uncompSize uint64 // total uncompressed size of all chunk data
	magicMatch bool   // whether the trailing bytes matched magicNumber
}
    87  
// prefixIndex pairs the first prefixSize bytes of a chunk's hash with the
// ordinal of the chunk it refers to.
type prefixIndex struct {
	hashPrefix []byte // first prefixSize (8) bytes of the chunk hash
	chunkIndex uint32 // ordinal of the referenced chunk
}
    92  
// chunkData captures one chunk record read from the file.
type chunkData struct {
	compressed    []byte // snappy-compressed bytes as stored on disk (CRC excluded)
	uncompressed  []byte // result of snappy-decompressing compressed
	dataOffset    uint64 // byte offset of the record within the file
	crc           uint32 // CRC-32C value stored after the data
	decompSuccess bool   // whether snappy decompression reported no error
}
   100  
// runCat implements the `noms cat` command. It reads the chunk file named by
// args[0] entirely into memory, parses it back-to-front (footer, suffixes,
// sizes, prefix indices, then the chunk records), and prints the views
// selected by the cat* flags. Returns a process exit code.
func runCat(ctx context.Context, args []string) int {
	if len(args) < 1 {
		fmt.Fprintln(os.Stderr, "Not enough arguments")
		// NOTE(review): returns 0 (success) on a usage error; 1 looks intended — confirm.
		return 0
	}

	chunkFile := args[0]
	_, err := os.Stat(chunkFile)

	if err != nil {
		fmt.Fprintln(os.Stderr, chunkFile+" does not exist")
		return 1
	}

	fileBytes, err := os.ReadFile(chunkFile)

	if err != nil {
		fmt.Fprintln(os.Stderr, "Failed to read "+chunkFile, err)
		return 1
	}

	//read the file backwards
	pos := len(fileBytes)
	pos, footer := parseFooter(fileBytes, pos)
	pos, suffixes := parseChunkSuffixes(fileBytes, pos, int(footer.chunkCnt))
	pos, sizes := parseChunkSizes(fileBytes, pos, int(footer.chunkCnt))
	pos, pi := parsePrefixIndices(fileBytes, pos, int(footer.chunkCnt))
	pos, cd := parseChunks(fileBytes, pos, sizes)

	fmt.Println("Info for file", chunkFile+":")
	fmt.Printf("    chunk count:                     %d\n", footer.chunkCnt)
	fmt.Printf("    total uncompressed chunk size:   %d\n", footer.uncompSize)
	fmt.Printf("    magic number matches:            %t\n", footer.magicMatch)
	fmt.Println()

	fmt.Println("Prefix Indices:")
	for i, currPI := range pi {
		// Reassemble the full 20-byte hash from the 8-byte prefix in the
		// index and the 12-byte suffix stored for the referenced chunk.
		var hashData [20]byte

		cidx := currPI.chunkIndex
		copy(hashData[:], currPI.hashPrefix)
		copy(hashData[prefixSize:], suffixes[cidx])
		b32Hash := b32Str(hashData[:])

		currCD := cd[cidx]

		// -hashes-only: one summary line per chunk, nothing else.
		if catHashesOnly {
			fmt.Println("hash:", b32Hash, "offset:", currCD.dataOffset, "size:", len(currCD.compressed))
			continue
		}

		fmt.Printf("    prefixIndex[%d].hash:        (HEX) %s    (B32) %s\n", i, hexStr(hashData[:]), b32Hash)
		fmt.Printf("    prefixIndex[%d].hash.prefix: (HEX) %s\n", i, hexStr(currPI.hashPrefix))
		fmt.Printf("    prefixIndex[%d].hash.suffix: (HEX) %s\n", i, hexStr(suffixes[cidx]))
		fmt.Println()

		fmt.Printf("    prefixIndex[%d] references chunk[%d]:\n", i, cidx)

		chunk := chunks.NewChunkWithHash(hashData, currCD.uncompressed)

		//Want a clean db every loop
		sp, _ := spec.ForDatabase("mem")
		vrw := sp.GetVRW(ctx)
		waf := types.WalkAddrsForNBF(vrw.Format(), nil)

		fmt.Printf("        chunk[%d].raw.len:     %d\n", cidx, len(currCD.compressed))

		if catRaw {
			fmt.Printf("        chunk[%d].raw.crc:     %08x\n", cidx, currCD.crc)
			fmt.Printf("        chunk[%d].raw.data:\n", cidx)
			fmt.Println(hexView(currCD.compressed, "                               "))
		}

		fmt.Printf("        chunk[%d].decomp.len:  %d\n", cidx, len(currCD.uncompressed))

		if catDecomp {
			fmt.Printf("        chunk[%d].decomp.data:\n", cidx)
			fmt.Println(hexView(currCD.uncompressed, "                               "))
		}

		if !catNoShow {
			// err here is deliberately shadowed; decode failures skip the
			// value display but still fall through to the ref walk below? No —
			// continue skips the rest of this iteration.
			value, err := types.DecodeValue(chunk, vrw)

			if err != nil {
				fmt.Println("        error reading value (Could be a format issue).")
				continue
			}

			fmt.Printf("        chunk[%d].value.kind:  %s\n", cidx, value.Kind())
			fmt.Printf("        chunk[%d].value:\n\n", cidx)
			printValue(ctx, os.Stdout, value, filepath.Dir(chunkFile)+"::#"+b32Hash)
			fmt.Println()
		}

		if !catNoRefs {
			refIdx := 0
			err = waf(chunk, func(addr hash.Hash, _ bool) error {
				if refIdx == 0 {
					fmt.Printf("    chunk[%d] references chunks:\n", cidx)
				}

				fmt.Printf("        Ref Hash: %s\n", addr.String())
				refIdx++

				return nil
			})
		}

		// NOTE(review): when -no-refs is set, err was last assigned by
		// os.ReadFile (nil by this point), so this check is a no-op — confirm intent.
		d.PanicIfError(err)
		fmt.Println()
	}

	// Sanity check: every byte of the file should have been consumed.
	if pos != 0 {
		panic("Didn't read the whole file")
	}

	return 0
}
   219  
   220  func parseFooter(bytes []byte, pos int) (int, footer) {
   221  	magicBytes := bytes[pos-magicSize : pos]
   222  	pos -= magicSize
   223  
   224  	totalSizeBytes := bytes[pos-totalUncmpSize : pos]
   225  	pos -= totalUncmpSize
   226  
   227  	chunkCntBytes := bytes[pos-chunkCntSize : pos]
   228  	pos -= chunkCntSize
   229  
   230  	return pos, footer{
   231  		chunkCnt:   binary.BigEndian.Uint32(chunkCntBytes),
   232  		uncompSize: binary.BigEndian.Uint64(totalSizeBytes),
   233  		magicMatch: binary.BigEndian.Uint64(magicBytes) == magicNumber,
   234  	}
   235  }
   236  
   237  func parsePrefixIndices(bytes []byte, pos, numChunks int) (int, []prefixIndex) {
   238  	var hashPrefixes [][]byte
   239  	var ordinals []uint32
   240  	for i := 0; i < numChunks; i++ {
   241  		ordinalBytes := bytes[pos-ordinalSize : pos]
   242  		pos -= ordinalSize
   243  
   244  		hashPrefixBytes := bytes[pos-prefixSize : pos]
   245  		pos -= prefixSize
   246  
   247  		hashPrefixes = append(hashPrefixes, hashPrefixBytes)
   248  		ordinals = append(ordinals, binary.BigEndian.Uint32(ordinalBytes))
   249  	}
   250  
   251  	var indices []prefixIndex
   252  	for i := numChunks - 1; i >= 0; i-- {
   253  		indices = append(indices, prefixIndex{
   254  			hashPrefix: hashPrefixes[i],
   255  			chunkIndex: ordinals[i],
   256  		})
   257  	}
   258  
   259  	return pos, indices
   260  }
   261  
   262  func parseChunkSuffixes(bytes []byte, pos, numChunks int) (int, [][]byte) {
   263  	pos -= suffixSize * numChunks
   264  
   265  	var suffixes [][]byte
   266  	for i := 0; i < numChunks; i++ {
   267  		start := pos + (i * suffixSize)
   268  		suffixes = append(suffixes, bytes[start:start+suffixSize])
   269  	}
   270  
   271  	return pos, suffixes
   272  }
   273  
   274  func parseChunkSizes(bytes []byte, pos, numChunks int) (int, []int) {
   275  	pos -= chunkSizeSize * numChunks
   276  
   277  	var sizes []int
   278  	for i := 0; i < numChunks; i++ {
   279  		start := pos + (i * chunkSizeSize)
   280  		sizeBytes := bytes[start : start+chunkSizeSize]
   281  
   282  		sizes = append(sizes, int(binary.BigEndian.Uint32(sizeBytes)))
   283  	}
   284  
   285  	return pos, sizes
   286  }
   287  
// parseChunks walks the chunk records backwards from pos. Each record is
// [compressed data][crc (4)], where sizes[i] is the full record length of
// chunk i in file order. Each record's CRC is verified (panic on mismatch),
// the data is snappy-decompressed, and the chunkData results are returned
// in file order along with the new pos.
func parseChunks(bytes []byte, pos int, sizes []int) (int, []chunkData) {
	var crcs []uint32
	var offsets []uint64
	var chunkBytes [][]byte
	for i := 0; i < len(sizes); i++ {
		// Walking backwards, so consume sizes from the end first.
		size := sizes[len(sizes)-i-1]
		crcBytes := bytes[pos-crcSize : pos]
		offset := uint64(pos - size)
		dataBytes := bytes[offset : pos-crcSize]
		pos -= size

		crcValInFile := binary.BigEndian.Uint32(crcBytes)
		crcOfData := crc(dataBytes)

		if crcValInFile != crcOfData {
			panic("CRC MISMATCH!!!")
		}

		chunkBytes = append(chunkBytes, dataBytes)
		crcs = append(crcs, crcValInFile)
		offsets = append(offsets, offset)
	}

	// The slices above are in reverse file order; iterate backwards so cd
	// comes out in file order.
	var cd []chunkData
	for i := len(sizes) - 1; i >= 0; i-- {
		uncompressed, err := snappy.Decode(nil, chunkBytes[i])
		d.PanicIfError(err)

		cd = append(cd, chunkData{
			compressed:   chunkBytes[i],
			uncompressed: uncompressed,
			crc:          crcs[i],
			dataOffset:   offsets[i],
			// NOTE(review): PanicIfError above means err is always nil here,
			// so decompSuccess can never be false — confirm intent.
			decompSuccess: err == nil,
		})
	}

	return pos, cd
}
   327  
   328  func printValue(ctx context.Context, w io.Writer, v types.Value, valSpec string) {
   329  	defer func() {
   330  		if r := recover(); r != nil {
   331  			msg := "   Failed to write the value " + valSpec + "\n"
   332  			io.WriteString(w, msg)
   333  		}
   334  	}()
   335  
   336  	types.WriteEncodedValue(ctx, w, v)
   337  }
   338  
   339  func hexStr(bytes []byte) string {
   340  	return hex.EncodeToString(bytes)
   341  }
   342  
   343  const bytesPerRow = 16
   344  
   345  func max(i, j int) int {
   346  	if i > j {
   347  		return i
   348  	}
   349  	return j
   350  }
   351  
   352  func min(i, j int) int {
   353  	if i < j {
   354  		return i
   355  	}
   356  	return j
   357  }
   358  
   359  func hexView(bytes []byte, indent string) string {
   360  	str := ""
   361  	for i := 0; i < len(bytes); i += bytesPerRow {
   362  		rowLen := min(16, len(bytes)-i)
   363  		rowBytes := bytes[i : i+rowLen]
   364  		str += indent + hexViewRow(i, rowBytes) + "\n"
   365  	}
   366  
   367  	return str
   368  }
   369  
   370  func hexViewRow(firstByteIndex int, rowBytes []byte) string {
   371  	addr := fmt.Sprintf("%04x", firstByteIndex)
   372  
   373  	hexWords := ""
   374  	for i, b := range rowBytes {
   375  		hexWords += fmt.Sprintf("%02x", b)
   376  
   377  		if i%2 == 1 {
   378  			hexWords += " "
   379  		}
   380  
   381  		if i%8 == 7 {
   382  			hexWords += " "
   383  		}
   384  	}
   385  	hexWidth := (bytesPerRow * 2) + (bytesPerRow)/2 + (bytesPerRow)/8
   386  
   387  	var charRep []byte
   388  	for _, b := range rowBytes {
   389  		if b < 32 || b > 126 {
   390  			charRep = append(charRep, byte('.'))
   391  		} else {
   392  			charRep = append(charRep, b)
   393  		}
   394  	}
   395  
   396  	formatStr := `%s:  %-` + strconv.Itoa(hexWidth) + `s %s`
   397  	return fmt.Sprintf(formatStr, addr, hexWords, charRep)
   398  }
   399  
   400  var b32encoder = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv")
   401  
   402  func b32Str(bytes []byte) string {
   403  	return b32encoder.EncodeToString(bytes)
   404  }
   405  
   406  var crcTable = crc32.MakeTable(crc32.Castagnoli)
   407  
   408  func crc(b []byte) uint32 {
   409  	return crc32.Update(0, crcTable, b)
   410  }