github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/cmd/noms/noms_cat.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package main
    16  
    17  import (
    18  	"context"
    19  	"encoding/base32"
    20  	"encoding/binary"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"hash/crc32"
    24  	"io"
    25  	"io/ioutil"
    26  	"os"
    27  	"path/filepath"
    28  	"strconv"
    29  
    30  	"github.com/golang/snappy"
    31  	flag "github.com/juju/gnuflag"
    32  
    33  	"github.com/dolthub/dolt/go/store/chunks"
    34  	"github.com/dolthub/dolt/go/store/cmd/noms/util"
    35  	"github.com/dolthub/dolt/go/store/d"
    36  	"github.com/dolthub/dolt/go/store/spec"
    37  	"github.com/dolthub/dolt/go/store/types"
    38  )
    39  
// Field widths (in bytes) of the on-disk NBS table-file layout that this
// command parses back-to-front in runCat.
const (
	u64Size        = 8
	u32Size        = 4
	crcSize        = u32Size // trailing CRC32 on each chunk record
	prefixSize     = u64Size // first 8 bytes of a chunk's 20-byte hash
	ordinalSize    = u32Size // chunk ordinal stored beside each hash prefix
	chunkSizeSize  = u32Size // one entry in the chunk-size table
	suffixSize     = 12      // remaining 12 bytes of the 20-byte hash
	chunkCntSize   = u32Size // footer: number of chunks
	totalUncmpSize = u64Size // footer: total uncompressed data size
	magicSize      = u64Size // footer: width of the magic number

	// magicNumber is the sentinel expected at the very end of a valid
	// table file; parseFooter compares against it big-endian.
	magicNumber uint64 = 0xffb5d8c22463ee50
)
    54  
// Command-line flag values for `noms cat`; bound in setupCatFlags.
var (
	catRaw        = false // --raw: dump each chunk's compressed bytes
	catDecomp     = false // --decompressed: dump each chunk's decompressed bytes
	catNoShow     = false // --no-show: skip printing the decoded value
	catHashesOnly = false // --hashes-only: print only hash/offset/size lines
)
    61  
// nomsCat is the `noms cat <file>` subcommand: a debugging tool that
// prints the footer, indices, and per-chunk detail of a chunk file.
var nomsCat = &util.Command{
	Run:       runCat,
	UsageLine: "cat <file>",
	Short:     "Print the contents of a chunk file",
	Long:      "Print the contents of a chunk file",
	Flags:     setupCatFlags,
	Nargs:     1,
}
    70  
    71  func setupCatFlags() *flag.FlagSet {
    72  	catFlagSet := flag.NewFlagSet("cat", flag.ExitOnError)
    73  	catFlagSet.BoolVar(&catRaw, "raw", false, "If true, includes the raw binary version of each chunk in the nbs file")
    74  	catFlagSet.BoolVar(&catNoShow, "no-show", false, "If true, skips printing of the value")
    75  	catFlagSet.BoolVar(&catHashesOnly, "hashes-only", false, "If true, only prints the b32 hashes")
    76  	catFlagSet.BoolVar(&catDecomp, "decompressed", false, "If true, includes the decompressed binary version of each chunk in the nbs file")
    77  	return catFlagSet
    78  }
    79  
// footer holds the fields parsed from the fixed-size trailer at the end
// of the table file (see parseFooter).
type footer struct {
	chunkCnt   uint32 // number of chunks in the file
	uncompSize uint64 // total uncompressed byte size of all chunk data
	magicMatch bool   // whether the trailing bytes equaled magicNumber
}
    85  
// prefixIndex is one entry of the file's prefix index: the leading bytes
// of a chunk hash paired with the ordinal of the chunk it refers to.
type prefixIndex struct {
	hashPrefix []byte // first prefixSize bytes of the 20-byte hash
	chunkIndex uint32 // ordinal into the chunk records / suffix table
}
    90  
// chunkData captures one parsed chunk record and bookkeeping gathered by
// parseChunks.
type chunkData struct {
	compressed    []byte // snappy-compressed bytes as stored (CRC excluded)
	uncompressed  []byte // snappy-decoded payload
	dataOffset    uint64 // byte offset of the record within the file
	crc           uint32 // CRC read from the record (validated in parseChunks)
	decompSuccess bool   // always true in practice: decode errors panic first
}
    98  
// runCat implements `noms cat <file>`. It reads the whole chunk file into
// memory, parses it back-to-front (footer, hash suffixes, chunk sizes,
// prefix indices, then the chunk records), and prints a report for every
// chunk, honoring the cat* flags. Returns a process exit code: 1 on I/O
// failure, 0 otherwise.
func runCat(ctx context.Context, args []string) int {
	if len(args) < 1 {
		fmt.Fprintln(os.Stderr, "Not enough arguments")
		// NOTE(review): usage error returns 0 (success); likely unreachable
		// since nomsCat declares Nargs: 1 — confirm before changing.
		return 0
	}

	chunkFile := args[0]
	_, err := os.Stat(chunkFile)

	if err != nil {
		fmt.Fprintln(os.Stderr, chunkFile+" does not exist")
		return 1
	}

	fileBytes, err := ioutil.ReadFile(chunkFile)

	if err != nil {
		fmt.Fprintln(os.Stderr, "Failed to read "+chunkFile, err)
		return 1
	}

	// The sections live at the end of the file, so each parse* helper
	// consumes bytes from pos downward and returns the new position.
	//read the file backwards
	pos := len(fileBytes)
	pos, footer := parseFooter(fileBytes, pos)
	pos, suffixes := parseChunkSuffixes(fileBytes, pos, int(footer.chunkCnt))
	pos, sizes := parseChunkSizes(fileBytes, pos, int(footer.chunkCnt))
	pos, pi := parsePrefixIndices(fileBytes, pos, int(footer.chunkCnt))
	pos, cd := parseChunks(fileBytes, pos, sizes)

	fmt.Println("Info for file", chunkFile+":")
	fmt.Printf("    chunk count:                     %d\n", footer.chunkCnt)
	fmt.Printf("    total uncompressed chunk size:   %d\n", footer.uncompSize)
	fmt.Printf("    magic number matches:            %t\n", footer.magicMatch)
	fmt.Println()

	fmt.Println("Prefix Indices:")
	for i, currPI := range pi {
		// Reassemble the full 20-byte hash from the index's 8-byte prefix
		// plus the 12-byte suffix stored for the referenced chunk.
		var hashData [20]byte

		cidx := currPI.chunkIndex
		copy(hashData[:], currPI.hashPrefix)
		copy(hashData[prefixSize:], suffixes[cidx])
		b32Hash := b32Str(hashData[:])

		currCD := cd[cidx]

		// --hashes-only: one summary line per chunk, nothing else.
		if catHashesOnly {
			fmt.Println("hash:", b32Hash, "offset:", currCD.dataOffset, "size:", len(currCD.compressed))
			continue
		}

		fmt.Printf("    prefixIndex[%d].hash:        (HEX) %s    (B32) %s\n", i, hexStr(hashData[:]), b32Hash)
		fmt.Printf("    prefixIndex[%d].hash.prefix: (HEX) %s\n", i, hexStr(currPI.hashPrefix))
		fmt.Printf("    prefixIndex[%d].hash.suffix: (HEX) %s\n", i, hexStr(suffixes[cidx]))
		fmt.Println()

		fmt.Printf("    prefixIndex[%d] references chunk[%d]:\n", i, cidx)

		chunk := chunks.NewChunkWithHash(hashData, currCD.uncompressed)

		//Want a clean db every loop
		// NOTE(review): error from ForDatabase is ignored; the in-memory
		// "mem" spec is presumably always valid — verify.
		sp, _ := spec.ForDatabase("mem")
		db := sp.GetDatabase(ctx)

		fmt.Printf("        chunk[%d].raw.len:     %d\n", cidx, len(currCD.compressed))

		if catRaw {
			fmt.Printf("        chunk[%d].raw.crc:     %08x\n", cidx, currCD.crc)
			fmt.Printf("        chunk[%d].raw.data:\n", cidx)
			fmt.Println(hexView(currCD.compressed, "                               "))
		}

		fmt.Printf("        chunk[%d].decomp.len:  %d\n", cidx, len(currCD.uncompressed))

		if catDecomp {
			fmt.Printf("        chunk[%d].decomp.data:\n", cidx)
			fmt.Println(hexView(currCD.uncompressed, "                               "))
		}

		if !catNoShow {
			value, err := types.DecodeValue(chunk, db)

			// Best effort: a chunk that fails to decode is reported and
			// skipped rather than aborting the whole dump.
			if err != nil {
				fmt.Println("        error reading value (Could be a format issue).")
				continue
			}

			fmt.Printf("        chunk[%d].value.kind:  %s\n", cidx, value.Kind())
			fmt.Printf("        chunk[%d].value:\n\n", cidx)
			printValue(ctx, os.Stdout, value, filepath.Dir(chunkFile)+"::#"+b32Hash)
			fmt.Println()
		}

		// List any chunks this chunk references (header printed lazily on
		// the first ref so chunks with no refs print nothing).
		refIdx := 0
		err = types.WalkRefs(chunk, db.Format(), func(ref types.Ref) error {
			if refIdx == 0 {
				fmt.Printf("    chunk[%d] references chunks:\n", cidx)
			}

			fmt.Printf("        Ref Hash: %s\n", ref.TargetHash().String())
			refIdx++

			return nil
		})

		d.PanicIfError(err)
		fmt.Println()
	}

	// Sanity check: all parse* helpers together must account for every
	// byte of the file.
	if pos != 0 {
		panic("Didn't read the whole file")
	}

	return 0
}
   214  
   215  func parseFooter(bytes []byte, pos int) (int, footer) {
   216  	magicBytes := bytes[pos-magicSize : pos]
   217  	pos -= magicSize
   218  
   219  	totalSizeBytes := bytes[pos-totalUncmpSize : pos]
   220  	pos -= totalUncmpSize
   221  
   222  	chunkCntBytes := bytes[pos-chunkCntSize : pos]
   223  	pos -= chunkCntSize
   224  
   225  	return pos, footer{
   226  		chunkCnt:   binary.BigEndian.Uint32(chunkCntBytes),
   227  		uncompSize: binary.BigEndian.Uint64(totalSizeBytes),
   228  		magicMatch: binary.BigEndian.Uint64(magicBytes) == magicNumber,
   229  	}
   230  }
   231  
   232  func parsePrefixIndices(bytes []byte, pos, numChunks int) (int, []prefixIndex) {
   233  	var hashPrefixes [][]byte
   234  	var ordinals []uint32
   235  	for i := 0; i < numChunks; i++ {
   236  		ordinalBytes := bytes[pos-ordinalSize : pos]
   237  		pos -= ordinalSize
   238  
   239  		hashPrefixBytes := bytes[pos-prefixSize : pos]
   240  		pos -= prefixSize
   241  
   242  		hashPrefixes = append(hashPrefixes, hashPrefixBytes)
   243  		ordinals = append(ordinals, binary.BigEndian.Uint32(ordinalBytes))
   244  	}
   245  
   246  	var indices []prefixIndex
   247  	for i := numChunks - 1; i >= 0; i-- {
   248  		indices = append(indices, prefixIndex{
   249  			hashPrefix: hashPrefixes[i],
   250  			chunkIndex: ordinals[i],
   251  		})
   252  	}
   253  
   254  	return pos, indices
   255  }
   256  
   257  func parseChunkSuffixes(bytes []byte, pos, numChunks int) (int, [][]byte) {
   258  	pos -= suffixSize * numChunks
   259  
   260  	var suffixes [][]byte
   261  	for i := 0; i < numChunks; i++ {
   262  		start := pos + (i * suffixSize)
   263  		suffixes = append(suffixes, bytes[start:start+suffixSize])
   264  	}
   265  
   266  	return pos, suffixes
   267  }
   268  
   269  func parseChunkSizes(bytes []byte, pos, numChunks int) (int, []int) {
   270  	pos -= chunkSizeSize * numChunks
   271  
   272  	var sizes []int
   273  	for i := 0; i < numChunks; i++ {
   274  		start := pos + (i * chunkSizeSize)
   275  		sizeBytes := bytes[start : start+chunkSizeSize]
   276  
   277  		sizes = append(sizes, int(binary.BigEndian.Uint32(sizeBytes)))
   278  	}
   279  
   280  	return pos, sizes
   281  }
   282  
   283  func parseChunks(bytes []byte, pos int, sizes []int) (int, []chunkData) {
   284  	var crcs []uint32
   285  	var offsets []uint64
   286  	var chunkBytes [][]byte
   287  	for i := 0; i < len(sizes); i++ {
   288  		size := sizes[len(sizes)-i-1]
   289  		crcBytes := bytes[pos-crcSize : pos]
   290  		offset := uint64(pos - size)
   291  		dataBytes := bytes[offset : pos-crcSize]
   292  		pos -= size
   293  
   294  		crcValInFile := binary.BigEndian.Uint32(crcBytes)
   295  		crcOfData := crc(dataBytes)
   296  
   297  		if crcValInFile != crcOfData {
   298  			panic("CRC MISMATCH!!!")
   299  		}
   300  
   301  		chunkBytes = append(chunkBytes, dataBytes)
   302  		crcs = append(crcs, crcValInFile)
   303  		offsets = append(offsets, offset)
   304  	}
   305  
   306  	var cd []chunkData
   307  	for i := len(sizes) - 1; i >= 0; i-- {
   308  		uncompressed, err := snappy.Decode(nil, chunkBytes[i])
   309  		d.PanicIfError(err)
   310  
   311  		cd = append(cd, chunkData{
   312  			compressed:    chunkBytes[i],
   313  			uncompressed:  uncompressed,
   314  			crc:           crcs[i],
   315  			dataOffset:    offsets[i],
   316  			decompSuccess: err == nil,
   317  		})
   318  	}
   319  
   320  	return pos, cd
   321  }
   322  
   323  func printValue(ctx context.Context, w io.Writer, v types.Value, valSpec string) {
   324  	defer func() {
   325  		if r := recover(); r != nil {
   326  			msg := "   Failed to write the value " + valSpec + "\n"
   327  			io.WriteString(w, msg)
   328  		}
   329  	}()
   330  
   331  	types.WriteEncodedValue(ctx, w, v)
   332  }
   333  
   334  func hexStr(bytes []byte) string {
   335  	return hex.EncodeToString(bytes)
   336  }
   337  
   338  const bytesPerRow = 16
   339  
   340  func hexView(bytes []byte, indent string) string {
   341  	str := ""
   342  	for i := 0; i < len(bytes); i += bytesPerRow {
   343  		rowLen := min(16, len(bytes)-i)
   344  		rowBytes := bytes[i : i+rowLen]
   345  		str += indent + hexViewRow(i, rowBytes) + "\n"
   346  	}
   347  
   348  	return str
   349  }
   350  
   351  func hexViewRow(firstByteIndex int, rowBytes []byte) string {
   352  	addr := fmt.Sprintf("%04x", firstByteIndex)
   353  
   354  	hexWords := ""
   355  	for i, b := range rowBytes {
   356  		hexWords += fmt.Sprintf("%02x", b)
   357  
   358  		if i%2 == 1 {
   359  			hexWords += " "
   360  		}
   361  
   362  		if i%8 == 7 {
   363  			hexWords += " "
   364  		}
   365  	}
   366  	hexWidth := (bytesPerRow * 2) + (bytesPerRow)/2 + (bytesPerRow)/8
   367  
   368  	var charRep []byte
   369  	for _, b := range rowBytes {
   370  		if b < 32 || b > 126 {
   371  			charRep = append(charRep, byte('.'))
   372  		} else {
   373  			charRep = append(charRep, b)
   374  		}
   375  	}
   376  
   377  	formatStr := `%s:  %-` + strconv.Itoa(hexWidth) + `s %s`
   378  	return fmt.Sprintf(formatStr, addr, hexWords, charRep)
   379  }
   380  
   381  var b32encoder = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv")
   382  
   383  func b32Str(bytes []byte) string {
   384  	return b32encoder.EncodeToString(bytes)
   385  }
   386  
   387  var crcTable = crc32.MakeTable(crc32.Castagnoli)
   388  
   389  func crc(b []byte) uint32 {
   390  	return crc32.Update(0, crcTable, b)
   391  }