github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/mem_table.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"fmt"
    28  	"sort"
    29  
    30  	"golang.org/x/sync/errgroup"
    31  
    32  	"github.com/dolthub/dolt/go/store/chunks"
    33  	"github.com/dolthub/dolt/go/store/hash"
    34  )
    35  
    36  func WriteChunks(chunks []chunks.Chunk) (string, []byte, error) {
    37  	var size uint64
    38  	for _, chunk := range chunks {
    39  		size += uint64(len(chunk.Data()))
    40  	}
    41  
    42  	mt := newMemTable(size)
    43  
    44  	return writeChunksToMT(mt, chunks)
    45  }
    46  
    47  func writeChunksToMT(mt *memTable, chunks []chunks.Chunk) (string, []byte, error) {
    48  	for _, chunk := range chunks {
    49  		if !mt.addChunk(addr(chunk.Hash()), chunk.Data()) {
    50  			return "", nil, errors.New("didn't create this memory table with enough space to add all the chunks")
    51  		}
    52  	}
    53  
    54  	var stats Stats
    55  	name, data, count, err := mt.write(nil, &stats)
    56  
    57  	if err != nil {
    58  		return "", nil, err
    59  	}
    60  
    61  	if count != uint32(len(chunks)) {
    62  		return "", nil, errors.New("didn't write everything")
    63  	}
    64  
    65  	return name.String(), data, nil
    66  }
    67  
    68  type memTable struct {
    69  	chunks             map[addr][]byte
    70  	order              []hasRecord // Must maintain the invariant that these are sorted by rec.order
    71  	maxData, totalData uint64
    72  
    73  	snapper snappyEncoder
    74  }
    75  
    76  func newMemTable(memTableSize uint64) *memTable {
    77  	return &memTable{chunks: map[addr][]byte{}, maxData: memTableSize}
    78  }
    79  
    80  func (mt *memTable) addChunk(h addr, data []byte) bool {
    81  	if len(data) == 0 {
    82  		panic("NBS blocks cannot be zero length")
    83  	}
    84  	if _, ok := mt.chunks[h]; ok {
    85  		return true
    86  	}
    87  	dataLen := uint64(len(data))
    88  	if mt.totalData+dataLen > mt.maxData {
    89  		return false
    90  	}
    91  	mt.totalData += dataLen
    92  	mt.chunks[h] = data
    93  	mt.order = append(mt.order, hasRecord{
    94  		&h,
    95  		h.Prefix(),
    96  		len(mt.order),
    97  		false,
    98  	})
    99  	return true
   100  }
   101  
   102  func (mt *memTable) count() (uint32, error) {
   103  	return uint32(len(mt.order)), nil
   104  }
   105  
   106  func (mt *memTable) uncompressedLen() (uint64, error) {
   107  	return mt.totalData, nil
   108  }
   109  
   110  func (mt *memTable) has(h addr) (bool, error) {
   111  	_, has := mt.chunks[h]
   112  	return has, nil
   113  }
   114  
   115  func (mt *memTable) hasMany(addrs []hasRecord) (bool, error) {
   116  	var remaining bool
   117  	for i, addr := range addrs {
   118  		if addr.has {
   119  			continue
   120  		}
   121  
   122  		ok, err := mt.has(*addr.a)
   123  
   124  		if err != nil {
   125  			return false, err
   126  		}
   127  
   128  		if ok {
   129  			addrs[i].has = true
   130  		} else {
   131  			remaining = true
   132  		}
   133  	}
   134  	return remaining, nil
   135  }
   136  
   137  func (mt *memTable) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) {
   138  	return mt.chunks[h], nil
   139  }
   140  
   141  func (mt *memTable) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(*chunks.Chunk), stats *Stats) (bool, error) {
   142  	var remaining bool
   143  	for _, r := range reqs {
   144  		data := mt.chunks[*r.a]
   145  		if data != nil {
   146  			c := chunks.NewChunkWithHash(hash.Hash(*r.a), data)
   147  			found(&c)
   148  		} else {
   149  			remaining = true
   150  		}
   151  	}
   152  	return remaining, nil
   153  }
   154  
   155  func (mt *memTable) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(CompressedChunk), stats *Stats) (bool, error) {
   156  	var remaining bool
   157  	for _, r := range reqs {
   158  		data := mt.chunks[*r.a]
   159  		if data != nil {
   160  			c := chunks.NewChunkWithHash(hash.Hash(*r.a), data)
   161  			found(ChunkToCompressedChunk(c))
   162  		} else {
   163  			remaining = true
   164  		}
   165  	}
   166  
   167  	return remaining, nil
   168  }
   169  
   170  func (mt *memTable) extract(ctx context.Context, chunks chan<- extractRecord) error {
   171  	for _, hrec := range mt.order {
   172  		chunks <- extractRecord{a: *hrec.a, data: mt.chunks[*hrec.a], err: nil}
   173  	}
   174  
   175  	return nil
   176  }
   177  
   178  func (mt *memTable) write(haver chunkReader, stats *Stats) (name addr, data []byte, count uint32, err error) {
   179  	numChunks := uint64(len(mt.order))
   180  	if numChunks == 0 {
   181  		return addr{}, nil, 0, fmt.Errorf("mem table cannot write with zero chunks")
   182  	}
   183  	maxSize := maxTableSize(uint64(len(mt.order)), mt.totalData)
   184  	buff := make([]byte, maxSize)
   185  	tw := newTableWriter(buff, mt.snapper)
   186  
   187  	if haver != nil {
   188  		sort.Sort(hasRecordByPrefix(mt.order)) // hasMany() requires addresses to be sorted.
   189  		_, err := haver.hasMany(mt.order)
   190  
   191  		if err != nil {
   192  			return addr{}, nil, 0, err
   193  		}
   194  
   195  		sort.Sort(hasRecordByOrder(mt.order)) // restore "insertion" order for write
   196  	}
   197  
   198  	for _, addr := range mt.order {
   199  		if !addr.has {
   200  			h := addr.a
   201  			tw.addChunk(*h, mt.chunks[*h])
   202  			count++
   203  		}
   204  	}
   205  	tableSize, name, err := tw.finish()
   206  
   207  	if err != nil {
   208  		return addr{}, nil, 0, err
   209  	}
   210  
   211  	if count > 0 {
   212  		stats.BytesPerPersist.Sample(uint64(tableSize))
   213  		stats.CompressedChunkBytesPerPersist.Sample(uint64(tw.totalCompressedData))
   214  		stats.UncompressedChunkBytesPerPersist.Sample(uint64(tw.totalUncompressedData))
   215  		stats.ChunksPerPersist.Sample(uint64(count))
   216  	}
   217  
   218  	return name, buff[:tableSize], count, nil
   219  }
   220  
   221  func (mt *memTable) Close() error {
   222  	return nil
   223  }