github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/index_lookup_prop_test.go (about)

     1  // +build big
     2  
     3  // Copyright (c) 2017 Uber Technologies, Inc.
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  //
    12  // The above copyright notice and this permission notice shall be included in
    13  // all copies or substantial portions of the Software.
    14  //
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21  // THE SOFTWARE.
    22  
    23  package fs
    24  
    25  import (
    26  	"fmt"
    27  	"io/ioutil"
    28  	"os"
    29  	"path/filepath"
    30  	"reflect"
    31  	"testing"
    32  
    33  	"github.com/m3db/m3/src/dbnode/digest"
    34  	"github.com/m3db/m3/src/dbnode/persist"
    35  	"github.com/m3db/m3/src/dbnode/persist/fs/msgpack"
    36  	"github.com/m3db/m3/src/x/checked"
    37  	"github.com/m3db/m3/src/x/ident"
    38  	"github.com/m3db/m3/src/x/mmap"
    39  	xtime "github.com/m3db/m3/src/x/time"
    40  
    41  	"github.com/leanovate/gopter"
    42  	"github.com/leanovate/gopter/gen"
    43  	"github.com/leanovate/gopter/prop"
    44  	"github.com/stretchr/testify/assert"
    45  )
    46  
    47  func TestIndexLookupWriteRead(t *testing.T) {
    48  	// Define property test function which will be passed various propTestInputs
    49  	propertyFunc := func(input propTestInput) (bool, error) {
    50  		// Filter out duplicate IDs
    51  		writes := []generatedWrite{}
    52  		unique := map[string]struct{}{}
    53  		for _, write := range input.realWrites {
    54  			s := string(write.id.Bytes())
    55  			if _, ok := unique[s]; ok {
    56  				continue
    57  			}
    58  			unique[s] = struct{}{}
    59  			writes = append(writes, write)
    60  		}
    61  
    62  		// Create a temporary directory for each test run
    63  		dir, err := ioutil.TempDir("", "testdb")
    64  		if err != nil {
    65  			return false, err
    66  		}
    67  		filePathPrefix := filepath.Join(dir, "")
    68  		defer os.RemoveAll(dir)
    69  
    70  		// NB(r): Use testDefaultOpts to avoid allocing pools each
    71  		// time we derive options
    72  		options := testDefaultOpts.
    73  			// Make sure that every index entry is also in the summaries file for the
    74  			// sake of verifying behavior
    75  			SetIndexSummariesPercent(1).
    76  			SetFilePathPrefix(filePathPrefix).
    77  			SetWriterBufferSize(testWriterBufferSize)
    78  		shard := input.shard
    79  
    80  		// Instantiate a writer and write the test data
    81  		w, err := NewWriter(options)
    82  		if err != nil {
    83  			return false, fmt.Errorf("err creating writer: %v, ", err)
    84  		}
    85  		writerOpts := DataWriterOpenOptions{
    86  			BlockSize: testBlockSize,
    87  			Identifier: FileSetFileIdentifier{
    88  				Namespace:  testNs1ID,
    89  				Shard:      shard,
    90  				BlockStart: testWriterStart,
    91  			},
    92  		}
    93  		err = w.Open(writerOpts)
    94  		if err != nil {
    95  			return false, fmt.Errorf("err opening writer: %v, ", err)
    96  		}
    97  		shardDirPath := ShardDataDirPath(filePathPrefix, testNs1ID, shard)
    98  		err = writeTestSummariesData(w, writes)
    99  		if err != nil {
   100  			return false, fmt.Errorf("err writing test summaries data: %v, ", err)
   101  		}
   102  
   103  		// Figure out the offsets for the writes so we have something to compare
   104  		// our results against
   105  		expectedIndexFileOffsets, err := readIndexFileOffsets(
   106  			shardDirPath, len(writes), testWriterStart)
   107  		if err != nil {
   108  			return false, fmt.Errorf("err reading index file offsets: %v", err)
   109  		}
   110  
   111  		// Read the summaries file into memory
   112  		summariesFilePath := dataFilesetPathFromTimeAndIndex(
   113  			shardDirPath, testWriterStart, 0, summariesFileSuffix, false)
   114  		summariesFile, err := os.Open(summariesFilePath)
   115  		if err != nil {
   116  			return false, fmt.Errorf("err opening summaries file: %v, ", err)
   117  		}
   118  		summariesFdWithDigest := digest.NewFdWithDigestReader(options.InfoReaderBufferSize())
   119  		summariesFdWithDigest.Reset(summariesFile)
   120  		expectedSummariesDigest := calculateExpectedChecksum(t, summariesFilePath)
   121  		decoder := msgpack.NewDecoder(options.DecodingOptions())
   122  		decoderStream := msgpack.NewByteDecoderStream(nil)
   123  		indexLookup, err := newNearestIndexOffsetLookupFromSummariesFile(
   124  			summariesFdWithDigest, expectedSummariesDigest,
   125  			decoder, decoderStream, len(writes), input.forceMmapMemory, mmap.ReporterOptions{})
   126  		if err != nil {
   127  			return false, fmt.Errorf("err reading index lookup from summaries file: %v, ", err)
   128  		}
   129  
   130  		// Make sure it returns the correct index offset for every ID.
   131  		resources := newTestReusableSeekerResources()
   132  		for id, expectedOffset := range expectedIndexFileOffsets {
   133  			foundOffset, err := indexLookup.getNearestIndexFileOffset(ident.StringID(id), resources)
   134  			if err != nil {
   135  				return false, fmt.Errorf("err locating index file offset for: %s, err: %v", id, err)
   136  			}
   137  			if expectedOffset != foundOffset {
   138  				return false, fmt.Errorf(
   139  					"offsets for: %s do not match, expected: %d, got: %d",
   140  					id, expectedOffset, foundOffset)
   141  			}
   142  		}
   143  
   144  		return true, nil
   145  	}
   146  
   147  	parameters := gopter.DefaultTestParameters()
   148  	parameters.Rng.Seed(123456789)
   149  	parameters.MinSuccessfulTests = 100
   150  	props := gopter.NewProperties(parameters)
   151  
   152  	props.Property(
   153  		"Index lookup can properly lookup index offsets",
   154  		prop.ForAll(propertyFunc, genPropTestInputs()),
   155  	)
   156  
   157  	props.TestingRun(t)
   158  }
   159  
   160  func calculateExpectedChecksum(t *testing.T, filePath string) uint32 {
   161  	fileBytes, err := ioutil.ReadFile(filePath)
   162  	assert.NoError(t, err)
   163  	return digest.Checksum(fileBytes)
   164  }
   165  
   166  func writeTestSummariesData(w DataFileSetWriter, writes []generatedWrite) error {
   167  	for _, write := range writes {
   168  		metadata := persist.NewMetadataFromIDAndTags(write.id, write.tags,
   169  			persist.MetadataOptions{})
   170  		err := w.Write(metadata, write.data, write.checksum)
   171  		if err != nil {
   172  			return err
   173  		}
   174  	}
   175  	return w.Close()
   176  }
   177  
   178  type propTestInput struct {
   179  	// IDs to write and assert against
   180  	realWrites []generatedWrite
   181  	// Shard number to use for the files
   182  	shard uint32
   183  	// Whether the summaries file bytes should be mmap'd as an
   184  	// anonymous region or file.
   185  	forceMmapMemory bool
   186  }
   187  
   188  type generatedWrite struct {
   189  	id       ident.ID
   190  	tags     ident.Tags
   191  	data     checked.Bytes
   192  	checksum uint32
   193  }
   194  
   195  func genPropTestInputs() gopter.Gen {
   196  	return gopter.CombineGens(
   197  		gen.IntRange(0, 1000),
   198  	).FlatMap(func(input interface{}) gopter.Gen {
   199  		inputs := input.([]interface{})
   200  		numRealWrites := inputs[0].(int)
   201  		return genPropTestInput(numRealWrites)
   202  	}, reflect.TypeOf(propTestInput{}))
   203  }
   204  
   205  func genPropTestInput(numRealWrites int) gopter.Gen {
   206  	return gopter.CombineGens(
   207  		gen.SliceOfN(numRealWrites, genWrite()),
   208  		gen.UInt32(),
   209  		gen.Bool(),
   210  	).Map(func(vals []interface{}) propTestInput {
   211  		return propTestInput{
   212  			realWrites:      vals[0].([]generatedWrite),
   213  			shard:           vals[1].(uint32),
   214  			forceMmapMemory: vals[2].(bool),
   215  		}
   216  	})
   217  }
   218  
   219  func genWrite() gopter.Gen {
   220  	return gopter.CombineGens(
   221  		// gopter will generate random strings, but some of them may be duplicates
   222  		// (which can't normally happen for IDs and breaks this codepath), so we
   223  		// filter down to unique inputs
   224  		// ID
   225  		gen.AnyString(),
   226  		// Tag 1
   227  		genTagIdent(),
   228  		genTagIdent(),
   229  		// Tag 2
   230  		genTagIdent(),
   231  		genTagIdent(),
   232  		// Data
   233  		gen.SliceOfN(100, gen.UInt8()),
   234  	).Map(func(vals []interface{}) generatedWrite {
   235  		id := vals[0].(string)
   236  		tags := []ident.Tag{
   237  			ident.StringTag(vals[1].(string), vals[2].(string)),
   238  			ident.StringTag(vals[3].(string), vals[4].(string)),
   239  		}
   240  		data := vals[5].([]byte)
   241  
   242  		return generatedWrite{
   243  			id:       ident.StringID(id),
   244  			tags:     ident.NewTags(tags...),
   245  			data:     bytesRefd(data),
   246  			checksum: digest.Checksum(data),
   247  		}
   248  	})
   249  }
   250  
   251  func genTagIdent() gopter.Gen {
   252  	return gopter.CombineGens(
   253  		gen.AlphaChar(),
   254  		gen.AnyString(),
   255  	).Map(func(vals []interface{}) string {
   256  		return string(vals[0].(rune)) + vals[1].(string)
   257  	})
   258  }
   259  
   260  func readIndexFileOffsets(shardDirPath string, numEntries int,
   261  	start xtime.UnixNano) (map[string]int64, error) {
   262  	indexFilePath := dataFilesetPathFromTimeAndIndex(shardDirPath, start, 0, indexFileSuffix, false)
   263  	buf, err := ioutil.ReadFile(indexFilePath)
   264  	if err != nil {
   265  		return nil, fmt.Errorf("err reading index file: %v, ", err)
   266  	}
   267  
   268  	decoderStream := msgpack.NewByteDecoderStream(buf)
   269  	decoder := msgpack.NewDecoder(testDefaultOpts.DecodingOptions())
   270  	decoder.Reset(decoderStream)
   271  
   272  	summariesOffsets := map[string]int64{}
   273  	for read := 0; read < numEntries; read++ {
   274  		offset := int64(len(buf)) - (decoderStream.Remaining())
   275  		entry, err := decoder.DecodeIndexEntry(nil)
   276  		if err != nil {
   277  			return nil, fmt.Errorf("err decoding index entry: %v", err)
   278  		}
   279  		summariesOffsets[string(entry.ID)] = offset
   280  	}
   281  	return summariesOffsets, nil
   282  }