github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/index_lookup_prop_test.go (about) 1 // +build big 2 3 // Copyright (c) 2017 Uber Technologies, Inc. 4 // 5 // Permission is hereby granted, free of charge, to any person obtaining a copy 6 // of this software and associated documentation files (the "Software"), to deal 7 // in the Software without restriction, including without limitation the rights 8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 // copies of the Software, and to permit persons to whom the Software is 10 // furnished to do so, subject to the following conditions: 11 // 12 // The above copyright notice and this permission notice shall be included in 13 // all copies or substantial portions of the Software. 14 // 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 // THE SOFTWARE. 22 23 package fs 24 25 import ( 26 "fmt" 27 "io/ioutil" 28 "os" 29 "path/filepath" 30 "reflect" 31 "testing" 32 33 "github.com/m3db/m3/src/dbnode/digest" 34 "github.com/m3db/m3/src/dbnode/persist" 35 "github.com/m3db/m3/src/dbnode/persist/fs/msgpack" 36 "github.com/m3db/m3/src/x/checked" 37 "github.com/m3db/m3/src/x/ident" 38 "github.com/m3db/m3/src/x/mmap" 39 xtime "github.com/m3db/m3/src/x/time" 40 41 "github.com/leanovate/gopter" 42 "github.com/leanovate/gopter/gen" 43 "github.com/leanovate/gopter/prop" 44 "github.com/stretchr/testify/assert" 45 ) 46 47 func TestIndexLookupWriteRead(t *testing.T) { 48 // Define property test function which will be passed various propTestInputs 49 propertyFunc := func(input propTestInput) (bool, error) { 50 // Filter out duplicate IDs 51 writes := []generatedWrite{} 52 unique := map[string]struct{}{} 53 for _, write := range input.realWrites { 54 s := string(write.id.Bytes()) 55 if _, ok := unique[s]; ok { 56 continue 57 } 58 unique[s] = struct{}{} 59 writes = append(writes, write) 60 } 61 62 // Create a temporary directory for each test run 63 dir, err := ioutil.TempDir("", "testdb") 64 if err != nil { 65 return false, err 66 } 67 filePathPrefix := filepath.Join(dir, "") 68 defer os.RemoveAll(dir) 69 70 // NB(r): Use testDefaultOpts to avoid allocing pools each 71 // time we derive options 72 options := testDefaultOpts. 73 // Make sure that every index entry is also in the summaries file for the 74 // sake of verifying behavior 75 SetIndexSummariesPercent(1). 76 SetFilePathPrefix(filePathPrefix). 77 SetWriterBufferSize(testWriterBufferSize) 78 shard := input.shard 79 80 // Instantiate a writer and write the test data 81 w, err := NewWriter(options) 82 if err != nil { 83 return false, fmt.Errorf("err creating writer: %v, ", err) 84 } 85 writerOpts := DataWriterOpenOptions{ 86 BlockSize: testBlockSize, 87 Identifier: FileSetFileIdentifier{ 88 Namespace: testNs1ID, 89 Shard: shard, 90 BlockStart: testWriterStart, 91 }, 92 } 93 err = w.Open(writerOpts) 94 if err != nil { 95 return false, fmt.Errorf("err opening writer: %v, ", err) 96 } 97 shardDirPath := ShardDataDirPath(filePathPrefix, testNs1ID, shard) 98 err = writeTestSummariesData(w, writes) 99 if err != nil { 100 return false, fmt.Errorf("err writing test summaries data: %v, ", err) 101 } 102 103 // Figure out the offsets for the writes so we have something to compare 104 // our results against 105 expectedIndexFileOffsets, err := readIndexFileOffsets( 106 shardDirPath, len(writes), testWriterStart) 107 if err != nil { 108 return false, fmt.Errorf("err reading index file offsets: %v", err) 109 } 110 111 // Read the summaries file into memory 112 summariesFilePath := dataFilesetPathFromTimeAndIndex( 113 shardDirPath, testWriterStart, 0, summariesFileSuffix, false) 114 summariesFile, err := os.Open(summariesFilePath) 115 if err != nil { 116 return false, fmt.Errorf("err opening summaries file: %v, ", err) 117 } 118 summariesFdWithDigest := digest.NewFdWithDigestReader(options.InfoReaderBufferSize()) 119 summariesFdWithDigest.Reset(summariesFile) 120 expectedSummariesDigest := calculateExpectedChecksum(t, summariesFilePath) 121 decoder := msgpack.NewDecoder(options.DecodingOptions()) 122 decoderStream := msgpack.NewByteDecoderStream(nil) 123 indexLookup, err := newNearestIndexOffsetLookupFromSummariesFile( 124 summariesFdWithDigest, expectedSummariesDigest, 125 decoder, decoderStream, len(writes), input.forceMmapMemory, mmap.ReporterOptions{}) 126 if err != nil { 127 return false, fmt.Errorf("err reading index lookup from summaries file: %v, ", err) 128 } 129 130 // Make sure it returns the correct index offset for every ID. 131 resources := newTestReusableSeekerResources() 132 for id, expectedOffset := range expectedIndexFileOffsets { 133 foundOffset, err := indexLookup.getNearestIndexFileOffset(ident.StringID(id), resources) 134 if err != nil { 135 return false, fmt.Errorf("err locating index file offset for: %s, err: %v", id, err) 136 } 137 if expectedOffset != foundOffset { 138 return false, fmt.Errorf( 139 "offsets for: %s do not match, expected: %d, got: %d", 140 id, expectedOffset, foundOffset) 141 } 142 } 143 144 return true, nil 145 } 146 147 parameters := gopter.DefaultTestParameters() 148 parameters.Rng.Seed(123456789) 149 parameters.MinSuccessfulTests = 100 150 props := gopter.NewProperties(parameters) 151 152 props.Property( 153 "Index lookup can properly lookup index offsets", 154 prop.ForAll(propertyFunc, genPropTestInputs()), 155 ) 156 157 props.TestingRun(t) 158 } 159 160 func calculateExpectedChecksum(t *testing.T, filePath string) uint32 { 161 fileBytes, err := ioutil.ReadFile(filePath) 162 assert.NoError(t, err) 163 return digest.Checksum(fileBytes) 164 } 165 166 func writeTestSummariesData(w DataFileSetWriter, writes []generatedWrite) error { 167 for _, write := range writes { 168 metadata := persist.NewMetadataFromIDAndTags(write.id, write.tags, 169 persist.MetadataOptions{}) 170 err := w.Write(metadata, write.data, write.checksum) 171 if err != nil { 172 return err 173 } 174 } 175 return w.Close() 176 } 177 178 type propTestInput struct { 179 // IDs to write and assert against 180 realWrites []generatedWrite 181 // Shard number to use for the files 182 shard uint32 183 // Whether the summaries file bytes should be mmap'd as an 184 // anonymous region or file. 185 forceMmapMemory bool 186 } 187 188 type generatedWrite struct { 189 id ident.ID 190 tags ident.Tags 191 data checked.Bytes 192 checksum uint32 193 } 194 195 func genPropTestInputs() gopter.Gen { 196 return gopter.CombineGens( 197 gen.IntRange(0, 1000), 198 ).FlatMap(func(input interface{}) gopter.Gen { 199 inputs := input.([]interface{}) 200 numRealWrites := inputs[0].(int) 201 return genPropTestInput(numRealWrites) 202 }, reflect.TypeOf(propTestInput{})) 203 } 204 205 func genPropTestInput(numRealWrites int) gopter.Gen { 206 return gopter.CombineGens( 207 gen.SliceOfN(numRealWrites, genWrite()), 208 gen.UInt32(), 209 gen.Bool(), 210 ).Map(func(vals []interface{}) propTestInput { 211 return propTestInput{ 212 realWrites: vals[0].([]generatedWrite), 213 shard: vals[1].(uint32), 214 forceMmapMemory: vals[2].(bool), 215 } 216 }) 217 } 218 219 func genWrite() gopter.Gen { 220 return gopter.CombineGens( 221 // gopter will generate random strings, but some of them may be duplicates 222 // (which can't normally happen for IDs and breaks this codepath), so we 223 // filter down to unique inputs 224 // ID 225 gen.AnyString(), 226 // Tag 1 227 genTagIdent(), 228 genTagIdent(), 229 // Tag 2 230 genTagIdent(), 231 genTagIdent(), 232 // Data 233 gen.SliceOfN(100, gen.UInt8()), 234 ).Map(func(vals []interface{}) generatedWrite { 235 id := vals[0].(string) 236 tags := []ident.Tag{ 237 ident.StringTag(vals[1].(string), vals[2].(string)), 238 ident.StringTag(vals[3].(string), vals[4].(string)), 239 } 240 data := vals[5].([]byte) 241 242 return generatedWrite{ 243 id: ident.StringID(id), 244 tags: ident.NewTags(tags...), 245 data: bytesRefd(data), 246 checksum: digest.Checksum(data), 247 } 248 }) 249 } 250 251 func genTagIdent() gopter.Gen { 252 return gopter.CombineGens( 253 gen.AlphaChar(), 254 gen.AnyString(), 255 ).Map(func(vals []interface{}) string { 256 return string(vals[0].(rune)) + vals[1].(string) 257 }) 258 } 259 260 func readIndexFileOffsets(shardDirPath string, numEntries int, 261 start xtime.UnixNano) (map[string]int64, error) { 262 indexFilePath := dataFilesetPathFromTimeAndIndex(shardDirPath, start, 0, indexFileSuffix, false) 263 buf, err := ioutil.ReadFile(indexFilePath) 264 if err != nil { 265 return nil, fmt.Errorf("err reading index file: %v, ", err) 266 } 267 268 decoderStream := msgpack.NewByteDecoderStream(buf) 269 decoder := msgpack.NewDecoder(testDefaultOpts.DecodingOptions()) 270 decoder.Reset(decoderStream) 271 272 summariesOffsets := map[string]int64{} 273 for read := 0; read < numEntries; read++ { 274 offset := int64(len(buf)) - (decoderStream.Remaining()) 275 entry, err := decoder.DecodeIndexEntry(nil) 276 if err != nil { 277 return nil, fmt.Errorf("err decoding index entry: %v", err) 278 } 279 summariesOffsets[string(entry.ID)] = offset 280 } 281 return summariesOffsets, nil 282 }