github.com/thanos-io/thanos@v0.32.5/pkg/block/indexheader/header_test.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package indexheader 5 6 import ( 7 "context" 8 "fmt" 9 "math" 10 "path/filepath" 11 "strconv" 12 "testing" 13 14 "github.com/go-kit/log" 15 "github.com/oklog/ulid" 16 "github.com/pkg/errors" 17 "github.com/prometheus/prometheus/model/labels" 18 "github.com/prometheus/prometheus/tsdb/encoding" 19 "github.com/prometheus/prometheus/tsdb/fileutil" 20 "github.com/prometheus/prometheus/tsdb/index" 21 "github.com/thanos-io/objstore" 22 "github.com/thanos-io/objstore/providers/filesystem" 23 24 "github.com/efficientgo/core/testutil" 25 "github.com/thanos-io/thanos/pkg/block" 26 "github.com/thanos-io/thanos/pkg/block/metadata" 27 "github.com/thanos-io/thanos/pkg/testutil/e2eutil" 28 ) 29 30 func TestReaders(t *testing.T) { 31 ctx := context.Background() 32 33 tmpDir := t.TempDir() 34 35 bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt")) 36 testutil.Ok(t, err) 37 defer func() { testutil.Ok(t, bkt.Close()) }() 38 39 // Create block index version 2. 40 id1, err := e2eutil.CreateBlock(ctx, tmpDir, []labels.Labels{ 41 {{Name: "a", Value: "1"}}, 42 {{Name: "a", Value: "2"}}, 43 {{Name: "a", Value: "3"}}, 44 {{Name: "a", Value: "4"}}, 45 {{Name: "a", Value: "5"}}, 46 {{Name: "a", Value: "6"}}, 47 {{Name: "a", Value: "7"}}, 48 {{Name: "a", Value: "8"}}, 49 {{Name: "a", Value: "9"}}, 50 // Missing 10 on purpose. 51 {{Name: "a", Value: "11"}}, 52 {{Name: "a", Value: "12"}}, 53 {{Name: "a", Value: "13"}}, 54 {{Name: "a", Value: "1"}, {Name: "longer-string", Value: "1"}}, 55 {{Name: "a", Value: "1"}, {Name: "longer-string", Value: "2"}}, 56 }, 100, 0, 1000, labels.Labels{{Name: "ext1", Value: "1"}}, 124, metadata.NoneFunc) 57 testutil.Ok(t, err) 58 59 testutil.Ok(t, block.Upload(ctx, log.NewNopLogger(), bkt, filepath.Join(tmpDir, id1.String()), metadata.NoneFunc)) 60 61 // Copy block index version 1 for backward compatibility. 62 /* The block here was produced at the commit 63 706602daed1487f7849990678b4ece4599745905 used in 2.0.0 with: 64 db, _ := Open("v1db", nil, nil, nil) 65 app := db.Appender() 66 app.Add(labels.FromStrings("foo", "bar"), 1, 2) 67 app.Add(labels.FromStrings("foo", "baz"), 3, 4) 68 app.Add(labels.FromStrings("foo", "meh"), 1000*3600*4, 4) // Not in the block. 69 // Make sure we've enough values for the lack of sorting of postings offsets to show up. 70 for i := 0; i < 100; i++ { 71 app.Add(labels.FromStrings("bar", strconv.FormatInt(int64(i), 10)), 0, 0) 72 } 73 app.Commit() 74 db.compact() 75 db.Close() 76 */ 77 78 m, err := metadata.ReadFromDir("./testdata/index_format_v1") 79 testutil.Ok(t, err) 80 e2eutil.Copy(t, "./testdata/index_format_v1", filepath.Join(tmpDir, m.ULID.String())) 81 82 _, err = metadata.InjectThanos(log.NewNopLogger(), filepath.Join(tmpDir, m.ULID.String()), metadata.Thanos{ 83 Labels: labels.Labels{{Name: "ext1", Value: "1"}}.Map(), 84 Downsample: metadata.ThanosDownsample{Resolution: 0}, 85 Source: metadata.TestSource, 86 }, &m.BlockMeta) 87 testutil.Ok(t, err) 88 testutil.Ok(t, block.Upload(ctx, log.NewNopLogger(), bkt, filepath.Join(tmpDir, m.ULID.String()), metadata.NoneFunc)) 89 90 for _, id := range []ulid.ULID{id1, m.ULID} { 91 t.Run(id.String(), func(t *testing.T) { 92 indexFile, err := fileutil.OpenMmapFile(filepath.Join(tmpDir, id.String(), block.IndexFilename)) 93 testutil.Ok(t, err) 94 defer func() { _ = indexFile.Close() }() 95 96 b := realByteSlice(indexFile.Bytes()) 97 98 t.Run("binary reader", func(t *testing.T) { 99 fn := filepath.Join(tmpDir, id.String(), block.IndexHeaderFilename) 100 _, err := WriteBinary(ctx, bkt, id, fn) 101 testutil.Ok(t, err) 102 103 br, err := NewBinaryReader(ctx, log.NewNopLogger(), nil, tmpDir, id, 3) 104 testutil.Ok(t, err) 105 106 defer func() { testutil.Ok(t, br.Close()) }() 107 108 if id == id1 { 109 testutil.Equals(t, 1, br.version) 110 testutil.Equals(t, 2, br.indexVersion) 111 testutil.Equals(t, &BinaryTOC{Symbols: headerLen, PostingsOffsetTable: 70}, br.toc) 112 testutil.Equals(t, int64(710), br.indexLastPostingEnd) 113 testutil.Equals(t, 8, br.symbols.Size()) 114 testutil.Equals(t, 0, len(br.postingsV1)) 115 testutil.Equals(t, 2, len(br.nameSymbols)) 116 testutil.Equals(t, map[string]*postingValueOffsets{ 117 "": { 118 offsets: []postingOffset{{value: "", tableOff: 4}}, 119 lastValOffset: 440, 120 }, 121 "a": { 122 offsets: []postingOffset{ 123 {value: "1", tableOff: 9}, 124 {value: "13", tableOff: 32}, 125 {value: "4", tableOff: 54}, 126 {value: "7", tableOff: 75}, 127 {value: "9", tableOff: 89}, 128 }, 129 lastValOffset: 640, 130 }, 131 "longer-string": { 132 offsets: []postingOffset{ 133 {value: "1", tableOff: 96}, 134 {value: "2", tableOff: 115}, 135 }, 136 lastValOffset: 706, 137 }, 138 }, br.postings) 139 140 vals, err := br.LabelValues("not-existing") 141 testutil.Ok(t, err) 142 testutil.Equals(t, []string(nil), vals) 143 144 // Regression tests for https://github.com/thanos-io/thanos/issues/2213. 145 // Most of not existing value was working despite bug, except in certain unlucky cases 146 // it was causing "invalid size" errors. 147 _, err = br.PostingsOffset("not-existing", "1") 148 testutil.Equals(t, NotFoundRangeErr, err) 149 _, err = br.PostingsOffset("a", "0") 150 testutil.Equals(t, NotFoundRangeErr, err) 151 // Unlucky case, because the bug was causing unnecessary read & decode requiring more bytes than 152 // available. For rest cases read was noop wrong, but at least not failing. 153 _, err = br.PostingsOffset("a", "10") 154 testutil.Equals(t, NotFoundRangeErr, err) 155 _, err = br.PostingsOffset("a", "121") 156 testutil.Equals(t, NotFoundRangeErr, err) 157 _, err = br.PostingsOffset("a", "131") 158 testutil.Equals(t, NotFoundRangeErr, err) 159 _, err = br.PostingsOffset("a", "91") 160 testutil.Equals(t, NotFoundRangeErr, err) 161 _, err = br.PostingsOffset("longer-string", "0") 162 testutil.Equals(t, NotFoundRangeErr, err) 163 _, err = br.PostingsOffset("longer-string", "11") 164 testutil.Equals(t, NotFoundRangeErr, err) 165 _, err = br.PostingsOffset("longer-string", "21") 166 testutil.Equals(t, NotFoundRangeErr, err) 167 } 168 169 compareIndexToHeader(t, b, br) 170 }) 171 172 t.Run("lazy binary reader", func(t *testing.T) { 173 fn := filepath.Join(tmpDir, id.String(), block.IndexHeaderFilename) 174 _, err := WriteBinary(ctx, bkt, id, fn) 175 testutil.Ok(t, err) 176 177 br, err := NewLazyBinaryReader(ctx, log.NewNopLogger(), nil, tmpDir, id, 3, NewLazyBinaryReaderMetrics(nil), nil) 178 testutil.Ok(t, err) 179 180 defer func() { testutil.Ok(t, br.Close()) }() 181 182 compareIndexToHeader(t, b, br) 183 }) 184 }) 185 } 186 187 } 188 189 func compareIndexToHeader(t *testing.T, indexByteSlice index.ByteSlice, headerReader Reader) { 190 indexReader, err := index.NewReader(indexByteSlice) 191 testutil.Ok(t, err) 192 defer func() { _ = indexReader.Close() }() 193 194 actVersion, err := headerReader.IndexVersion() 195 testutil.Ok(t, err) 196 testutil.Equals(t, indexReader.Version(), actVersion) 197 198 if indexReader.Version() == index.FormatV2 { 199 // For v2 symbols ref sequential integers 0, 1, 2 etc. 200 iter := indexReader.Symbols() 201 i := 0 202 for iter.Next() { 203 r, err := headerReader.LookupSymbol(uint32(i)) 204 testutil.Ok(t, err) 205 testutil.Equals(t, iter.At(), r) 206 207 i++ 208 } 209 testutil.Ok(t, iter.Err()) 210 _, err := headerReader.LookupSymbol(uint32(i)) 211 testutil.NotOk(t, err) 212 213 } else { 214 // For v1 symbols refs are actual offsets in the index. 215 symbols, err := getSymbolTable(indexByteSlice) 216 testutil.Ok(t, err) 217 218 for refs, sym := range symbols { 219 r, err := headerReader.LookupSymbol(refs) 220 testutil.Ok(t, err) 221 testutil.Equals(t, sym, r) 222 } 223 _, err = headerReader.LookupSymbol(200000) 224 testutil.NotOk(t, err) 225 } 226 227 expLabelNames, err := indexReader.LabelNames() 228 testutil.Ok(t, err) 229 actualLabelNames, err := headerReader.LabelNames() 230 testutil.Ok(t, err) 231 testutil.Equals(t, expLabelNames, actualLabelNames) 232 233 expRanges, err := indexReader.PostingsRanges() 234 testutil.Ok(t, err) 235 236 minStart := int64(math.MaxInt64) 237 maxEnd := int64(math.MinInt64) 238 for il, lname := range expLabelNames { 239 expectedLabelVals, err := indexReader.SortedLabelValues(lname) 240 testutil.Ok(t, err) 241 242 vals, err := headerReader.LabelValues(lname) 243 testutil.Ok(t, err) 244 testutil.Equals(t, expectedLabelVals, vals) 245 246 for iv, v := range vals { 247 if minStart > expRanges[labels.Label{Name: lname, Value: v}].Start { 248 minStart = expRanges[labels.Label{Name: lname, Value: v}].Start 249 } 250 if maxEnd < expRanges[labels.Label{Name: lname, Value: v}].End { 251 maxEnd = expRanges[labels.Label{Name: lname, Value: v}].End 252 } 253 254 ptr, err := headerReader.PostingsOffset(lname, v) 255 testutil.Ok(t, err) 256 257 // For index-cache those values are exact. 258 // 259 // For binary they are exact except last item posting offset. It's good enough if the value is larger than exact posting ending. 260 if indexReader.Version() == index.FormatV2 { 261 if iv == len(vals)-1 && il == len(expLabelNames)-1 { 262 testutil.Equals(t, expRanges[labels.Label{Name: lname, Value: v}].Start, ptr.Start) 263 testutil.Assert(t, expRanges[labels.Label{Name: lname, Value: v}].End <= ptr.End, "got offset %v earlier than actual posting end %v ", ptr.End, expRanges[labels.Label{Name: lname, Value: v}].End) 264 continue 265 } 266 } else { 267 // For index formatV1 the last one does not mean literally last value, as postings were not sorted. 268 // Account for that. We know it's 40 label value. 269 if v == "40" { 270 testutil.Equals(t, expRanges[labels.Label{Name: lname, Value: v}].Start, ptr.Start) 271 testutil.Assert(t, expRanges[labels.Label{Name: lname, Value: v}].End <= ptr.End, "got offset %v earlier than actual posting end %v ", ptr.End, expRanges[labels.Label{Name: lname, Value: v}].End) 272 continue 273 } 274 } 275 testutil.Equals(t, expRanges[labels.Label{Name: lname, Value: v}], ptr) 276 } 277 } 278 279 ptr, err := headerReader.PostingsOffset(index.AllPostingsKey()) 280 testutil.Ok(t, err) 281 testutil.Equals(t, expRanges[labels.Label{Name: "", Value: ""}].Start, ptr.Start) 282 testutil.Equals(t, expRanges[labels.Label{Name: "", Value: ""}].End, ptr.End) 283 } 284 285 func prepareIndexV2Block(t testing.TB, tmpDir string, bkt objstore.Bucket) *metadata.Meta { 286 /* Copy index 6MB block index version 2. It was generated via thanosbench. Meta.json: 287 { 288 "ulid": "01DRBP4RNVZ94135ZA6B10EMRR", 289 "minTime": 1570766415000, 290 "maxTime": 1570939215001, 291 "stats": { 292 "numSamples": 115210000, 293 "numSeries": 10000, 294 "numChunks": 990000 295 }, 296 "compaction": { 297 "level": 1, 298 "sources": [ 299 "01DRBP4RNVZ94135ZA6B10EMRR" 300 ] 301 }, 302 "version": 1, 303 "thanos": { 304 "labels": { 305 "cluster": "one", 306 "dataset": "continuous" 307 }, 308 "downsample": { 309 "resolution": 0 310 }, 311 "source": "blockgen" 312 } 313 } 314 */ 315 316 m, err := metadata.ReadFromDir("./testdata/index_format_v2") 317 testutil.Ok(t, err) 318 e2eutil.Copy(t, "./testdata/index_format_v2", filepath.Join(tmpDir, m.ULID.String())) 319 320 _, err = metadata.InjectThanos(log.NewNopLogger(), filepath.Join(tmpDir, m.ULID.String()), metadata.Thanos{ 321 Labels: labels.Labels{{Name: "ext1", Value: "1"}}.Map(), 322 Downsample: metadata.ThanosDownsample{Resolution: 0}, 323 Source: metadata.TestSource, 324 }, &m.BlockMeta) 325 testutil.Ok(t, err) 326 testutil.Ok(t, block.Upload(context.Background(), log.NewNopLogger(), bkt, filepath.Join(tmpDir, m.ULID.String()), metadata.NoneFunc)) 327 328 return m 329 } 330 331 func BenchmarkBinaryWrite(t *testing.B) { 332 ctx := context.Background() 333 334 tmpDir := t.TempDir() 335 336 bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt")) 337 testutil.Ok(t, err) 338 defer func() { testutil.Ok(t, bkt.Close()) }() 339 340 m := prepareIndexV2Block(t, tmpDir, bkt) 341 fn := filepath.Join(tmpDir, m.ULID.String(), block.IndexHeaderFilename) 342 343 t.ResetTimer() 344 for i := 0; i < t.N; i++ { 345 _, err := WriteBinary(ctx, bkt, m.ULID, fn) 346 testutil.Ok(t, err) 347 } 348 } 349 350 func BenchmarkBinaryReader(t *testing.B) { 351 ctx := context.Background() 352 tmpDir := t.TempDir() 353 354 bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt")) 355 testutil.Ok(t, err) 356 357 m := prepareIndexV2Block(t, tmpDir, bkt) 358 fn := filepath.Join(tmpDir, m.ULID.String(), block.IndexHeaderFilename) 359 _, err = WriteBinary(ctx, bkt, m.ULID, fn) 360 testutil.Ok(t, err) 361 362 t.ResetTimer() 363 for i := 0; i < t.N; i++ { 364 br, err := newFileBinaryReader(fn, 32) 365 testutil.Ok(t, err) 366 testutil.Ok(t, br.Close()) 367 } 368 } 369 370 func BenchmarkBinaryReader_LookupSymbol(b *testing.B) { 371 for _, numSeries := range []int{valueSymbolsCacheSize, valueSymbolsCacheSize * 10} { 372 b.Run(fmt.Sprintf("num series = %d", numSeries), func(b *testing.B) { 373 benchmarkBinaryReaderLookupSymbol(b, numSeries) 374 }) 375 } 376 } 377 378 func benchmarkBinaryReaderLookupSymbol(b *testing.B, numSeries int) { 379 const postingOffsetsInMemSampling = 32 380 381 ctx := context.Background() 382 logger := log.NewNopLogger() 383 384 tmpDir := b.TempDir() 385 386 bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt")) 387 testutil.Ok(b, err) 388 defer func() { testutil.Ok(b, bkt.Close()) }() 389 390 // Generate series labels. 391 seriesLabels := make([]labels.Labels, 0, numSeries) 392 for i := 0; i < numSeries; i++ { 393 seriesLabels = append(seriesLabels, labels.Labels{{Name: "a", Value: strconv.Itoa(i)}}) 394 } 395 396 // Create a block. 397 id1, err := e2eutil.CreateBlock(ctx, tmpDir, seriesLabels, 100, 0, 1000, labels.Labels{{Name: "ext1", Value: "1"}}, 124, metadata.NoneFunc) 398 testutil.Ok(b, err) 399 testutil.Ok(b, block.Upload(ctx, logger, bkt, filepath.Join(tmpDir, id1.String()), metadata.NoneFunc)) 400 401 // Create an index reader. 402 reader, err := NewBinaryReader(ctx, logger, bkt, tmpDir, id1, postingOffsetsInMemSampling) 403 testutil.Ok(b, err) 404 405 // Get the offset of each label value symbol. 406 symbolsOffsets := make([]uint32, numSeries) 407 for i := 0; i < numSeries; i++ { 408 o, err := reader.symbols.ReverseLookup(strconv.Itoa(i)) 409 testutil.Ok(b, err) 410 411 symbolsOffsets[i] = o 412 } 413 414 b.ResetTimer() 415 416 for n := 0; n < b.N; n++ { 417 for i := 0; i < len(symbolsOffsets); i++ { 418 if _, err := reader.LookupSymbol(symbolsOffsets[i]); err != nil { 419 b.Fail() 420 } 421 } 422 } 423 } 424 425 func getSymbolTable(b index.ByteSlice) (map[uint32]string, error) { 426 version := int(b.Range(4, 5)[0]) 427 428 if version != 1 && version != 2 { 429 return nil, errors.Errorf("unknown index file version %d", version) 430 } 431 432 toc, err := index.NewTOCFromByteSlice(b) 433 if err != nil { 434 return nil, errors.Wrap(err, "read TOC") 435 } 436 437 symbolsV2, symbolsV1, err := readSymbols(b, version, int(toc.Symbols)) 438 if err != nil { 439 return nil, errors.Wrap(err, "read symbols") 440 } 441 442 symbolsTable := make(map[uint32]string, len(symbolsV1)+len(symbolsV2)) 443 for o, s := range symbolsV1 { 444 symbolsTable[o] = s 445 } 446 for o, s := range symbolsV2 { 447 symbolsTable[uint32(o)] = s 448 } 449 return symbolsTable, nil 450 } 451 452 // readSymbols reads the symbol table fully into memory and allocates proper strings for them. 453 // Strings backed by the mmap'd memory would cause memory faults if applications keep using them 454 // after the reader is closed. 455 func readSymbols(bs index.ByteSlice, version, off int) ([]string, map[uint32]string, error) { 456 if off == 0 { 457 return nil, nil, nil 458 } 459 d := encoding.NewDecbufAt(bs, off, castagnoliTable) 460 461 var ( 462 origLen = d.Len() 463 cnt = d.Be32int() 464 basePos = uint32(off) + 4 465 nextPos = basePos + uint32(origLen-d.Len()) 466 symbolSlice []string 467 symbols = map[uint32]string{} 468 ) 469 if version == index.FormatV2 { 470 symbolSlice = make([]string, 0, cnt) 471 } 472 473 for d.Err() == nil && d.Len() > 0 && cnt > 0 { 474 s := d.UvarintStr() 475 476 if version == index.FormatV2 { 477 symbolSlice = append(symbolSlice, s) 478 } else { 479 symbols[nextPos] = s 480 nextPos = basePos + uint32(origLen-d.Len()) 481 } 482 cnt-- 483 } 484 return symbolSlice, symbols, errors.Wrap(d.Err(), "read symbols") 485 }