github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/chunkenc/memchunk_test.go

     1  package chunkenc
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"math"
     9  	"math/rand"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/dustin/go-humanize"
    17  	"github.com/prometheus/prometheus/model/labels"
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  
    21  	"github.com/grafana/loki/pkg/chunkenc/testdata"
    22  	"github.com/grafana/loki/pkg/iter"
    23  	"github.com/grafana/loki/pkg/logproto"
    24  	"github.com/grafana/loki/pkg/logql/log"
    25  	"github.com/grafana/loki/pkg/logql/syntax"
    26  	"github.com/grafana/loki/pkg/logqlmodel/stats"
    27  	"github.com/grafana/loki/pkg/storage/chunk"
    28  )
    29  
    30  var testEncoding = []Encoding{
    31  	EncNone,
    32  	EncGZIP,
    33  	EncLZ4_64k,
    34  	EncLZ4_256k,
    35  	EncLZ4_1M,
    36  	EncLZ4_4M,
    37  	EncSnappy,
    38  	EncFlate,
    39  	EncZstd,
    40  }
    41  
    42  var (
    43  	testBlockSize  = 256 * 1024
    44  	testTargetSize = 1500 * 1024
    45  	testBlockSizes = []int{64 * 1024, 256 * 1024, 512 * 1024}
    46  	countExtractor = func() log.StreamSampleExtractor {
    47  		ex, err := log.NewLineSampleExtractor(log.CountExtractor, nil, nil, false, false)
    48  		if err != nil {
    49  			panic(err)
    50  		}
    51  		return ex.ForStream(labels.Labels{})
    52  	}()
    53  )
    54  
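         // DefaultHeadBlockFmt is the head-block format used by tests that do not iterate over HeadBlockFmts.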
    55  const DefaultHeadBlockFmt = OrderedHeadBlockFmt
    56  
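         // TestBlocksInclusive verifies that Blocks() treats its from/through bounds as inclusive: the block holding the single entry at time.Unix(0, 1) is returned for the range [1, 1].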
    57  func TestBlocksInclusive(t *testing.T) {
    58  	chk := NewMemChunk(EncNone, DefaultHeadBlockFmt, testBlockSize, testTargetSize)
    59  	err := chk.Append(logprotoEntry(1, "1"))
    60  	require.Nil(t, err)
    61  	err = chk.cut()
    62  	require.Nil(t, err)
    63  
    64  	blocks := chk.Blocks(time.Unix(0, 1), time.Unix(0, 1))
    65  	require.Equal(t, 1, len(blocks))
    66  	require.Equal(t, 1, blocks[0].Entries())
    67  }
    68  
    69  func TestBlock(t *testing.T) {
    70  	for _, enc := range testEncoding {
    71  		t.Run(enc.String(), func(t *testing.T) {
    72  			t.Parallel()
    73  
    74  			chk := NewMemChunk(enc, DefaultHeadBlockFmt, testBlockSize, testTargetSize)
    75  			cases := []struct {
    76  				ts  int64
    77  				str string
    78  				cut bool
    79  			}{
    80  				{
    81  					ts:  1,
    82  					str: "hello, world!",
    83  				},
    84  				{
    85  					ts:  2,
    86  					str: "hello, world2!",
    87  				},
    88  				{
    89  					ts:  3,
    90  					str: "hello, world3!",
    91  				},
    92  				{
    93  					ts:  4,
    94  					str: "hello, world4!",
    95  				},
    96  				{
    97  					ts:  5,
    98  					str: "hello, world5!",
    99  				},
   100  				{
   101  					ts:  6,
   102  					str: "hello, world6!",
   103  					cut: true,
   104  				},
   105  				{
   106  					ts:  7,
   107  					str: "hello, world7!",
   108  				},
   109  				{
   110  					ts:  8,
   111  					str: "hello, worl\nd8!",
   112  				},
   113  				{
   114  					ts:  8,
   115  					str: "hello, world 8, 2!",
   116  				},
   117  				{
   118  					ts:  8,
   119  					str: "hello, world 8, 3!",
   120  				},
   121  				{
   122  					ts:  9,
   123  					str: "",
   124  				},
   125  			}
   126  
   127  			for _, c := range cases {
   128  				require.NoError(t, chk.Append(logprotoEntry(c.ts, c.str)))
   129  				if c.cut {
   130  					require.NoError(t, chk.cut())
   131  				}
   132  			}
   133  
   134  			it, err := chk.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, noopStreamPipeline)
   135  			require.NoError(t, err)
   136  
   137  			idx := 0
   138  			for it.Next() {
   139  				e := it.Entry()
   140  				require.Equal(t, cases[idx].ts, e.Timestamp.UnixNano())
   141  				require.Equal(t, cases[idx].str, e.Line)
   142  				idx++
   143  			}
   144  
   145  			require.NoError(t, it.Error())
   146  			require.NoError(t, it.Close())
   147  			require.Equal(t, len(cases), idx)
   148  
   149  			sampleIt := chk.SampleIterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), countExtractor)
   150  			idx = 0
   151  			for sampleIt.Next() {
   152  				s := sampleIt.Sample()
   153  				require.Equal(t, cases[idx].ts, s.Timestamp)
   154  				require.Equal(t, 1., s.Value)
   155  				require.NotEmpty(t, s.Hash)
   156  				idx++
   157  			}
   158  
   159  			require.NoError(t, sampleIt.Error())
   160  			require.NoError(t, sampleIt.Close())
   161  			require.Equal(t, len(cases), idx)
   162  
   163  			t.Run("bounded-iteration", func(t *testing.T) {
   164  				it, err := chk.Iterator(context.Background(), time.Unix(0, 3), time.Unix(0, 7), logproto.FORWARD, noopStreamPipeline)
   165  				require.NoError(t, err)
   166  
   167  				idx := 2
   168  				for it.Next() {
   169  					e := it.Entry()
   170  					require.Equal(t, cases[idx].ts, e.Timestamp.UnixNano())
   171  					require.Equal(t, cases[idx].str, e.Line)
   172  					idx++
   173  				}
   174  				require.NoError(t, it.Error())
   175  				require.Equal(t, 6, idx)
   176  			})
   177  		})
   178  	}
   179  }
   180  
   181  func TestReadFormatV1(t *testing.T) {
   182  	t.Parallel()
   183  
   184  	c := NewMemChunk(EncGZIP, DefaultHeadBlockFmt, testBlockSize, testTargetSize)
   185  	fillChunk(c)
   186  	// overrides default v2 format
   187  	c.format = chunkFormatV1
   188  
   189  	b, err := c.Bytes()
   190  	if err != nil {
   191  		t.Fatal(err)
   192  	}
   193  
   194  	r, err := NewByteChunk(b, testBlockSize, testTargetSize)
   195  	if err != nil {
   196  		t.Fatal(err)
   197  	}
   198  
   199  	it, err := r.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, noopStreamPipeline)
   200  	if err != nil {
   201  		t.Fatal(err)
   202  	}
   203  
   204  	i := int64(0)
   205  	for it.Next() {
   206  		require.Equal(t, i, it.Entry().Timestamp.UnixNano())
   207  		require.Equal(t, testdata.LogString(i), it.Entry().Line)
   208  
   209  		i++
   210  	}
   211  }
   212  
    213  // Test all encodings by populating a memchunk, serializing it,
    214  // re-loading it with NewByteChunk, serializing it again, and re-loading it via NewByteChunk once more.
    215  // This tests the integrity of transfer between the following:
    216  // 1) memory-populated chunks <-> []byte-loaded chunks
    217  // 2) []byte-loaded chunks <-> []byte-loaded chunks
   218  func TestRoundtripV2(t *testing.T) {
   219  	for _, f := range HeadBlockFmts {
   220  		for _, enc := range testEncoding {
   221  			for _, version := range []byte{chunkFormatV2, chunkFormatV3} {
   222  				t.Run(enc.String(), func(t *testing.T) {
   223  					t.Parallel()
   224  
   225  					c := NewMemChunk(enc, f, testBlockSize, testTargetSize)
   226  					c.format = version
   227  					populated := fillChunk(c)
   228  
   229  					assertLines := func(c *MemChunk) {
   230  						require.Equal(t, enc, c.Encoding())
   231  						it, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, noopStreamPipeline)
   232  						if err != nil {
   233  							t.Fatal(err)
   234  						}
   235  
   236  						i := int64(0)
   237  						var data int64
   238  						for it.Next() {
   239  							require.Equal(t, i, it.Entry().Timestamp.UnixNano())
   240  							require.Equal(t, testdata.LogString(i), it.Entry().Line)
   241  
   242  							data += int64(len(it.Entry().Line))
   243  							i++
   244  						}
   245  						require.Equal(t, populated, data)
   246  					}
   247  
   248  					assertLines(c)
   249  
   250  					// test MemChunk -> NewByteChunk loading
   251  					b, err := c.Bytes()
   252  					if err != nil {
   253  						t.Fatal(err)
   254  					}
   255  
   256  					r, err := NewByteChunk(b, testBlockSize, testTargetSize)
   257  					if err != nil {
   258  						t.Fatal(err)
   259  					}
   260  					assertLines(r)
   261  
   262  					// test NewByteChunk -> NewByteChunk loading
   263  					rOut, err := r.Bytes()
   264  					require.Nil(t, err)
   265  
   266  					loaded, err := NewByteChunk(rOut, testBlockSize, testTargetSize)
   267  					require.Nil(t, err)
   268  
   269  					assertLines(loaded)
   270  				})
   271  			}
   272  		}
   273  	}
   274  }
   275  
   276  func TestRoundtripV3(t *testing.T) {
   277  	for _, f := range HeadBlockFmts {
   278  		for _, enc := range testEncoding {
   279  			t.Run(fmt.Sprintf("%v-%v", f, enc), func(t *testing.T) {
   280  				t.Parallel()
   281  
   282  				c := NewMemChunk(enc, f, testBlockSize, testTargetSize)
   283  				c.format = chunkFormatV3
   284  				_ = fillChunk(c)
   285  
   286  				b, err := c.Bytes()
   287  				require.Nil(t, err)
   288  				r, err := NewByteChunk(b, testBlockSize, testTargetSize)
   289  				require.Nil(t, err)
   290  
   291  				b2, err := r.Bytes()
   292  				require.Nil(t, err)
   293  				require.Equal(t, b, b2)
   294  			})
   295  		}
   296  	}
   297  }
   298  
   299  func TestSerialization(t *testing.T) {
   300  	for _, f := range HeadBlockFmts {
   301  		for _, enc := range testEncoding {
   302  			t.Run(enc.String(), func(t *testing.T) {
   303  				t.Parallel()
   304  
   305  				chk := NewMemChunk(enc, f, testBlockSize, testTargetSize)
   306  
   307  				numSamples := 50000
   308  
   309  				for i := 0; i < numSamples; i++ {
   310  					require.NoError(t, chk.Append(logprotoEntry(int64(i), strconv.Itoa(i))))
   311  				}
   312  				require.NoError(t, chk.Close())
   313  
   314  				byt, err := chk.Bytes()
   315  				require.NoError(t, err)
   316  
   317  				bc, err := NewByteChunk(byt, testBlockSize, testTargetSize)
   318  				require.NoError(t, err)
   319  
   320  				it, err := bc.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), logproto.FORWARD, noopStreamPipeline)
   321  				require.NoError(t, err)
   322  				for i := 0; i < numSamples; i++ {
   323  					require.True(t, it.Next())
   324  
   325  					e := it.Entry()
   326  					require.Equal(t, int64(i), e.Timestamp.UnixNano())
   327  					require.Equal(t, strconv.Itoa(i), e.Line)
   328  				}
   329  				require.NoError(t, it.Error())
   330  
   331  				sampleIt := bc.SampleIterator(context.Background(), time.Unix(0, 0), time.Unix(0, math.MaxInt64), countExtractor)
   332  				for i := 0; i < numSamples; i++ {
   333  					require.True(t, sampleIt.Next(), i)
   334  
   335  					s := sampleIt.Sample()
   336  					require.Equal(t, int64(i), s.Timestamp)
   337  					require.Equal(t, 1., s.Value)
   338  				}
   339  				require.NoError(t, sampleIt.Error())
   340  
   341  				byt2, err := chk.Bytes()
   342  				require.NoError(t, err)
   343  
   344  				require.True(t, bytes.Equal(byt, byt2))
   345  			})
   346  		}
   347  	}
   348  }
   349  
   350  func TestChunkFilling(t *testing.T) {
   351  	for _, f := range HeadBlockFmts {
   352  		for _, enc := range testEncoding {
   353  			t.Run(enc.String(), func(t *testing.T) {
   354  				t.Parallel()
   355  
   356  				chk := NewMemChunk(enc, f, testBlockSize, 0)
   357  				chk.blockSize = 1024
   358  
   359  				// We should be able to append only 10KB of logs.
   360  				maxBytes := chk.blockSize * blocksPerChunk
   361  				lineSize := 512
   362  				lines := maxBytes / lineSize
   363  
   364  				logLine := string(make([]byte, lineSize))
   365  				entry := &logproto.Entry{
   366  					Timestamp: time.Unix(0, 0),
   367  					Line:      logLine,
   368  				}
   369  
   370  				i := int64(0)
   371  				for ; chk.SpaceFor(entry) && i < 30; i++ {
   372  					entry.Timestamp = time.Unix(0, i)
   373  					require.NoError(t, chk.Append(entry))
   374  				}
   375  
   376  				require.Equal(t, int64(lines), i)
   377  
   378  				it, err := chk.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, 100), logproto.FORWARD, noopStreamPipeline)
   379  				require.NoError(t, err)
   380  				i = 0
   381  				for it.Next() {
   382  					entry := it.Entry()
   383  					require.Equal(t, i, entry.Timestamp.UnixNano())
   384  					i++
   385  				}
   386  
   387  				require.Equal(t, int64(lines), i)
   388  			})
   389  		}
   390  	}
   391  }
   392  
   393  func TestGZIPChunkTargetSize(t *testing.T) {
   394  	t.Parallel()
   395  
   396  	chk := NewMemChunk(EncGZIP, DefaultHeadBlockFmt, testBlockSize, testTargetSize)
   397  
   398  	lineSize := 512
   399  	entry := &logproto.Entry{
   400  		Timestamp: time.Unix(0, 0),
   401  		Line:      "",
   402  	}
   403  
    404  	// Use a random number generator to create random log data, otherwise the gzip compression is way too good
    405  	// and the following loop has to run far too many times.
    406  	// Using the same seed should guarantee the same random numbers and the same test data.
   407  	r := rand.New(rand.NewSource(99))
   408  
   409  	i := int64(0)
   410  
   411  	for ; chk.SpaceFor(entry) && i < 5000; i++ {
   412  		logLine := make([]byte, lineSize)
   413  		for j := range logLine {
   414  			logLine[j] = byte(r.Int())
   415  		}
   416  		entry = &logproto.Entry{
   417  			Timestamp: time.Unix(0, 0),
   418  			Line:      string(logLine),
   419  		}
   420  		entry.Timestamp = time.Unix(0, i)
   421  		require.NoError(t, chk.Append(entry))
   422  	}
   423  
    424  	// 5000 is a limit to make sure the test doesn't run away; we shouldn't need this many log lines to reach the target chunk size
   425  	require.NotEqual(t, 5000, i)
   426  
   427  	require.NoError(t, chk.Close())
   428  
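         	// Close should have cut any remaining head data into a block, leaving the head block empty.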
   429  	require.Equal(t, 0, chk.head.UncompressedSize())
   430  
   431  	// Even though the seed is static above and results should be deterministic,
   432  	// we will allow +/- 10% variance
   433  	minSize := int(float64(testTargetSize) * 0.9)
   434  	maxSize := int(float64(testTargetSize) * 1.1)
   435  	require.Greater(t, chk.CompressedSize(), minSize)
   436  	require.Less(t, chk.CompressedSize(), maxSize)
   437  
   438  	// Also verify our utilization is close to 1.0
   439  	ut := chk.Utilization()
   440  	require.Greater(t, ut, 0.99)
   441  	require.Less(t, ut, 1.01)
   442  }
   443  
   444  func TestMemChunk_AppendOutOfOrder(t *testing.T) {
   445  	t.Parallel()
   446  
   447  	type tester func(t *testing.T, chk *MemChunk)
   448  
   449  	tests := map[string]tester{
   450  		"append out of order in the same block": func(t *testing.T, chk *MemChunk) {
   451  			assert.NoError(t, chk.Append(logprotoEntry(5, "test")))
   452  			assert.NoError(t, chk.Append(logprotoEntry(6, "test")))
   453  
   454  			if chk.headFmt == OrderedHeadBlockFmt {
   455  				assert.EqualError(t, chk.Append(logprotoEntry(1, "test")), ErrOutOfOrder.Error())
   456  			} else {
   457  				assert.NoError(t, chk.Append(logprotoEntry(1, "test")))
   458  			}
   459  		},
   460  		"append out of order in a new block right after cutting the previous one": func(t *testing.T, chk *MemChunk) {
   461  			assert.NoError(t, chk.Append(logprotoEntry(5, "test")))
   462  			assert.NoError(t, chk.Append(logprotoEntry(6, "test")))
   463  			assert.NoError(t, chk.cut())
   464  
   465  			if chk.headFmt == OrderedHeadBlockFmt {
   466  				assert.EqualError(t, chk.Append(logprotoEntry(1, "test")), ErrOutOfOrder.Error())
   467  			} else {
   468  				assert.NoError(t, chk.Append(logprotoEntry(1, "test")))
   469  			}
   470  		},
   471  		"append out of order in a new block after multiple cuts": func(t *testing.T, chk *MemChunk) {
   472  			assert.NoError(t, chk.Append(logprotoEntry(5, "test")))
   473  			assert.NoError(t, chk.cut())
   474  
   475  			assert.NoError(t, chk.Append(logprotoEntry(6, "test")))
   476  			assert.NoError(t, chk.cut())
   477  
   478  			if chk.headFmt == OrderedHeadBlockFmt {
   479  				assert.EqualError(t, chk.Append(logprotoEntry(1, "test")), ErrOutOfOrder.Error())
   480  			} else {
   481  				assert.NoError(t, chk.Append(logprotoEntry(1, "test")))
   482  			}
   483  		},
   484  	}
   485  
   486  	for _, f := range HeadBlockFmts {
   487  		for testName, tester := range tests {
   488  			tester := tester
   489  
   490  			t.Run(testName, func(t *testing.T) {
   491  				t.Parallel()
   492  
   493  				tester(t, NewMemChunk(EncGZIP, f, testBlockSize, testTargetSize))
   494  			})
   495  		}
   496  	}
   497  }
   498  
   499  func TestChunkSize(t *testing.T) {
   500  	type res struct {
   501  		name           string
   502  		size           uint64
   503  		compressedSize uint64
   504  		ratio          float64
   505  	}
   506  	var result []res
   507  	for _, bs := range testBlockSizes {
   508  		for _, f := range HeadBlockFmts {
   509  			for _, enc := range testEncoding {
   510  				name := fmt.Sprintf("%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
   511  				t.Run(name, func(t *testing.T) {
   512  					c := NewMemChunk(enc, f, bs, testTargetSize)
   513  					inserted := fillChunk(c)
   514  					b, err := c.Bytes()
   515  					if err != nil {
   516  						t.Fatal(err)
   517  					}
   518  					result = append(result, res{
   519  						name:           name,
   520  						size:           uint64(inserted),
   521  						compressedSize: uint64(len(b)),
   522  						ratio:          float64(inserted) / float64(len(b)),
   523  					})
   524  				})
   525  			}
   526  		}
   527  	}
   528  	sort.Slice(result, func(i, j int) bool {
   529  		return result[i].ratio > result[j].ratio
   530  	})
   531  	fmt.Printf("%s\t%s\t%s\t%s\n", "name", "uncompressed", "compressed", "ratio")
   532  	for _, r := range result {
   533  		fmt.Printf("%s\t%s\t%s\t%f\n", r.name, humanize.Bytes(r.size), humanize.Bytes(r.compressedSize), r.ratio)
   534  	}
   535  }
   536  
   537  func TestChunkStats(t *testing.T) {
   538  	c := NewMemChunk(EncSnappy, DefaultHeadBlockFmt, testBlockSize, 0)
   539  	first := time.Now()
   540  	entry := &logproto.Entry{
   541  		Timestamp: first,
   542  		Line:      `ts=2020-03-16T13:58:33.459Z caller=dedupe.go:112 component=remote level=debug remote_name=3ea44a url=https:/blan.goo.net/api/prom/push msg=QueueManager.updateShardsLoop lowerBound=45.5 desiredShards=56.724401194003136 upperBound=84.5`,
   543  	}
   544  	inserted := 0
    545  	// fill the chunk with a known amount of data.
   546  	for {
   547  		if !c.SpaceFor(entry) {
   548  			break
   549  		}
   550  		if err := c.Append(entry); err != nil {
   551  			t.Fatal(err)
   552  		}
   553  		inserted++
   554  		entry.Timestamp = entry.Timestamp.Add(time.Nanosecond)
   555  	}
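         	// The expected decompressed size mirrors how each entry is accounted: its line length plus two varint fields (timestamp and line length), sized at binary.MaxVarintLen64 each.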
   556  	expectedSize := (inserted * len(entry.Line)) + (inserted * 2 * binary.MaxVarintLen64)
   557  	statsCtx, ctx := stats.NewContext(context.Background())
   558  
   559  	it, err := c.Iterator(ctx, first.Add(-time.Hour), entry.Timestamp.Add(time.Hour), logproto.BACKWARD, noopStreamPipeline)
   560  	if err != nil {
   561  		t.Fatal(err)
   562  	}
   563  	for it.Next() {
   564  	}
   565  	if err := it.Close(); err != nil {
   566  		t.Fatal(err)
   567  	}
   568  	// test on a chunk filling up
   569  	s := statsCtx.Result(time.Since(first), 0, 0)
   570  	require.Equal(t, int64(expectedSize), s.Summary.TotalBytesProcessed)
   571  	require.Equal(t, int64(inserted), s.Summary.TotalLinesProcessed)
   572  
   573  	require.Equal(t, int64(expectedSize), s.TotalDecompressedBytes())
   574  	require.Equal(t, int64(inserted), s.TotalDecompressedLines())
   575  
   576  	b, err := c.Bytes()
   577  	if err != nil {
   578  		t.Fatal(err)
   579  	}
   580  
   581  	// test on a new chunk.
   582  	cb, err := NewByteChunk(b, testBlockSize, testTargetSize)
   583  	if err != nil {
   584  		t.Fatal(err)
   585  	}
   586  	statsCtx, ctx = stats.NewContext(context.Background())
   587  	it, err = cb.Iterator(ctx, first.Add(-time.Hour), entry.Timestamp.Add(time.Hour), logproto.BACKWARD, noopStreamPipeline)
   588  	if err != nil {
   589  		t.Fatal(err)
   590  	}
   591  	for it.Next() {
   592  	}
   593  	if err := it.Close(); err != nil {
   594  		t.Fatal(err)
   595  	}
   596  	s = statsCtx.Result(time.Since(first), 0, 0)
   597  	require.Equal(t, int64(expectedSize), s.Summary.TotalBytesProcessed)
   598  	require.Equal(t, int64(inserted), s.Summary.TotalLinesProcessed)
   599  
   600  	require.Equal(t, int64(expectedSize), s.TotalDecompressedBytes())
   601  	require.Equal(t, int64(inserted), s.TotalDecompressedLines())
   602  }
   603  
   604  func TestIteratorClose(t *testing.T) {
   605  	for _, f := range HeadBlockFmts {
   606  		for _, enc := range testEncoding {
   607  			t.Run(enc.String(), func(t *testing.T) {
   608  				for _, test := range []func(iter iter.EntryIterator, t *testing.T){
   609  					func(iter iter.EntryIterator, t *testing.T) {
   610  						// close without iterating
   611  						if err := iter.Close(); err != nil {
   612  							t.Fatal(err)
   613  						}
   614  					},
   615  					func(iter iter.EntryIterator, t *testing.T) {
   616  						// close after iterating
   617  						for iter.Next() {
   618  							_ = iter.Entry()
   619  						}
   620  						if err := iter.Close(); err != nil {
   621  							t.Fatal(err)
   622  						}
   623  					},
   624  					func(iter iter.EntryIterator, t *testing.T) {
   625  						// close after a single iteration
   626  						iter.Next()
   627  						_ = iter.Entry()
   628  						if err := iter.Close(); err != nil {
   629  							t.Fatal(err)
   630  						}
   631  					},
   632  				} {
   633  					c := NewMemChunk(enc, f, testBlockSize, testTargetSize)
   634  					inserted := fillChunk(c)
   635  					iter, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, inserted), logproto.BACKWARD, noopStreamPipeline)
   636  					if err != nil {
   637  						t.Fatal(err)
   638  					}
   639  					test(iter, t)
   640  				}
   641  			})
   642  		}
   643  	}
   644  }
   645  
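         // result is a package-level sink: assigning the benchmark's chunks to it keeps the work observable so the compiler cannot optimize it away.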
   646  var result []Chunk
   647  
   648  func BenchmarkWrite(b *testing.B) {
   649  	chunks := []Chunk{}
   650  
   651  	entry := &logproto.Entry{
   652  		Timestamp: time.Unix(0, 0),
   653  		Line:      testdata.LogString(0),
   654  	}
   655  	i := int64(0)
   656  
   657  	for _, f := range HeadBlockFmts {
   658  		for _, enc := range testEncoding {
   659  			b.Run(fmt.Sprintf("%v-%v", f, enc), func(b *testing.B) {
   660  				for n := 0; n < b.N; n++ {
   661  					c := NewMemChunk(enc, f, testBlockSize, testTargetSize)
    662  					// append until the chunk is full so we trigger cut, which compresses the filled blocks with the configured encoding
   663  					for c.SpaceFor(entry) {
   664  						_ = c.Append(entry)
   665  						entry.Timestamp = time.Unix(0, i)
   666  						entry.Line = testdata.LogString(i)
   667  						i++
   668  					}
   669  					chunks = append(chunks, c)
   670  				}
   671  				result = chunks
   672  			})
   673  		}
   674  	}
   675  }
   676  
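         // nomatchPipeline reports every line as not matching, so read benchmarks exercise decompression and iteration without building result entries.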
   677  type nomatchPipeline struct{}
   678  
   679  func (nomatchPipeline) BaseLabels() log.LabelsResult { return log.EmptyLabelsResult }
   680  func (nomatchPipeline) Process(_ int64, line []byte) ([]byte, log.LabelsResult, bool) {
   681  	return line, nil, false
   682  }
   683  func (nomatchPipeline) ProcessString(_ int64, line string) (string, log.LabelsResult, bool) {
   684  	return line, nil, false
   685  }
   686  
   687  func BenchmarkRead(b *testing.B) {
   688  	type res struct {
   689  		name  string
   690  		speed float64
   691  	}
   692  	result := []res{}
   693  	for _, bs := range testBlockSizes {
   694  		for _, enc := range testEncoding {
   695  			name := fmt.Sprintf("%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
   696  			b.Run(name, func(b *testing.B) {
   697  				chunks, size := generateData(enc, 5, bs, testTargetSize)
   698  				b.ResetTimer()
   699  				bytesRead := uint64(0)
   700  				now := time.Now()
   701  				for n := 0; n < b.N; n++ {
   702  					for _, c := range chunks {
   703  						// use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory
   704  						iterator, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Now(), logproto.FORWARD, nomatchPipeline{})
   705  						if err != nil {
   706  							panic(err)
   707  						}
   708  						for iterator.Next() {
   709  							_ = iterator.Entry()
   710  						}
   711  						if err := iterator.Close(); err != nil {
   712  							b.Fatal(err)
   713  						}
   714  					}
   715  					bytesRead += size
   716  				}
   717  				result = append(result, res{
   718  					name:  name,
   719  					speed: float64(bytesRead) / time.Since(now).Seconds(),
   720  				})
   721  			})
   722  
   723  			name = fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
   724  
   725  			b.Run(name, func(b *testing.B) {
   726  				chunks, size := generateData(enc, 5, bs, testTargetSize)
   727  				b.ResetTimer()
   728  				bytesRead := uint64(0)
   729  				now := time.Now()
   730  				for n := 0; n < b.N; n++ {
   731  					for _, c := range chunks {
   732  						iterator := c.SampleIterator(context.Background(), time.Unix(0, 0), time.Now(), countExtractor)
   733  						for iterator.Next() {
   734  							_ = iterator.Sample()
   735  						}
   736  						if err := iterator.Close(); err != nil {
   737  							b.Fatal(err)
   738  						}
   739  					}
   740  					bytesRead += size
   741  				}
   742  				result = append(result, res{
   743  					name:  name,
   744  					speed: float64(bytesRead) / time.Since(now).Seconds(),
   745  				})
   746  			})
   747  		}
   748  	}
   749  	sort.Slice(result, func(i, j int) bool {
   750  		return result[i].speed > result[j].speed
   751  	})
   752  	for _, r := range result {
   753  		fmt.Printf("%s: %.2f MB/s\n", r.name, r.speed/1024/1024)
   754  	}
   755  }
   756  
   757  func BenchmarkBackwardIterator(b *testing.B) {
   758  	for _, bs := range testBlockSizes {
   759  		b.Run(humanize.Bytes(uint64(bs)), func(b *testing.B) {
   760  			b.ReportAllocs()
   761  			c := NewMemChunk(EncSnappy, DefaultHeadBlockFmt, bs, testTargetSize)
   762  			_ = fillChunk(c)
   763  			b.ResetTimer()
   764  			for n := 0; n < b.N; n++ {
   765  				iterator, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Now(), logproto.BACKWARD, noopStreamPipeline)
   766  				if err != nil {
   767  					panic(err)
   768  				}
   769  				for iterator.Next() {
   770  					_ = iterator.Entry()
   771  				}
   772  				if err := iterator.Close(); err != nil {
   773  					b.Fatal(err)
   774  				}
   775  			}
   776  		})
   777  	}
   778  }
   779  
   780  func TestGenerateDataSize(t *testing.T) {
   781  	for _, enc := range testEncoding {
   782  		t.Run(enc.String(), func(t *testing.T) {
   783  			chunks, size := generateData(enc, 50, testBlockSize, testTargetSize)
   784  
   785  			bytesRead := uint64(0)
   786  			for _, c := range chunks {
   787  				// use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory
   788  				iterator, err := c.Iterator(context.TODO(), time.Unix(0, 0), time.Now(), logproto.FORWARD, noopStreamPipeline)
   789  				if err != nil {
   790  					panic(err)
   791  				}
   792  				for iterator.Next() {
   793  					e := iterator.Entry()
   794  					bytesRead += uint64(len(e.Line))
   795  				}
   796  				if err := iterator.Close(); err != nil {
   797  					t.Fatal(err)
   798  				}
   799  			}
   800  
   801  			require.Equal(t, size, bytesRead)
   802  		})
   803  	}
   804  }
   805  
   806  func BenchmarkHeadBlockIterator(b *testing.B) {
   807  	for _, j := range []int{100000, 50000, 15000, 10000} {
   808  		b.Run(fmt.Sprintf("Size %d", j), func(b *testing.B) {
   809  			h := headBlock{}
   810  
   811  			for i := 0; i < j; i++ {
   812  				if err := h.Append(int64(i), "this is the append string"); err != nil {
   813  					b.Fatal(err)
   814  				}
   815  			}
   816  
   817  			b.ResetTimer()
   818  
   819  			for n := 0; n < b.N; n++ {
   820  				iter := h.Iterator(context.Background(), logproto.BACKWARD, 0, math.MaxInt64, noopStreamPipeline)
   821  
   822  				for iter.Next() {
   823  					_ = iter.Entry()
   824  				}
   825  			}
   826  		})
   827  	}
   828  }
   829  
   830  func BenchmarkHeadBlockSampleIterator(b *testing.B) {
   831  	for _, j := range []int{20000, 10000, 8000, 5000} {
   832  		b.Run(fmt.Sprintf("Size %d", j), func(b *testing.B) {
   833  			h := headBlock{}
   834  
   835  			for i := 0; i < j; i++ {
   836  				if err := h.Append(int64(i), "this is the append string"); err != nil {
   837  					b.Fatal(err)
   838  				}
   839  			}
   840  
   841  			b.ResetTimer()
   842  
   843  			for n := 0; n < b.N; n++ {
   844  				iter := h.SampleIterator(context.Background(), 0, math.MaxInt64, countExtractor)
   845  
   846  				for iter.Next() {
   847  					_ = iter.Sample()
   848  				}
   849  				iter.Close()
   850  			}
   851  		})
   852  	}
   853  }
   854  
   855  func TestMemChunk_IteratorBounds(t *testing.T) {
   856  	createChunk := func() *MemChunk {
   857  		t.Helper()
   858  		c := NewMemChunk(EncNone, DefaultHeadBlockFmt, 1e6, 1e6)
   859  
   860  		if err := c.Append(&logproto.Entry{
   861  			Timestamp: time.Unix(0, 1),
   862  			Line:      "1",
   863  		}); err != nil {
   864  			t.Fatal(err)
   865  		}
   866  		if err := c.Append(&logproto.Entry{
   867  			Timestamp: time.Unix(0, 2),
   868  			Line:      "2",
   869  		}); err != nil {
   870  			t.Fatal(err)
   871  		}
   872  		return c
   873  	}
   874  
   875  	for _, tt := range []struct {
   876  		mint, maxt time.Time
   877  		direction  logproto.Direction
   878  		expect     []bool // array of expected values for next call in sequence
   879  	}{
   880  		{time.Unix(0, 0), time.Unix(0, 1), logproto.FORWARD, []bool{false}},
   881  		{time.Unix(0, 1), time.Unix(0, 1), logproto.FORWARD, []bool{true, false}},
   882  		{time.Unix(0, 1), time.Unix(0, 2), logproto.FORWARD, []bool{true, false}},
   883  		{time.Unix(0, 2), time.Unix(0, 2), logproto.FORWARD, []bool{true, false}},
   884  		{time.Unix(0, 1), time.Unix(0, 3), logproto.FORWARD, []bool{true, true, false}},
   885  		{time.Unix(0, 2), time.Unix(0, 3), logproto.FORWARD, []bool{true, false}},
   886  		{time.Unix(0, 3), time.Unix(0, 3), logproto.FORWARD, []bool{false}},
   887  
   888  		{time.Unix(0, 0), time.Unix(0, 1), logproto.BACKWARD, []bool{false}},
   889  		{time.Unix(0, 1), time.Unix(0, 1), logproto.BACKWARD, []bool{true, false}},
   890  		{time.Unix(0, 1), time.Unix(0, 2), logproto.BACKWARD, []bool{true, false}},
   891  		{time.Unix(0, 2), time.Unix(0, 2), logproto.BACKWARD, []bool{true, false}},
   892  		{time.Unix(0, 1), time.Unix(0, 3), logproto.BACKWARD, []bool{true, true, false}},
   893  		{time.Unix(0, 2), time.Unix(0, 3), logproto.BACKWARD, []bool{true, false}},
   894  		{time.Unix(0, 3), time.Unix(0, 3), logproto.BACKWARD, []bool{false}},
    895  	} {
    896  		tt := tt // capture the range variable before the parallel subtest runs
    897  		t.Run(
    898  			fmt.Sprintf("mint:%d,maxt:%d,direction:%s", tt.mint.UnixNano(), tt.maxt.UnixNano(), tt.direction),
    899  			func(t *testing.T) {
    900  				t.Parallel()
    901  
    902  				c := createChunk()
   903  
   904  				// testing headchunk
   905  				it, err := c.Iterator(context.Background(), tt.mint, tt.maxt, tt.direction, noopStreamPipeline)
   906  				require.NoError(t, err)
   907  				for i := range tt.expect {
   908  					require.Equal(t, tt.expect[i], it.Next())
   909  				}
   910  				require.NoError(t, it.Close())
   911  
   912  				// testing chunk blocks
   913  				require.NoError(t, c.cut())
   914  				it, err = c.Iterator(context.Background(), tt.mint, tt.maxt, tt.direction, noopStreamPipeline)
   915  				require.NoError(t, err)
   916  				for i := range tt.expect {
   917  					require.Equal(t, tt.expect[i], it.Next())
   918  				}
   919  				require.NoError(t, it.Close())
   920  			})
   921  	}
   922  }
   923  
   924  func TestMemchunkLongLine(t *testing.T) {
   925  	for _, enc := range testEncoding {
   926  		t.Run(enc.String(), func(t *testing.T) {
   927  			t.Parallel()
   928  
   929  			c := NewMemChunk(enc, DefaultHeadBlockFmt, testBlockSize, testTargetSize)
   930  			for i := 1; i <= 10; i++ {
   931  				require.NoError(t, c.Append(&logproto.Entry{Timestamp: time.Unix(0, int64(i)), Line: strings.Repeat("e", 200000)}))
   932  			}
   933  			it, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Unix(0, 100), logproto.FORWARD, noopStreamPipeline)
   934  			require.NoError(t, err)
   935  			for i := 1; i <= 10; i++ {
   936  				require.True(t, it.Next())
   937  			}
   938  			require.False(t, it.Next())
   939  		})
   940  	}
   941  }
   942  
   943  // Ensure passing a reusable []byte doesn't affect output
   944  func TestBytesWith(t *testing.T) {
   945  	t.Parallel()
   946  
   947  	exp, err := NewMemChunk(EncNone, DefaultHeadBlockFmt, testBlockSize, testTargetSize).BytesWith(nil)
   948  	require.Nil(t, err)
   949  	out, err := NewMemChunk(EncNone, DefaultHeadBlockFmt, testBlockSize, testTargetSize).BytesWith([]byte{1, 2, 3})
   950  	require.Nil(t, err)
   951  
   952  	require.Equal(t, exp, out)
   953  }
   954  
   955  func TestCheckpointEncoding(t *testing.T) {
   956  	t.Parallel()
   957  
   958  	blockSize, targetSize := 256*1024, 1500*1024
   959  	for _, f := range HeadBlockFmts {
   960  		t.Run(f.String(), func(t *testing.T) {
   961  			c := NewMemChunk(EncSnappy, f, blockSize, targetSize)
   962  
   963  			// add a few entries
   964  			for i := 0; i < 5; i++ {
   965  				entry := &logproto.Entry{
   966  					Timestamp: time.Unix(int64(i), 0),
   967  					Line:      fmt.Sprintf("hi there - %d", i),
   968  				}
   969  				require.Equal(t, true, c.SpaceFor(entry))
   970  				require.Nil(t, c.Append(entry))
   971  			}
   972  
   973  			// cut it
   974  			require.Nil(t, c.cut())
   975  
   976  			// add a few more to head
   977  			for i := 5; i < 10; i++ {
   978  				entry := &logproto.Entry{
   979  					Timestamp: time.Unix(int64(i), 0),
   980  					Line:      fmt.Sprintf("hi there - %d", i),
   981  				}
   982  				require.Equal(t, true, c.SpaceFor(entry))
   983  				require.Nil(t, c.Append(entry))
   984  			}
   985  
   986  			// ensure new blocks are not cut
   987  			require.Equal(t, 1, len(c.blocks))
   988  
   989  			var chk, head bytes.Buffer
   990  			err := c.SerializeForCheckpointTo(&chk, &head)
   991  			require.Nil(t, err)
   992  
   993  			cpy, err := MemchunkFromCheckpoint(chk.Bytes(), head.Bytes(), f, blockSize, targetSize)
   994  			require.Nil(t, err)
   995  
   996  			require.Equal(t, c, cpy)
   997  		})
   998  	}
   999  }
  1000  
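         // streams and series are package-level sinks that keep benchmark results alive so the iteration work is not elided.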
  1001  var (
  1002  	streams = []logproto.Stream{}
  1003  	series  = []logproto.Series{}
  1004  )
  1005  
  1006  func BenchmarkBufferedIteratorLabels(b *testing.B) {
  1007  	for _, f := range HeadBlockFmts {
  1008  		b.Run(f.String(), func(b *testing.B) {
  1009  			c := NewMemChunk(EncSnappy, f, testBlockSize, testTargetSize)
  1010  			_ = fillChunk(c)
  1011  
  1012  			labelsSet := []labels.Labels{
  1013  				{
  1014  					{Name: "cluster", Value: "us-central1"},
  1015  					{Name: "stream", Value: "stdout"},
  1016  					{Name: "filename", Value: "/var/log/pods/loki-prod_query-frontend-6894f97b98-89q2n_eac98024-f60f-44af-a46f-d099bc99d1e7/query-frontend/0.log"},
  1017  					{Name: "namespace", Value: "loki-dev"},
  1018  					{Name: "job", Value: "loki-prod/query-frontend"},
  1019  					{Name: "container", Value: "query-frontend"},
  1020  					{Name: "pod", Value: "query-frontend-6894f97b98-89q2n"},
  1021  				},
  1022  				{
  1023  					{Name: "cluster", Value: "us-central2"},
  1024  					{Name: "stream", Value: "stderr"},
  1025  					{Name: "filename", Value: "/var/log/pods/loki-prod_querier-6894f97b98-89q2n_eac98024-f60f-44af-a46f-d099bc99d1e7/query-frontend/0.log"},
  1026  					{Name: "namespace", Value: "loki-dev"},
  1027  					{Name: "job", Value: "loki-prod/querier"},
  1028  					{Name: "container", Value: "querier"},
  1029  					{Name: "pod", Value: "querier-6894f97b98-89q2n"},
  1030  				},
  1031  			}
  1032  			for _, test := range []string{
  1033  				`{app="foo"}`,
  1034  				`{app="foo"} != "foo"`,
  1035  				`{app="foo"} != "foo" | logfmt `,
  1036  				`{app="foo"} != "foo" | logfmt | duration > 10ms`,
  1037  				`{app="foo"} != "foo" | logfmt | duration > 10ms and component="tsdb"`,
  1038  			} {
  1039  				b.Run(test, func(b *testing.B) {
  1040  					b.ReportAllocs()
  1041  					expr, err := syntax.ParseLogSelector(test, true)
  1042  					if err != nil {
  1043  						b.Fatal(err)
  1044  					}
  1045  					p, err := expr.Pipeline()
  1046  					if err != nil {
  1047  						b.Fatal(err)
  1048  					}
  1049  					var iters []iter.EntryIterator
  1050  					for _, lbs := range labelsSet {
  1051  						it, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Now(), logproto.FORWARD, p.ForStream(lbs))
  1052  						if err != nil {
  1053  							b.Fatal(err)
  1054  						}
  1055  						iters = append(iters, it)
  1056  					}
  1057  					b.ResetTimer()
  1058  					for n := 0; n < b.N; n++ {
  1059  						for _, it := range iters {
  1060  							for it.Next() {
  1061  								streams = append(streams, logproto.Stream{Labels: it.Labels(), Entries: []logproto.Entry{it.Entry()}})
  1062  							}
  1063  						}
  1064  					}
  1065  					streams = streams[:0]
  1066  				})
  1067  			}
  1068  
  1069  			for _, test := range []string{
  1070  				`rate({app="foo"}[1m])`,
  1071  				`sum by (cluster) (rate({app="foo"}[10s]))`,
  1072  				`sum by (cluster) (rate({app="foo"} != "foo" [10s]))`,
  1073  				`sum by (cluster) (rate({app="foo"} != "foo" | logfmt[10s]))`,
  1074  				`sum by (caller) (rate({app="foo"} != "foo" | logfmt[10s]))`,
  1075  				`sum by (cluster) (rate({app="foo"} != "foo" | logfmt | duration > 10ms[10s]))`,
  1076  				`sum by (cluster) (rate({app="foo"} != "foo" | logfmt | duration > 10ms and component="tsdb"[1m]))`,
  1077  			} {
  1078  				b.Run(test, func(b *testing.B) {
  1079  					b.ReportAllocs()
  1080  					expr, err := syntax.ParseSampleExpr(test)
  1081  					if err != nil {
  1082  						b.Fatal(err)
  1083  					}
  1084  					ex, err := expr.Extractor()
  1085  					if err != nil {
  1086  						b.Fatal(err)
  1087  					}
  1088  					var iters []iter.SampleIterator
  1089  					for _, lbs := range labelsSet {
  1090  						iters = append(iters, c.SampleIterator(context.Background(), time.Unix(0, 0), time.Now(), ex.ForStream(lbs)))
  1091  					}
  1092  					b.ResetTimer()
  1093  					for n := 0; n < b.N; n++ {
  1094  						for _, it := range iters {
  1095  							for it.Next() {
  1096  								series = append(series, logproto.Series{Labels: it.Labels(), Samples: []logproto.Sample{it.Sample()}})
  1097  							}
  1098  						}
  1099  					}
  1100  					series = series[:0]
  1101  				})
  1102  			}
  1103  		})
  1104  	}
  1105  }
  1106  
  1107  func Test_HeadIteratorReverse(t *testing.T) {
  1108  	for _, f := range HeadBlockFmts {
  1109  		t.Run(f.String(), func(t *testing.T) {
  1110  			c := NewMemChunk(EncSnappy, f, testBlockSize, testTargetSize)
  1111  			genEntry := func(i int64) *logproto.Entry {
  1112  				return &logproto.Entry{
  1113  					Timestamp: time.Unix(0, i),
  1114  					Line:      fmt.Sprintf(`msg="%d"`, i),
  1115  				}
  1116  			}
  1117  			var i int64
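         			// append entries with increasing nanosecond timestamps until the chunk reports it has no more space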
  1118  			for e := genEntry(i); c.SpaceFor(e); e, i = genEntry(i+1), i+1 {
  1119  				require.NoError(t, c.Append(e))
  1120  			}
  1121  
  1122  			assertOrder := func(t *testing.T, total int64) {
  1123  				expr, err := syntax.ParseLogSelector(`{app="foo"} | logfmt`, true)
  1124  				require.NoError(t, err)
  1125  				p, err := expr.Pipeline()
  1126  				require.NoError(t, err)
  1127  				it, err := c.Iterator(context.TODO(), time.Unix(0, 0), time.Unix(0, i), logproto.BACKWARD, p.ForStream(labels.Labels{{Name: "app", Value: "foo"}}))
  1128  				require.NoError(t, err)
  1129  				for it.Next() {
  1130  					total--
  1131  					require.Equal(t, total, it.Entry().Timestamp.UnixNano())
  1132  				}
  1133  			}
  1134  
  1135  			assertOrder(t, i)
  1136  			// let's try again without the headblock.
  1137  			require.NoError(t, c.cut())
  1138  			assertOrder(t, i)
  1139  		})
  1140  	}
  1141  }
  1142  
  1143  func TestMemChunk_Rebound(t *testing.T) {
  1144  	chkFrom := time.Unix(0, 0)
  1145  	chkThrough := chkFrom.Add(time.Hour)
  1146  	originalChunk := buildTestMemChunk(t, chkFrom, chkThrough)
  1147  
  1148  	for _, tc := range []struct {
  1149  		name               string
  1150  		sliceFrom, sliceTo time.Time
  1151  		err                error
  1152  	}{
  1153  		{
  1154  			name:      "slice whole chunk",
  1155  			sliceFrom: chkFrom,
  1156  			sliceTo:   chkThrough,
  1157  		},
  1158  		{
  1159  			name:      "slice first half",
  1160  			sliceFrom: chkFrom,
  1161  			sliceTo:   chkFrom.Add(30 * time.Minute),
  1162  		},
  1163  		{
  1164  			name:      "slice second half",
  1165  			sliceFrom: chkFrom.Add(30 * time.Minute),
  1166  			sliceTo:   chkThrough,
  1167  		},
  1168  		{
  1169  			name:      "slice in the middle",
  1170  			sliceFrom: chkFrom.Add(15 * time.Minute),
  1171  			sliceTo:   chkFrom.Add(45 * time.Minute),
  1172  		},
  1173  		{
  1174  			name:      "slice interval not aligned with sample intervals",
  1175  			sliceFrom: chkFrom.Add(time.Second),
  1176  			sliceTo:   chkThrough.Add(-time.Second),
  1177  		},
  1178  		{
  1179  			name:      "slice out of bounds without overlap",
  1180  			err:       chunk.ErrSliceNoDataInRange,
  1181  			sliceFrom: chkThrough.Add(time.Minute),
  1182  			sliceTo:   chkThrough.Add(time.Hour),
  1183  		},
  1184  		{
  1185  			name:      "slice out of bounds with overlap",
  1186  			sliceFrom: chkFrom.Add(10 * time.Minute),
  1187  			sliceTo:   chkThrough.Add(10 * time.Minute),
  1188  		},
  1189  	} {
  1190  		t.Run(tc.name, func(t *testing.T) {
  1191  			newChunk, err := originalChunk.Rebound(tc.sliceFrom, tc.sliceTo, nil)
  1192  			if tc.err != nil {
  1193  				require.Equal(t, tc.err, err)
  1194  				return
  1195  			}
  1196  			require.NoError(t, err)
  1197  
   1198  			// iterate originalChunk from the slice start to the slice end + nanosecond. Adding a nanosecond makes the iteration inclusive of the sample at the end time.
  1199  			originalChunkItr, err := originalChunk.Iterator(context.Background(), tc.sliceFrom, tc.sliceTo.Add(time.Nanosecond), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
  1200  			require.NoError(t, err)
  1201  
   1202  			// iterate newChunk over the whole chunk interval, which should include all the samples in the chunk and hence align with the expected values.
  1203  			newChunkItr, err := newChunk.Iterator(context.Background(), chkFrom, chkThrough, logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
  1204  			require.NoError(t, err)
  1205  
  1206  			for {
  1207  				originalChunksHasMoreSamples := originalChunkItr.Next()
  1208  				newChunkHasMoreSamples := newChunkItr.Next()
  1209  
   1210  				// either both iterators should have more samples or neither should
  1211  				require.Equal(t, originalChunksHasMoreSamples, newChunkHasMoreSamples)
  1212  				if !originalChunksHasMoreSamples {
  1213  					break
  1214  				}
  1215  
  1216  				require.Equal(t, originalChunkItr.Entry(), newChunkItr.Entry())
  1217  			}
  1218  		})
  1219  	}
  1220  }
  1221  
  1222  func buildTestMemChunk(t *testing.T, from, through time.Time) *MemChunk {
  1223  	chk := NewMemChunk(EncGZIP, DefaultHeadBlockFmt, defaultBlockSize, 0)
  1224  	for ; from.Before(through); from = from.Add(time.Second) {
  1225  		err := chk.Append(&logproto.Entry{
  1226  			Line:      from.String(),
  1227  			Timestamp: from,
  1228  		})
  1229  		require.NoError(t, err)
  1230  	}
  1231  
  1232  	return chk
  1233  }
  1234  
  1235  func TestMemChunk_ReboundAndFilter_with_filter(t *testing.T) {
  1236  	chkFrom := time.Unix(1, 0) // headBlock.Append treats Unix time 0 as not set so we have to use a later time
  1237  	chkFromPlus5 := chkFrom.Add(5 * time.Second)
  1238  	chkThrough := chkFrom.Add(10 * time.Second)
  1239  	chkThroughPlus1 := chkThrough.Add(1 * time.Second)
  1240  
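         	// Rebound drops every line for which this filter returns true, i.e. the lines that buildFilterableTestMemChunk prefixes with "matching".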
  1241  	filterFunc := func(in string) bool {
  1242  		return strings.HasPrefix(in, "matching")
  1243  	}
  1244  
  1245  	for _, tc := range []struct {
  1246  		name                               string
  1247  		matchingSliceFrom, matchingSliceTo *time.Time
  1248  		err                                error
  1249  		nrMatching                         int
  1250  		nrNotMatching                      int
  1251  	}{
  1252  		{
  1253  			name:          "no matches",
  1254  			nrMatching:    0,
  1255  			nrNotMatching: 10,
  1256  		},
  1257  		{
  1258  			name:              "some lines removed",
  1259  			matchingSliceFrom: &chkFrom,
  1260  			matchingSliceTo:   &chkFromPlus5,
  1261  			nrMatching:        5,
  1262  			nrNotMatching:     5,
  1263  		},
  1264  		{
  1265  			name:              "all lines match",
  1266  			err:               chunk.ErrSliceNoDataInRange,
  1267  			matchingSliceFrom: &chkFrom,
  1268  			matchingSliceTo:   &chkThroughPlus1,
  1269  		},
  1270  	} {
  1271  		t.Run(tc.name, func(t *testing.T) {
  1272  			originalChunk := buildFilterableTestMemChunk(t, chkFrom, chkThrough, tc.matchingSliceFrom, tc.matchingSliceTo)
  1273  			newChunk, err := originalChunk.Rebound(chkFrom, chkThrough, filterFunc)
  1274  			if tc.err != nil {
  1275  				require.Equal(t, tc.err, err)
  1276  				return
  1277  			}
  1278  			require.NoError(t, err)
  1279  
   1280  			// iterate originalChunk from the chunk start to the chunk end + nanosecond. Adding a nanosecond makes the iteration inclusive of the sample at the end time.
  1281  			originalChunkItr, err := originalChunk.Iterator(context.Background(), chkFrom, chkThrough.Add(time.Nanosecond), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
  1282  			require.NoError(t, err)
  1283  			originalChunkSamples := 0
  1284  			for originalChunkItr.Next() {
  1285  				originalChunkSamples++
  1286  			}
  1287  			require.Equal(t, tc.nrMatching+tc.nrNotMatching, originalChunkSamples)
  1288  
   1289  			// iterate newChunk over the whole chunk interval, which should include all the samples in the chunk and hence align with the expected values.
  1290  			newChunkItr, err := newChunk.Iterator(context.Background(), chkFrom, chkThrough.Add(time.Nanosecond), logproto.FORWARD, log.NewNoopPipeline().ForStream(labels.Labels{}))
  1291  			require.NoError(t, err)
  1292  			newChunkSamples := 0
  1293  			for newChunkItr.Next() {
  1294  				newChunkSamples++
  1295  			}
  1296  			require.Equal(t, tc.nrNotMatching, newChunkSamples)
  1297  		})
  1298  	}
  1299  }
  1300  
  1301  func buildFilterableTestMemChunk(t *testing.T, from, through time.Time, matchingFrom, matchingTo *time.Time) *MemChunk {
  1302  	chk := NewMemChunk(EncGZIP, DefaultHeadBlockFmt, defaultBlockSize, 0)
  1303  	t.Logf("from   : %v", from.String())
  1304  	t.Logf("through: %v", through.String())
  1305  	for from.Before(through) {
   1306  		// If a line is between matchingFrom and matchingTo, add the prefix "matching"
  1307  		if matchingFrom != nil && matchingTo != nil &&
  1308  			(from.Equal(*matchingFrom) || (from.After(*matchingFrom) && (from.Before(*matchingTo)))) {
  1309  			t.Logf("%v matching line", from.String())
  1310  			err := chk.Append(&logproto.Entry{
  1311  				Line:      fmt.Sprintf("matching %v", from.String()),
  1312  				Timestamp: from,
  1313  			})
  1314  			require.NoError(t, err)
  1315  		} else {
  1316  			t.Logf("%v non-match line", from.String())
  1317  			err := chk.Append(&logproto.Entry{
  1318  				Line:      from.String(),
  1319  				Timestamp: from,
  1320  			})
  1321  			require.NoError(t, err)
  1322  		}
  1323  		from = from.Add(time.Second)
  1324  	}
  1325  
  1326  	return chk
  1327  }