github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/fst/writer_reader_test.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package fst
    22  
    23  import (
    24  	"bytes"
    25  	"fmt"
    26  	"sort"
    27  	"sync"
    28  	"testing"
    29  
    30  	"github.com/m3db/m3/src/m3ninx/doc"
    31  	"github.com/m3db/m3/src/m3ninx/index"
    32  	sgmt "github.com/m3db/m3/src/m3ninx/index/segment"
    33  	"github.com/m3db/m3/src/m3ninx/index/segment/mem"
    34  	"github.com/m3db/m3/src/m3ninx/postings"
    35  	"github.com/m3db/m3/src/m3ninx/util"
    36  
    37  	"github.com/stretchr/testify/require"
    38  )
    39  
    40  var (
    41  	testOptions = NewOptions()
    42  
    43  	fewTestDocuments = []doc.Metadata{
    44  		{
    45  			Fields: []doc.Field{
    46  				{
    47  					Name:  []byte("fruit"),
    48  					Value: []byte("banana"),
    49  				},
    50  				{
    51  					Name:  []byte("color"),
    52  					Value: []byte("yellow"),
    53  				},
    54  			},
    55  		},
    56  		{
    57  			Fields: []doc.Field{
    58  				{
    59  					Name:  []byte("fruit"),
    60  					Value: []byte("apple"),
    61  				},
    62  				{
    63  					Name:  []byte("color"),
    64  					Value: []byte("red"),
    65  				},
    66  			},
    67  		},
    68  		{
    69  			ID: []byte("42"),
    70  			Fields: []doc.Field{
    71  				{
    72  					Name:  []byte("fruit"),
    73  					Value: []byte("pineapple"),
    74  				},
    75  				{
    76  					Name:  []byte("color"),
    77  					Value: []byte("yellow"),
    78  				},
    79  			},
    80  		},
    81  	}
    82  	lotsTestDocuments = util.MustReadDocs("../../../util/testdata/node_exporter.json", 2000)
    83  
    84  	testDocuments = []struct {
    85  		name string
    86  		docs []doc.Metadata
    87  	}{
    88  		{
    89  			name: "few documents",
    90  			docs: fewTestDocuments,
    91  		},
    92  		{
    93  			name: "many documents",
    94  			docs: lotsTestDocuments,
    95  		},
    96  	}
    97  )
    98  
    99  type testSegmentCase struct {
   100  	name               string
   101  	expected, observed sgmt.Segment
   102  }
   103  
   104  func newTestCases(t *testing.T, docs []doc.Metadata) []testSegmentCase {
   105  	memSeg, fstSeg := newTestSegments(t, docs)
   106  
   107  	fstWriter10Reader10 := newFSTSegmentWithVersion(t, memSeg, testOptions,
   108  		Version{Major: 1, Minor: 0}, /* writer version */
   109  		Version{Major: 1, Minor: 0} /* reader version */)
   110  
   111  	fstWriter11Reader10 := newFSTSegmentWithVersion(t, memSeg, testOptions,
   112  		Version{Major: 1, Minor: 1}, /* writer version */
   113  		Version{Major: 1, Minor: 0} /* reader version */)
   114  
   115  	fstWriter11Reader11 := newFSTSegmentWithVersion(t, memSeg, testOptions,
   116  		Version{Major: 1, Minor: 1}, /* writer version */
   117  		Version{Major: 1, Minor: 1} /* reader version */)
   118  
   119  	return []testSegmentCase{
   120  		{ // mem sgmt v latest fst
   121  			name:     "mem v fst",
   122  			expected: memSeg,
   123  			observed: fstSeg,
   124  		},
   125  		{ // mem sgmt v fst1.0
   126  			name:     "mem v fstWriter10Reader10",
   127  			expected: memSeg,
   128  			observed: fstWriter10Reader10,
   129  		},
   130  		{ // mem sgmt v fst (WriterV1.1; ReaderV1.0) -- i.e. ensure forward compatibility
   131  			name:     "mem v fstWriter11Reader10",
   132  			expected: memSeg,
   133  			observed: fstWriter11Reader10,
   134  		},
   135  		{ // mem sgmt v fst (WriterV1.1; ReaderV1.1)
   136  			name:     "mem v fstWriter11Reader11",
   137  			expected: memSeg,
   138  			observed: fstWriter11Reader11,
   139  		},
   140  	}
   141  }
   142  
   143  func TestConstruction(t *testing.T) {
   144  	for _, test := range testDocuments {
   145  		t.Run(test.name, func(t *testing.T) {
   146  			for _, tc := range newTestCases(t, test.docs) {
   147  				t.Run(tc.name, func(t *testing.T) {
   148  					// don't need to do anything here
   149  				})
   150  			}
   151  		})
   152  	}
   153  }
   154  
   155  func TestSizeEquals(t *testing.T) {
   156  	for _, test := range testDocuments {
   157  		t.Run(test.name, func(t *testing.T) {
   158  			for _, tc := range newTestCases(t, test.docs) {
   159  				t.Run(tc.name, func(t *testing.T) {
   160  					expSeg, obsSeg := tc.expected, tc.observed
   161  					require.Equal(t, expSeg.Size(), obsSeg.Size())
   162  				})
   163  			}
   164  		})
   165  	}
   166  }
   167  
   168  func TestFieldDoesNotExist(t *testing.T) {
   169  	for _, test := range testDocuments {
   170  		t.Run(test.name, func(t *testing.T) {
   171  			for _, tc := range newTestCases(t, test.docs) {
   172  				t.Run(tc.name, func(t *testing.T) {
   173  					elaborateFieldName := []byte("some-elaborate-field-that-does-not-exist-in-test-docs")
   174  					terms, err := tc.expected.TermsIterable().Terms(elaborateFieldName)
   175  					require.NoError(t, err)
   176  					require.False(t, terms.Next())
   177  					require.NoError(t, terms.Err())
   178  					require.NoError(t, terms.Close())
   179  
   180  					terms, err = tc.observed.TermsIterable().Terms(elaborateFieldName)
   181  					require.NoError(t, err)
   182  					require.False(t, terms.Next())
   183  					require.NoError(t, terms.Err())
   184  					require.NoError(t, terms.Close())
   185  
   186  					expectedReader, err := tc.expected.Reader()
   187  					require.NoError(t, err)
   188  					pl, err := expectedReader.MatchTerm(elaborateFieldName, []byte("."))
   189  					require.NoError(t, err)
   190  					require.True(t, pl.IsEmpty())
   191  					pl, err = expectedReader.MatchTerm(elaborateFieldName, []byte(".*"))
   192  					require.NoError(t, err)
   193  					require.True(t, pl.IsEmpty())
   194  					require.NoError(t, expectedReader.Close())
   195  
   196  					observedReader, err := tc.observed.Reader()
   197  					require.NoError(t, err)
   198  					pl, err = observedReader.MatchTerm(elaborateFieldName, []byte("."))
   199  					require.NoError(t, err)
   200  					require.True(t, pl.IsEmpty())
   201  					pl, err = observedReader.MatchTerm(elaborateFieldName, []byte(".*"))
   202  					require.NoError(t, err)
   203  					require.True(t, pl.IsEmpty())
   204  					require.NoError(t, observedReader.Close())
   205  				})
   206  			}
   207  		})
   208  	}
   209  }
   210  
   211  func TestFieldsEquals(t *testing.T) {
   212  	for _, test := range testDocuments {
   213  		t.Run(test.name, func(t *testing.T) {
   214  			for _, tc := range newTestCases(t, test.docs) {
   215  				t.Run(tc.name, func(t *testing.T) {
   216  					expSeg, obsSeg := tc.expected, tc.observed
   217  					expFieldsIter, err := expSeg.FieldsIterable().Fields()
   218  					require.NoError(t, err)
   219  					expFields := toSlice(t, expFieldsIter)
   220  
   221  					obsFieldsIter, err := obsSeg.FieldsIterable().Fields()
   222  					require.NoError(t, err)
   223  					obsFields := toSlice(t, obsFieldsIter)
   224  
   225  					assertSliceOfByteSlicesEqual(t, expFields, obsFields)
   226  				})
   227  			}
   228  		})
   229  	}
   230  }
   231  
   232  func TestContainsField(t *testing.T) {
   233  	for _, test := range testDocuments {
   234  		t.Run(test.name, func(t *testing.T) {
   235  			for _, tc := range newTestCases(t, test.docs) {
   236  				t.Run(tc.name, func(t *testing.T) {
   237  					expSeg, obsSeg := tc.expected, tc.observed
   238  					expFieldsIter, err := expSeg.FieldsIterable().Fields()
   239  					require.NoError(t, err)
   240  					expFields := toSlice(t, expFieldsIter)
   241  
   242  					for _, f := range expFields {
   243  						ok, err := obsSeg.ContainsField(f)
   244  						require.NoError(t, err)
   245  						require.True(t, ok)
   246  					}
   247  				})
   248  			}
   249  		})
   250  	}
   251  }
   252  
   253  func TestTermEquals(t *testing.T) {
   254  	for _, test := range testDocuments {
   255  		t.Run(test.name, func(t *testing.T) {
   256  			for _, tc := range newTestCases(t, test.docs) {
   257  				t.Run(tc.name, func(t *testing.T) {
   258  					expSeg, obsSeg := tc.expected, tc.observed
   259  					expFieldsIter, err := expSeg.FieldsIterable().Fields()
   260  					require.NoError(t, err)
   261  					expFields := toSlice(t, expFieldsIter)
   262  
   263  					obsFieldsIter, err := obsSeg.FieldsIterable().Fields()
   264  					require.NoError(t, err)
   265  					obsFields := toSlice(t, obsFieldsIter)
   266  
   267  					assertTermEquals := func(fields [][]byte) {
   268  						for _, f := range fields {
   269  							expTermsIter, err := expSeg.TermsIterable().Terms(f)
   270  							require.NoError(t, err)
   271  							expTerms := toTermPostings(t, expTermsIter)
   272  
   273  							obsTermsIter, err := obsSeg.TermsIterable().Terms(f)
   274  							require.NoError(t, err)
   275  							obsTerms := toTermPostings(t, obsTermsIter)
   276  							require.Equal(t, expTerms, obsTerms)
   277  						}
   278  					}
   279  					assertTermEquals(expFields)
   280  					assertTermEquals(obsFields)
   281  				})
   282  			}
   283  		})
   284  	}
   285  }
   286  
   287  func TestPostingsListEqualForMatchField(t *testing.T) {
   288  	for _, test := range testDocuments {
   289  		t.Run(test.name, func(t *testing.T) {
   290  			for _, tc := range newTestCases(t, test.docs) {
   291  				t.Run(tc.name, func(t *testing.T) {
   292  					expSeg, obsSeg := tc.expected, tc.observed
   293  					expReader, err := expSeg.Reader()
   294  					require.NoError(t, err)
   295  					obsReader, err := obsSeg.Reader()
   296  					require.NoError(t, err)
   297  
   298  					expFieldsIter, err := expSeg.FieldsIterable().Fields()
   299  					require.NoError(t, err)
   300  					expFields := toSlice(t, expFieldsIter)
   301  
   302  					for _, f := range expFields {
   303  						expPl, err := expReader.MatchField(f)
   304  						require.NoError(t, err)
   305  						obsPl, err := obsReader.MatchField(f)
   306  						require.NoError(t, err)
   307  						require.True(t, expPl.Equal(obsPl),
   308  							fmt.Sprintf("field[%s] - [%v] != [%v]", string(f), pprintIter(expPl), pprintIter(obsPl)))
   309  					}
   310  				})
   311  			}
   312  		})
   313  	}
   314  }
   315  
   316  func TestPostingsListEqualForMatchFieldWithFieldsPostingsList(t *testing.T) {
   317  	for _, test := range testDocuments {
   318  		t.Run(test.name, func(t *testing.T) {
   319  			for _, tc := range newTestCases(t, test.docs) {
   320  				tc := tc
   321  				t.Run(tc.name, func(t *testing.T) {
   322  					expSeg, obsSeg := tc.expected, tc.observed
   323  					expReader, err := expSeg.Reader()
   324  					require.NoError(t, err)
   325  					obsReader, err := obsSeg.Reader()
   326  					require.NoError(t, err)
   327  
   328  					obsFieldsPostingsIter, err := obsReader.FieldsPostingsList()
   329  					require.NoError(t, err)
   330  
   331  					for obsFieldsPostingsIter.Next() {
   332  						f, obsPl := obsFieldsPostingsIter.Current()
   333  						expPl, err := expReader.MatchField(f)
   334  						require.NoError(t, err)
   335  						require.True(t, expPl.Equal(obsPl),
   336  							fmt.Sprintf("field[%s] - [%v] != [%v]", string(f), pprintIter(expPl), pprintIter(obsPl)))
   337  					}
   338  				})
   339  			}
   340  		})
   341  	}
   342  }
   343  
   344  func TestPostingsListEqualForMatchTerm(t *testing.T) {
   345  	for _, test := range testDocuments {
   346  		t.Run(test.name, func(t *testing.T) {
   347  			memSeg, fstSeg := newTestSegments(t, test.docs)
   348  			memReader, err := memSeg.Reader()
   349  			require.NoError(t, err)
   350  			fstReader, err := fstSeg.Reader()
   351  			require.NoError(t, err)
   352  
   353  			memFieldsIter, err := memSeg.Fields()
   354  			require.NoError(t, err)
   355  			memFields := toSlice(t, memFieldsIter)
   356  
   357  			for _, f := range memFields {
   358  				memTermsIter, err := memSeg.Terms(f)
   359  				require.NoError(t, err)
   360  				memTerms := toTermPostings(t, memTermsIter)
   361  
   362  				for term := range memTerms {
   363  					memPl, err := memReader.MatchTerm(f, []byte(term))
   364  					require.NoError(t, err)
   365  					fstPl, err := fstReader.MatchTerm(f, []byte(term))
   366  					require.NoError(t, err)
   367  					require.True(t, memPl.Equal(fstPl),
   368  						fmt.Sprintf("%s:%s - [%v] != [%v]", string(f), term, pprintIter(memPl), pprintIter(fstPl)))
   369  				}
   370  			}
   371  		})
   372  	}
   373  }
   374  
   375  func TestPostingsListEqualForMatchTermWithFieldsPostingsList(t *testing.T) {
   376  	for _, test := range testDocuments {
   377  		t.Run(test.name, func(t *testing.T) {
   378  			memSeg, fstSeg := newTestSegments(t, test.docs)
   379  			memReader, err := memSeg.Reader()
   380  			require.NoError(t, err)
   381  			fstReader, err := fstSeg.Reader()
   382  			require.NoError(t, err)
   383  
   384  			fstFieldsPostingsIter, err := fstReader.FieldsPostingsList()
   385  			require.NoError(t, err)
   386  
   387  			for fstFieldsPostingsIter.Next() {
   388  				f, _ := fstFieldsPostingsIter.Current()
   389  
   390  				memTermsIter, err := memSeg.Terms(f)
   391  				require.NoError(t, err)
   392  				memTerms := toTermPostings(t, memTermsIter)
   393  
   394  				for term := range memTerms {
   395  					memPl, err := memReader.MatchTerm(f, []byte(term))
   396  					require.NoError(t, err)
   397  					fstPl, err := fstReader.MatchTerm(f, []byte(term))
   398  					require.NoError(t, err)
   399  					require.True(t, memPl.Equal(fstPl),
   400  						fmt.Sprintf("%s:%s - [%v] != [%v]", string(f), term, pprintIter(memPl), pprintIter(fstPl)))
   401  				}
   402  			}
   403  		})
   404  	}
   405  }
   406  
   407  func TestPostingsListContainsID(t *testing.T) {
   408  	for _, test := range testDocuments {
   409  		t.Run(test.name, func(t *testing.T) {
   410  			for _, tc := range newTestCases(t, test.docs) {
   411  				t.Run(tc.name, func(t *testing.T) {
   412  					expSeg, obsSeg := tc.expected, tc.observed
   413  					expIDsIter, err := expSeg.TermsIterable().Terms(doc.IDReservedFieldName)
   414  					require.NoError(t, err)
   415  					expIDs := toTermPostings(t, expIDsIter)
   416  					for i := range expIDs {
   417  						ok, err := obsSeg.ContainsID([]byte(i))
   418  						require.NoError(t, err)
   419  						require.True(t, ok)
   420  					}
   421  				})
   422  			}
   423  		})
   424  	}
   425  }
   426  
   427  func TestPostingsListRegexAll(t *testing.T) {
   428  	for _, test := range testDocuments {
   429  		t.Run(test.name, func(t *testing.T) {
   430  			for _, tc := range newTestCases(t, test.docs) {
   431  				t.Run(tc.name, func(t *testing.T) {
   432  					expSeg, obsSeg := tc.expected, tc.observed
   433  					fieldsIter, err := expSeg.FieldsIterable().Fields()
   434  					require.NoError(t, err)
   435  					fields := toSlice(t, fieldsIter)
   436  					for _, f := range fields {
   437  						reader, err := expSeg.Reader()
   438  						require.NoError(t, err)
   439  						c, err := index.CompileRegex([]byte(".*"))
   440  						require.NoError(t, err)
   441  						expPl, err := reader.MatchRegexp(f, c)
   442  						require.NoError(t, err)
   443  
   444  						obsReader, err := obsSeg.Reader()
   445  						require.NoError(t, err)
   446  						c, err = index.CompileRegex([]byte(".*"))
   447  						require.NoError(t, err)
   448  						obsPl, err := obsReader.MatchRegexp(f, c)
   449  						require.NoError(t, err)
   450  						require.True(t, expPl.Equal(obsPl))
   451  					}
   452  				})
   453  			}
   454  		})
   455  	}
   456  }
   457  
   458  func TestSegmentDocs(t *testing.T) {
   459  	for _, test := range testDocuments {
   460  		t.Run(test.name, func(t *testing.T) {
   461  			for _, tc := range newTestCases(t, test.docs) {
   462  				t.Run(tc.name, func(t *testing.T) {
   463  					expSeg, obsSeg := tc.expected, tc.observed
   464  					expReader, err := expSeg.Reader()
   465  					require.NoError(t, err)
   466  					obsReader, err := obsSeg.Reader()
   467  					require.NoError(t, err)
   468  
   469  					expFieldsIter, err := expSeg.FieldsIterable().Fields()
   470  					require.NoError(t, err)
   471  					expFields := toSlice(t, expFieldsIter)
   472  
   473  					for _, f := range expFields {
   474  						expTermsIter, err := expSeg.TermsIterable().Terms(f)
   475  						require.NoError(t, err)
   476  						expTerms := toTermPostings(t, expTermsIter)
   477  
   478  						for term := range expTerms {
   479  							expPl, err := expReader.MatchTerm(f, []byte(term))
   480  							require.NoError(t, err)
   481  							obsPl, err := obsReader.MatchTerm(f, []byte(term))
   482  							require.NoError(t, err)
   483  
   484  							expDocs, err := expReader.MetadataIterator(expPl)
   485  							require.NoError(t, err)
   486  							obsDocs, err := obsReader.MetadataIterator(obsPl)
   487  							require.NoError(t, err)
   488  
   489  							assertDocsEqual(t, expDocs, obsDocs)
   490  						}
   491  					}
   492  				})
   493  			}
   494  		})
   495  	}
   496  }
   497  
   498  func TestSegmentAllDocs(t *testing.T) {
   499  	for _, test := range testDocuments {
   500  		t.Run(test.name, func(t *testing.T) {
   501  			for _, tc := range newTestCases(t, test.docs) {
   502  				t.Run(tc.name, func(t *testing.T) {
   503  					expSeg, obsSeg := tc.expected, tc.observed
   504  					expReader, err := expSeg.Reader()
   505  					require.NoError(t, err)
   506  					obsReader, err := obsSeg.Reader()
   507  					require.NoError(t, err)
   508  					expDocs, err := expReader.AllDocs()
   509  					require.NoError(t, err)
   510  					obsDocs, err := obsReader.AllDocs()
   511  					require.NoError(t, err)
   512  					assertDocsEqual(t, expDocs, obsDocs)
   513  				})
   514  			}
   515  		})
   516  	}
   517  }
   518  
   519  func TestFieldsEqualsParallel(t *testing.T) {
   520  	for _, test := range testDocuments {
   521  		t.Run(test.name, func(t *testing.T) {
   522  			_, fstSeg := newTestSegments(t, test.docs)
   523  			var wg sync.WaitGroup
   524  			wg.Add(2)
   525  			go func() {
   526  				fstSeg.FieldsIterable().Fields()
   527  				wg.Done()
   528  			}()
   529  			go func() {
   530  				fstSeg.FieldsIterable().Fields()
   531  				wg.Done()
   532  			}()
   533  			wg.Wait()
   534  		})
   535  	}
   536  }
   537  
   538  func TestPostingsListLifecycleSimple(t *testing.T) {
   539  	_, fstSeg := newTestSegments(t, fewTestDocuments)
   540  	require.NoError(t, fstSeg.Close())
   541  
   542  	_, err := fstSeg.FieldsIterable().Fields()
   543  	require.Error(t, err)
   544  
   545  	_, err = fstSeg.TermsIterable().Terms(nil)
   546  	require.Error(t, err)
   547  
   548  	_, err = fstSeg.Reader()
   549  	require.Error(t, err)
   550  }
   551  
   552  func TestPostingsListReaderLifecycle(t *testing.T) {
   553  	_, fstSeg := newTestSegments(t, fewTestDocuments)
   554  	reader, err := fstSeg.Reader()
   555  	require.NoError(t, err)
   556  	require.NoError(t, reader.Close())
   557  	_, err = fstSeg.Reader()
   558  	require.NoError(t, err)
   559  }
   560  
   561  func TestSegmentReaderValidUntilClose(t *testing.T) {
   562  	_, fstSeg := newTestSegments(t, fewTestDocuments)
   563  
   564  	reader, err := fstSeg.Reader()
   565  	require.NoError(t, err)
   566  
   567  	// Close segment early, expect reader still valid until close.
   568  	err = fstSeg.Close()
   569  	require.NoError(t, err)
   570  
   571  	// Make sure all methods allow for calls until the reader is closed.
   572  	var (
   573  		list postings.List
   574  	)
   575  	list, err = reader.MatchField([]byte("fruit"))
   576  	require.NoError(t, err)
   577  	assertPostingsList(t, list, []postings.ID{0, 1, 2})
   578  
   579  	list, err = reader.MatchTerm([]byte("color"), []byte("yellow"))
   580  	require.NoError(t, err)
   581  	assertPostingsList(t, list, []postings.ID{0, 2})
   582  
   583  	re, err := index.CompileRegex([]byte("^.*apple$"))
   584  	require.NoError(t, err)
   585  	list, err = reader.MatchRegexp([]byte("fruit"), re)
   586  	require.NoError(t, err)
   587  	assertPostingsList(t, list, []postings.ID{1, 2})
   588  
   589  	list, err = reader.MatchAll()
   590  	require.NoError(t, err)
   591  	assertPostingsList(t, list, []postings.ID{0, 1, 2})
   592  
   593  	_, err = reader.Metadata(0)
   594  	require.NoError(t, err)
   595  
   596  	_, err = reader.MetadataIterator(list)
   597  	require.NoError(t, err)
   598  
   599  	_, err = reader.AllDocs()
   600  	require.NoError(t, err)
   601  
   602  	// Test returned iterators also work
   603  	re, err = index.CompileRegex([]byte("^.*apple$"))
   604  	require.NoError(t, err)
   605  	list, err = reader.MatchRegexp([]byte("fruit"), re)
   606  	require.NoError(t, err)
   607  	iter, err := reader.MetadataIterator(list)
   608  	require.NoError(t, err)
   609  	var docs int
   610  	for iter.Next() {
   611  		docs++
   612  		var fruitField doc.Field
   613  		for _, field := range iter.Current().Fields {
   614  			if bytes.Equal(field.Name, []byte("fruit")) {
   615  				fruitField = field
   616  				break
   617  			}
   618  		}
   619  		require.True(t, bytes.HasSuffix(fruitField.Value, []byte("apple")))
   620  	}
   621  	require.NoError(t, iter.Err())
   622  	require.NoError(t, iter.Close())
   623  
   624  	// Now close.
   625  	require.NoError(t, reader.Close())
   626  
   627  	// Make sure reader now starts returning errors.
   628  	_, err = reader.MatchTerm([]byte("color"), []byte("yellow"))
   629  	require.Error(t, err)
   630  }
   631  
   632  func newTestSegments(t *testing.T, docs []doc.Metadata) (memSeg sgmt.MutableSegment, fstSeg sgmt.Segment) {
   633  	s := newTestMemSegment(t)
   634  	for _, d := range docs {
   635  		_, err := s.Insert(d)
   636  		require.NoError(t, err)
   637  	}
   638  	return s, newFSTSegment(t, s, testOptions)
   639  }
   640  
   641  func newTestMemSegment(t *testing.T) sgmt.MutableSegment {
   642  	opts := mem.NewOptions()
   643  	s, err := mem.NewSegment(opts)
   644  	require.NoError(t, err)
   645  	return s
   646  }
   647  
   648  func assertSliceOfByteSlicesEqual(t *testing.T, a, b [][]byte) {
   649  	require.Equal(t, len(a), len(b), fmt.Sprintf("a = [%s], b = [%s]", pprint(a), pprint(b)))
   650  	require.Equal(t, a, b)
   651  }
   652  
   653  func assertDocsEqual(t *testing.T, a, b doc.MetadataIterator) {
   654  	aDocs, err := collectDocs(a)
   655  	require.NoError(t, err)
   656  	bDocs, err := collectDocs(b)
   657  	require.NoError(t, err)
   658  
   659  	require.Equal(t, len(aDocs), len(bDocs))
   660  
   661  	sort.Sort(doc.Documents(aDocs))
   662  	sort.Sort(doc.Documents(bDocs))
   663  
   664  	for i := range aDocs {
   665  		require.True(t, aDocs[i].Equal(bDocs[i]))
   666  	}
   667  }
   668  
   669  func assertPostingsList(t *testing.T, l postings.List, exp []postings.ID) {
   670  	it := l.Iterator()
   671  
   672  	defer func() {
   673  		require.False(t, it.Next(), "should exhaust just once")
   674  		require.NoError(t, it.Err(), "should not complete with error")
   675  		require.NoError(t, it.Close(), "should not encounter error on close")
   676  	}()
   677  
   678  	match := make(map[postings.ID]struct{}, len(exp))
   679  	for _, v := range exp {
   680  		match[v] = struct{}{}
   681  	}
   682  
   683  	for it.Next() {
   684  		curr := it.Current()
   685  
   686  		_, ok := match[curr]
   687  		if !ok {
   688  			require.Fail(t,
   689  				fmt.Sprintf("expected %d, not found in postings iter", curr))
   690  			return
   691  		}
   692  
   693  		delete(match, curr)
   694  	}
   695  
   696  	if len(match) == 0 {
   697  		// Success.
   698  		return
   699  	}
   700  
   701  	remaining := make([]int, 0, len(match))
   702  	for id := range match {
   703  		remaining = append(remaining, int(id))
   704  	}
   705  
   706  	msg := fmt.Sprintf("unmatched expected IDs %v, not found in postings iter",
   707  		remaining)
   708  	require.Fail(t, msg)
   709  }
   710  
   711  func collectDocs(iter doc.MetadataIterator) ([]doc.Metadata, error) {
   712  	var docs []doc.Metadata
   713  	for iter.Next() {
   714  		docs = append(docs, iter.Current())
   715  	}
   716  
   717  	if err := iter.Err(); err != nil {
   718  		return nil, err
   719  	}
   720  
   721  	return docs, nil
   722  }
   723  
   724  func pprint(a [][]byte) string {
   725  	var buf bytes.Buffer
   726  	for i, t := range a {
   727  		if i != 0 {
   728  			buf.WriteString(", ")
   729  		}
   730  		buf.WriteString(fmt.Sprintf("%d %s", i, string(t)))
   731  	}
   732  	return buf.String()
   733  }
   734  
   735  func pprintIter(pl postings.List) string {
   736  	var buf bytes.Buffer
   737  	iter := pl.Iterator()
   738  	for i := 0; iter.Next(); i++ {
   739  		if i != 0 {
   740  			buf.WriteString(", ")
   741  		}
   742  		buf.WriteString(fmt.Sprintf("%d", iter.Current()))
   743  	}
   744  	return buf.String()
   745  }
   746  
   747  func toSlice(t *testing.T, iter sgmt.OrderedBytesIterator) [][]byte {
   748  	elems := [][]byte{}
   749  	for iter.Next() {
   750  		curr := iter.Current()
   751  		bytes := append([]byte(nil), curr...)
   752  		elems = append(elems, bytes)
   753  	}
   754  	require.NoError(t, iter.Err())
   755  	require.NoError(t, iter.Close())
   756  	return elems
   757  }
   758  
   759  type termPostings map[string][]int
   760  
   761  func toTermPostings(t *testing.T, iter sgmt.TermsIterator) termPostings {
   762  	elems := make(termPostings)
   763  	for iter.Next() {
   764  		term, postings := iter.Current()
   765  		_, exists := elems[string(term)]
   766  		require.False(t, exists)
   767  
   768  		values := []int{}
   769  		it := postings.Iterator()
   770  		for it.Next() {
   771  			values = append(values, int(it.Current()))
   772  		}
   773  		sort.Sort(sort.IntSlice(values))
   774  
   775  		require.NoError(t, it.Err())
   776  		require.NoError(t, it.Close())
   777  
   778  		elems[string(term)] = values
   779  	}
   780  	require.NoError(t, iter.Err())
   781  	require.NoError(t, iter.Close())
   782  	return elems
   783  }