github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/mem/segment_test.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package mem
    22  
    23  import (
    24  	re "regexp"
    25  	"testing"
    26  
    27  	"github.com/m3db/m3/src/m3ninx/doc"
    28  	"github.com/m3db/m3/src/m3ninx/index"
    29  	sgmt "github.com/m3db/m3/src/m3ninx/index/segment"
    30  
    31  	"github.com/stretchr/testify/require"
    32  )
    33  
    34  var (
    35  	testOptions = NewOptions()
    36  
    37  	testDocuments = []doc.Metadata{
    38  		{
    39  			Fields: []doc.Field{
    40  				{
    41  					Name:  []byte("fruit"),
    42  					Value: []byte("banana"),
    43  				},
    44  				{
    45  					Name:  []byte("color"),
    46  					Value: []byte("yellow"),
    47  				},
    48  			},
    49  		},
    50  		{
    51  			Fields: []doc.Field{
    52  				{
    53  					Name:  []byte("fruit"),
    54  					Value: []byte("apple"),
    55  				},
    56  				{
    57  					Name:  []byte("color"),
    58  					Value: []byte("red"),
    59  				},
    60  			},
    61  		},
    62  		{
    63  			ID: []byte("42"),
    64  			Fields: []doc.Field{
    65  				{
    66  					Name:  []byte("fruit"),
    67  					Value: []byte("pineapple"),
    68  				},
    69  				{
    70  					Name:  []byte("color"),
    71  					Value: []byte("yellow"),
    72  				},
    73  			},
    74  		},
    75  	}
    76  )
    77  
    78  func TestSegmentInsert(t *testing.T) {
    79  	tests := []struct {
    80  		name  string
    81  		input doc.Metadata
    82  	}{
    83  		{
    84  			name: "document without an ID",
    85  			input: doc.Metadata{
    86  				Fields: []doc.Field{
    87  					{
    88  						Name:  []byte("apple"),
    89  						Value: []byte("red"),
    90  					},
    91  				},
    92  			},
    93  		},
    94  		{
    95  			name: "document with an ID",
    96  			input: doc.Metadata{
    97  				ID: []byte("123"),
    98  				Fields: []doc.Field{
    99  					{
   100  						Name:  []byte("apple"),
   101  						Value: []byte("red"),
   102  					},
   103  				},
   104  			},
   105  		},
   106  	}
   107  
   108  	for _, test := range tests {
   109  		t.Run(test.name, func(t *testing.T) {
   110  			segment, err := NewSegment(testOptions)
   111  			require.NoError(t, err)
   112  			require.Equal(t, int64(0), segment.Size())
   113  
   114  			id, err := segment.Insert(test.input)
   115  			require.NoError(t, err)
   116  			require.Equal(t, int64(1), segment.Size())
   117  			ok, err := segment.ContainsID(id)
   118  			require.NoError(t, err)
   119  			require.True(t, ok)
   120  
   121  			r, err := segment.Reader()
   122  			require.NoError(t, err)
   123  
   124  			testDocument(t, test.input, r)
   125  
   126  			// The ID must be searchable.
   127  			pl, err := r.MatchTerm(doc.IDReservedFieldName, id)
   128  			require.NoError(t, err)
   129  
   130  			iter, err := r.MetadataIterator(pl)
   131  			require.NoError(t, err)
   132  
   133  			require.True(t, iter.Next())
   134  			actual := iter.Current()
   135  
   136  			require.True(t, compareDocs(test.input, actual))
   137  
   138  			require.NoError(t, iter.Close())
   139  			require.NoError(t, r.Close())
   140  			require.NoError(t, segment.Close())
   141  		})
   142  	}
   143  }
   144  
   145  func TestSegmentInsertDuplicateID(t *testing.T) {
   146  	var (
   147  		id    = []byte("123")
   148  		first = doc.Metadata{
   149  			ID: id,
   150  			Fields: []doc.Field{
   151  				{
   152  					Name:  []byte("apple"),
   153  					Value: []byte("red"),
   154  				},
   155  			},
   156  		}
   157  		second = doc.Metadata{
   158  			ID: id,
   159  			Fields: []doc.Field{
   160  				{
   161  					Name:  []byte("apple"),
   162  					Value: []byte("red"),
   163  				},
   164  				{
   165  					Name:  []byte("variety"),
   166  					Value: []byte("fuji"),
   167  				},
   168  			},
   169  		}
   170  	)
   171  
   172  	segment, err := NewSegment(testOptions)
   173  	require.NoError(t, err)
   174  	require.Equal(t, int64(0), segment.Size())
   175  
   176  	id, err = segment.Insert(first)
   177  	require.NoError(t, err)
   178  	ok, err := segment.ContainsID(id)
   179  	require.NoError(t, err)
   180  	require.True(t, ok)
   181  	require.Equal(t, int64(1), segment.Size())
   182  
   183  	r, err := segment.Reader()
   184  	require.NoError(t, err)
   185  
   186  	pl, err := r.MatchTerm(doc.IDReservedFieldName, id)
   187  	require.NoError(t, err)
   188  
   189  	iter, err := r.MetadataIterator(pl)
   190  	require.NoError(t, err)
   191  
   192  	require.True(t, iter.Next())
   193  	actual := iter.Current()
   194  
   195  	// Only the first document should be indexed.
   196  	require.True(t, compareDocs(first, actual))
   197  	require.False(t, compareDocs(second, actual))
   198  
   199  	require.NoError(t, iter.Close())
   200  	require.NoError(t, r.Close())
   201  	require.NoError(t, segment.Close())
   202  
   203  	// ensure segment returns size == 0 once it's closed.
   204  	require.Equal(t, int64(0), segment.Size())
   205  }
   206  
   207  func TestSegmentInsertBatch(t *testing.T) {
   208  	tests := []struct {
   209  		name  string
   210  		input index.Batch
   211  	}{
   212  		{
   213  			name: "valid batch",
   214  			input: index.NewBatch(
   215  				[]doc.Metadata{
   216  					{
   217  						Fields: []doc.Field{
   218  							{
   219  								Name:  []byte("fruit"),
   220  								Value: []byte("apple"),
   221  							},
   222  							{
   223  								Name:  []byte("color"),
   224  								Value: []byte("red"),
   225  							},
   226  						},
   227  					},
   228  					{
   229  						ID: []byte("831992"),
   230  						Fields: []doc.Field{
   231  							{
   232  								Name:  []byte("fruit"),
   233  								Value: []byte("banana"),
   234  							},
   235  							{
   236  								Name:  []byte("color"),
   237  								Value: []byte("yellow"),
   238  							},
   239  						},
   240  					},
   241  				},
   242  			),
   243  		},
   244  	}
   245  
   246  	for _, test := range tests {
   247  		t.Run(test.name, func(t *testing.T) {
   248  			segment, err := NewSegment(testOptions)
   249  			require.NoError(t, err)
   250  			require.Equal(t, int64(0), segment.Size())
   251  
   252  			err = segment.InsertBatch(test.input)
   253  			require.NoError(t, err)
   254  			require.Equal(t, int64(len(test.input.Docs)), segment.Size())
   255  
   256  			r, err := segment.Reader()
   257  			require.NoError(t, err)
   258  
   259  			for _, doc := range test.input.Docs {
   260  				testDocument(t, doc, r)
   261  			}
   262  
   263  			require.NoError(t, r.Close())
   264  			require.NoError(t, segment.Close())
   265  		})
   266  	}
   267  }
   268  
   269  func TestSegmentInsertBatchError(t *testing.T) {
   270  	tests := []struct {
   271  		name  string
   272  		input index.Batch
   273  	}{
   274  		{
   275  			name: "invalid document",
   276  			input: index.NewBatch(
   277  				[]doc.Metadata{
   278  					{
   279  						Fields: []doc.Field{
   280  							{
   281  								Name:  []byte("fruit"),
   282  								Value: []byte("apple"),
   283  							},
   284  							{
   285  								Name:  []byte("color\xff"),
   286  								Value: []byte("red"),
   287  							},
   288  						},
   289  					},
   290  					{
   291  						Fields: []doc.Field{
   292  							{
   293  								Name:  []byte("fruit"),
   294  								Value: []byte("banana"),
   295  							},
   296  							{
   297  								Name:  []byte("color"),
   298  								Value: []byte("yellow"),
   299  							},
   300  						},
   301  					},
   302  				},
   303  			),
   304  		},
   305  	}
   306  
   307  	for _, test := range tests {
   308  		t.Run(test.name, func(t *testing.T) {
   309  			segment, err := NewSegment(testOptions)
   310  			require.Equal(t, int64(0), segment.Size())
   311  			require.NoError(t, err)
   312  
   313  			err = segment.InsertBatch(test.input)
   314  			require.Error(t, err)
   315  			require.False(t, index.IsBatchPartialError(err))
   316  			require.Equal(t, int64(0), segment.Size())
   317  		})
   318  	}
   319  }
   320  
   321  func TestSegmentInsertBatchPartialError(t *testing.T) {
   322  	tests := []struct {
   323  		name  string
   324  		input index.Batch
   325  	}{
   326  		{
   327  			name: "invalid document",
   328  			input: index.NewBatch(
   329  				[]doc.Metadata{
   330  					{
   331  						Fields: []doc.Field{
   332  							{
   333  								Name:  []byte("fruit"),
   334  								Value: []byte("apple"),
   335  							},
   336  							{
   337  								Name:  []byte("color\xff"),
   338  								Value: []byte("red"),
   339  							},
   340  						},
   341  					},
   342  					{
   343  
   344  						Fields: []doc.Field{
   345  							{
   346  								Name:  []byte("fruit"),
   347  								Value: []byte("banana"),
   348  							},
   349  							{
   350  								Name:  []byte("color"),
   351  								Value: []byte("yellow"),
   352  							},
   353  						},
   354  					},
   355  				},
   356  				index.AllowPartialUpdates(),
   357  			),
   358  		},
   359  		{
   360  			name: "duplicate ID",
   361  			input: index.NewBatch(
   362  				[]doc.Metadata{
   363  					{
   364  						ID: []byte("831992"),
   365  						Fields: []doc.Field{
   366  							{
   367  								Name:  []byte("fruit"),
   368  								Value: []byte("apple"),
   369  							},
   370  							{
   371  								Name:  []byte("color"),
   372  								Value: []byte("red"),
   373  							},
   374  						},
   375  					},
   376  					{
   377  						ID: []byte("831992"),
   378  						Fields: []doc.Field{
   379  							{
   380  								Name:  []byte("fruit"),
   381  								Value: []byte("banana"),
   382  							},
   383  							{
   384  								Name:  []byte("color"),
   385  								Value: []byte("yellow"),
   386  							},
   387  						},
   388  					},
   389  				},
   390  				index.AllowPartialUpdates(),
   391  			),
   392  		},
   393  	}
   394  
   395  	for _, test := range tests {
   396  		t.Run(test.name, func(t *testing.T) {
   397  			segment, err := NewSegment(testOptions)
   398  			require.NoError(t, err)
   399  			require.Equal(t, int64(0), segment.Size())
   400  
   401  			err = segment.InsertBatch(test.input)
   402  			require.Error(t, err)
   403  			require.True(t, index.IsBatchPartialError(err))
   404  			require.Equal(t, int64(1), segment.Size())
   405  
   406  			batchErr := err.(*index.BatchPartialError)
   407  			errs := batchErr.Errs()
   408  			failedDocs := make(map[int]struct{}, len(errs))
   409  			for _, err := range errs {
   410  				failedDocs[err.Idx] = struct{}{}
   411  			}
   412  
   413  			r, err := segment.Reader()
   414  			require.NoError(t, err)
   415  
   416  			for i, doc := range test.input.Docs {
   417  				_, ok := failedDocs[i]
   418  				if ok {
   419  					// Don't test documents which were not indexed.
   420  					continue
   421  				}
   422  				testDocument(t, doc, r)
   423  			}
   424  
   425  			require.NoError(t, r.Close())
   426  			require.NoError(t, segment.Close())
   427  		})
   428  	}
   429  }
   430  
   431  func TestSegmentInsertBatchPartialErrorInvalidDoc(t *testing.T) {
   432  	b1 := index.NewBatch(
   433  		[]doc.Metadata{
   434  			{
   435  				ID: []byte("abc"),
   436  				Fields: []doc.Field{
   437  					{
   438  						Name:  []byte("fruit"),
   439  						Value: []byte("apple"),
   440  					},
   441  					{
   442  						Name:  []byte("color\xff"),
   443  						Value: []byte("red"),
   444  					},
   445  				},
   446  			},
   447  			{
   448  				ID: []byte("abc"),
   449  				Fields: []doc.Field{
   450  					{
   451  						Name:  []byte("fruit"),
   452  						Value: []byte("banana"),
   453  					},
   454  					{
   455  						Name:  []byte("color"),
   456  						Value: []byte("yellow"),
   457  					},
   458  				},
   459  			},
   460  		},
   461  		index.AllowPartialUpdates(),
   462  	)
   463  	segment, err := NewSegment(testOptions)
   464  	require.NoError(t, err)
   465  
   466  	err = segment.InsertBatch(b1)
   467  	require.Error(t, err)
   468  	require.True(t, index.IsBatchPartialError(err))
   469  	be := err.(*index.BatchPartialError)
   470  	require.Len(t, be.Errs(), 1)
   471  	require.Equal(t, be.Errs()[0].Idx, 0)
   472  
   473  	r, err := segment.Reader()
   474  	require.NoError(t, err)
   475  	iter, err := r.AllDocs()
   476  	require.NoError(t, err)
   477  	require.True(t, iter.Next())
   478  	require.Equal(t, b1.Docs[1], iter.Current())
   479  	require.False(t, iter.Next())
   480  	require.NoError(t, iter.Close())
   481  	require.NoError(t, r.Close())
   482  	require.NoError(t, segment.Close())
   483  }
   484  
   485  func TestSegmentContainsID(t *testing.T) {
   486  	b1 := index.NewBatch(
   487  		[]doc.Metadata{
   488  			{
   489  				ID: []byte("abc"),
   490  				Fields: []doc.Field{
   491  					{
   492  						Name:  []byte("fruit"),
   493  						Value: []byte("apple"),
   494  					},
   495  					{
   496  						Name:  []byte("color\xff"),
   497  						Value: []byte("red"),
   498  					},
   499  				},
   500  			},
   501  			{
   502  				ID: []byte("abc"),
   503  				Fields: []doc.Field{
   504  					{
   505  						Name:  []byte("fruit"),
   506  						Value: []byte("banana"),
   507  					},
   508  					{
   509  						Name:  []byte("color"),
   510  						Value: []byte("yellow"),
   511  					},
   512  				},
   513  			},
   514  		},
   515  		index.AllowPartialUpdates(),
   516  	)
   517  	segment, err := NewSegment(testOptions)
   518  	require.NoError(t, err)
   519  	ok, err := segment.ContainsID([]byte("abc"))
   520  	require.NoError(t, err)
   521  	require.False(t, ok)
   522  
   523  	err = segment.InsertBatch(b1)
   524  	require.Error(t, err)
   525  	require.True(t, index.IsBatchPartialError(err))
   526  	be := err.(*index.BatchPartialError)
   527  	require.Len(t, be.Errs(), 1)
   528  	require.Equal(t, be.Errs()[0].Idx, 0)
   529  
   530  	ok, err = segment.ContainsID([]byte("abc"))
   531  	require.NoError(t, err)
   532  	require.True(t, ok)
   533  
   534  	r, err := segment.Reader()
   535  	require.NoError(t, err)
   536  	iter, err := r.AllDocs()
   537  	require.NoError(t, err)
   538  	require.True(t, iter.Next())
   539  	require.Equal(t, b1.Docs[1], iter.Current())
   540  	require.False(t, iter.Next())
   541  	require.NoError(t, iter.Close())
   542  	require.NoError(t, r.Close())
   543  	require.NoError(t, segment.Close())
   544  }
   545  
   546  func TestSegmentContainsField(t *testing.T) {
   547  	docs := []doc.Metadata{
   548  		{
   549  			ID: []byte("abc"),
   550  			Fields: []doc.Field{
   551  				{
   552  					Name:  []byte("fruit"),
   553  					Value: []byte("apple"),
   554  				},
   555  				{
   556  					Name:  []byte("colour"),
   557  					Value: []byte("red"),
   558  				},
   559  			},
   560  		},
   561  		{
   562  			ID: []byte("cde"),
   563  			Fields: []doc.Field{
   564  				{
   565  					Name:  []byte("fruit"),
   566  					Value: []byte("banana"),
   567  				},
   568  				{
   569  					Name:  []byte("color"),
   570  					Value: []byte("yellow"),
   571  				},
   572  			},
   573  		},
   574  	}
   575  	b1 := index.NewBatch(docs, index.AllowPartialUpdates())
   576  	segment, err := NewSegment(testOptions)
   577  	require.NoError(t, err)
   578  
   579  	err = segment.InsertBatch(b1)
   580  	require.NoError(t, err)
   581  	for _, d := range docs {
   582  		for _, f := range d.Fields {
   583  			ok, err := segment.ContainsField(f.Name)
   584  			require.NoError(t, err)
   585  			require.True(t, ok)
   586  		}
   587  	}
   588  }
   589  
   590  func TestSegmentInsertBatchPartialErrorAlreadyIndexing(t *testing.T) {
   591  	b1 := index.NewBatch(
   592  		[]doc.Metadata{
   593  			{
   594  				ID: []byte("abc"),
   595  				Fields: []doc.Field{
   596  					{
   597  						Name:  []byte("fruit"),
   598  						Value: []byte("apple"),
   599  					},
   600  					{
   601  						Name:  []byte("color"),
   602  						Value: []byte("red"),
   603  					},
   604  				},
   605  			},
   606  		},
   607  		index.AllowPartialUpdates())
   608  
   609  	b2 := index.NewBatch(
   610  		[]doc.Metadata{
   611  			{
   612  				ID: []byte("abc"),
   613  				Fields: []doc.Field{
   614  					{
   615  						Name:  []byte("fruit"),
   616  						Value: []byte("apple"),
   617  					},
   618  					{
   619  						Name:  []byte("color"),
   620  						Value: []byte("red"),
   621  					},
   622  				},
   623  			},
   624  			{
   625  				ID: []byte("cdef"),
   626  				Fields: []doc.Field{
   627  					{
   628  						Name:  []byte("color"),
   629  						Value: []byte("blue"),
   630  					},
   631  				},
   632  			},
   633  			{
   634  				ID: []byte("cdef"),
   635  				Fields: []doc.Field{
   636  					{
   637  						Name:  []byte("color"),
   638  						Value: []byte("blue"),
   639  					},
   640  				},
   641  			},
   642  		},
   643  		index.AllowPartialUpdates())
   644  
   645  	segment, err := NewSegment(testOptions)
   646  	require.NoError(t, err)
   647  
   648  	err = segment.InsertBatch(b1)
   649  	require.NoError(t, err)
   650  
   651  	err = segment.InsertBatch(b2)
   652  	require.Error(t, err)
   653  	require.True(t, index.IsBatchPartialError(err))
   654  	errs := err.(*index.BatchPartialError).Errs()
   655  	require.Len(t, errs, 1)
   656  	require.Equal(t, 2, errs[0].Idx)
   657  }
   658  
   659  func TestSegmentReaderMatchExact(t *testing.T) {
   660  	docs := []doc.Metadata{
   661  		{
   662  			Fields: []doc.Field{
   663  				{
   664  					Name:  []byte("fruit"),
   665  					Value: []byte("apple"),
   666  				},
   667  				{
   668  					Name:  []byte("color"),
   669  					Value: []byte("red"),
   670  				},
   671  			},
   672  		},
   673  		{
   674  			ID: []byte("83"),
   675  			Fields: []doc.Field{
   676  				{
   677  					Name:  []byte("fruit"),
   678  					Value: []byte("banana"),
   679  				},
   680  				{
   681  					Name:  []byte("color"),
   682  					Value: []byte("yellow"),
   683  				},
   684  			},
   685  		},
   686  		{
   687  			Fields: []doc.Field{
   688  				{
   689  					Name:  []byte("fruit"),
   690  					Value: []byte("apple"),
   691  				},
   692  				{
   693  					Name:  []byte("color"),
   694  					Value: []byte("green"),
   695  				},
   696  			},
   697  		},
   698  	}
   699  
   700  	segment, err := NewSegment(testOptions)
   701  	require.NoError(t, err)
   702  
   703  	for _, doc := range docs {
   704  		_, err = segment.Insert(doc)
   705  		require.NoError(t, err)
   706  	}
   707  
   708  	err = segment.Seal()
   709  	require.NoError(t, err)
   710  
   711  	r, err := segment.Reader()
   712  	require.NoError(t, err)
   713  
   714  	pl, err := r.MatchTerm([]byte("fruit"), []byte("apple"))
   715  	require.NoError(t, err)
   716  
   717  	iter, err := r.MetadataIterator(pl)
   718  	require.NoError(t, err)
   719  
   720  	actualDocs := make([]doc.Metadata, 0)
   721  	for iter.Next() {
   722  		actualDocs = append(actualDocs, iter.Current())
   723  	}
   724  
   725  	require.NoError(t, iter.Err())
   726  	require.NoError(t, iter.Close())
   727  
   728  	expectedDocs := []doc.Metadata{docs[0], docs[2]}
   729  	require.Equal(t, len(expectedDocs), len(actualDocs))
   730  	for i := range actualDocs {
   731  		require.True(t, compareDocs(expectedDocs[i], actualDocs[i]))
   732  	}
   733  
   734  	require.NoError(t, r.Close())
   735  	require.NoError(t, segment.Close())
   736  }
   737  
   738  func TestSegmentSealLifecycle(t *testing.T) {
   739  	segment, err := NewSegment(testOptions)
   740  	require.NoError(t, err)
   741  
   742  	err = segment.Seal()
   743  	require.NoError(t, err)
   744  
   745  	err = segment.Seal()
   746  	require.Error(t, err)
   747  }
   748  
   749  func TestSegmentSealCloseLifecycle(t *testing.T) {
   750  	segment, err := NewSegment(testOptions)
   751  	require.NoError(t, err)
   752  
   753  	require.NoError(t, segment.Close())
   754  	err = segment.Seal()
   755  	require.Error(t, err)
   756  }
   757  
   758  func TestSegmentIsSealed(t *testing.T) {
   759  	segment, err := NewSegment(testOptions)
   760  	require.NoError(t, err)
   761  
   762  	require.False(t, segment.IsSealed())
   763  
   764  	err = segment.Seal()
   765  	require.NoError(t, err)
   766  	require.True(t, segment.IsSealed())
   767  
   768  	require.NoError(t, segment.Close())
   769  	require.False(t, segment.IsSealed())
   770  }
   771  
   772  func TestSegmentFields(t *testing.T) {
   773  	segment, err := NewSegment(testOptions)
   774  	require.NoError(t, err)
   775  
   776  	knownsFields := map[string]struct{}{}
   777  	for _, d := range testDocuments {
   778  		for _, f := range d.Fields {
   779  			knownsFields[string(f.Name)] = struct{}{}
   780  		}
   781  		_, err = segment.Insert(d)
   782  		require.NoError(t, err)
   783  	}
   784  
   785  	err = segment.Seal()
   786  	require.NoError(t, err)
   787  
   788  	fieldsIter, err := segment.Fields()
   789  	require.NoError(t, err)
   790  
   791  	fields := toSlice(t, fieldsIter)
   792  	for _, f := range fields {
   793  		delete(knownsFields, string(f))
   794  	}
   795  	require.Empty(t, knownsFields)
   796  }
   797  
   798  func TestSegmentTerms(t *testing.T) {
   799  	segment, err := NewSegment(testOptions)
   800  	require.NoError(t, err)
   801  
   802  	knownsFields := map[string]map[string]struct{}{}
   803  	for _, d := range testDocuments {
   804  		for _, f := range d.Fields {
   805  			knownVals, ok := knownsFields[string(f.Name)]
   806  			if !ok {
   807  				knownVals = make(map[string]struct{})
   808  				knownsFields[string(f.Name)] = knownVals
   809  			}
   810  			knownVals[string(f.Value)] = struct{}{}
   811  		}
   812  		_, err = segment.Insert(d)
   813  		require.NoError(t, err)
   814  	}
   815  
   816  	err = segment.Seal()
   817  	require.NoError(t, err)
   818  
   819  	for field, expectedTerms := range knownsFields {
   820  		termsIter, err := segment.Terms([]byte(field))
   821  		require.NoError(t, err)
   822  		terms := toTermPostings(t, termsIter)
   823  		for term := range terms {
   824  			delete(expectedTerms, term)
   825  		}
   826  		require.Empty(t, expectedTerms)
   827  	}
   828  }
   829  
   830  func TestSegmentReaderMatchRegex(t *testing.T) {
   831  	docs := testDocuments
   832  	segment, err := NewSegment(testOptions)
   833  	require.NoError(t, err)
   834  
   835  	for _, doc := range docs {
   836  		_, err = segment.Insert(doc)
   837  		require.NoError(t, err)
   838  	}
   839  
   840  	r, err := segment.Reader()
   841  	require.NoError(t, err)
   842  
   843  	field, regexp := []byte("fruit"), []byte(".*ple")
   844  	compiled := re.MustCompile(string(regexp))
   845  	pl, err := r.MatchRegexp(field, index.CompiledRegex{Simple: compiled})
   846  	require.NoError(t, err)
   847  
   848  	iter, err := r.MetadataIterator(pl)
   849  	require.NoError(t, err)
   850  
   851  	actualDocs := make([]doc.Metadata, 0)
   852  	for iter.Next() {
   853  		actualDocs = append(actualDocs, iter.Current())
   854  	}
   855  
   856  	require.NoError(t, iter.Err())
   857  	require.NoError(t, iter.Close())
   858  
   859  	expectedDocs := []doc.Metadata{docs[1], docs[2]}
   860  	require.Equal(t, len(expectedDocs), len(actualDocs))
   861  	for i := range actualDocs {
   862  		require.True(t, compareDocs(expectedDocs[i], actualDocs[i]))
   863  	}
   864  
   865  	require.NoError(t, r.Close())
   866  	require.NoError(t, segment.Close())
   867  }
   868  
   869  func testDocument(t *testing.T, d doc.Metadata, r index.Reader) {
   870  	for _, f := range d.Fields {
   871  		name, value := f.Name, f.Value
   872  		pl, err := r.MatchTerm(name, value)
   873  		require.NoError(t, err)
   874  
   875  		iter, err := r.MetadataIterator(pl)
   876  		require.NoError(t, err)
   877  
   878  		require.True(t, iter.Next())
   879  		actual := iter.Current()
   880  
   881  		// The document must have an ID.
   882  		hasID := actual.ID != nil
   883  		require.True(t, hasID)
   884  
   885  		require.True(t, compareDocs(d, actual))
   886  
   887  		require.False(t, iter.Next())
   888  		require.NoError(t, iter.Err())
   889  		require.NoError(t, iter.Close())
   890  	}
   891  }
   892  
   893  // compareDocs returns whether two documents are equal. If the actual doc contains
   894  // an ID but the expected doc does not then the ID is excluded from the comparison
   895  // since it was auto-generated.
   896  func compareDocs(expected, actual doc.Metadata) bool {
   897  	if actual.HasID() && !expected.HasID() {
   898  		actual.ID = nil
   899  	}
   900  	return expected.Equal(actual)
   901  }
   902  
   903  func toSlice(t *testing.T, iter sgmt.OrderedBytesIterator) [][]byte {
   904  	elems := [][]byte{}
   905  	for iter.Next() {
   906  		elems = append(elems, iter.Current())
   907  	}
   908  	require.NoError(t, iter.Err())
   909  	require.NoError(t, iter.Close())
   910  	return elems
   911  }