github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/convert/convert_test.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  package convert_test
    21  
    22  import (
    23  	"bytes"
    24  	"encoding/hex"
    25  	"testing"
    26  	"unicode/utf8"
    27  
    28  	"github.com/m3db/m3/src/dbnode/storage/index/convert"
    29  	"github.com/m3db/m3/src/m3ninx/doc"
    30  	"github.com/m3db/m3/src/x/checked"
    31  	"github.com/m3db/m3/src/x/ident"
    32  	"github.com/m3db/m3/src/x/pool"
    33  	"github.com/m3db/m3/src/x/serialize"
    34  	"github.com/m3db/m3/src/x/test"
    35  
    36  	"github.com/stretchr/testify/assert"
    37  	"github.com/stretchr/testify/require"
    38  )
    39  
    40  var (
    41  	testOpts convert.Opts
    42  )
    43  
    44  func init() {
    45  	// NB: allocating once to save memory in tests
    46  	bytesPool := pool.NewCheckedBytesPool(nil, nil, func(s []pool.Bucket) pool.BytesPool {
    47  		return pool.NewBytesPool(s, nil)
    48  	})
    49  	bytesPool.Init()
    50  	idPool := ident.NewPool(bytesPool, ident.PoolOptions{})
    51  	testOpts.CheckedBytesPool = bytesPool
    52  	testOpts.IdentPool = idPool
    53  }
    54  
    55  func TestFromSeriesIDAndTagsInvalid(t *testing.T) {
    56  	id := ident.StringID("foo")
    57  	tags := ident.NewTags(
    58  		ident.StringTag(string(convert.ReservedFieldNameID), "value"),
    59  	)
    60  	_, err := convert.FromSeriesIDAndTags(id, tags)
    61  	assert.Error(t, err)
    62  }
    63  
    64  func TestFromSeriesIDAndTagIteratorInvalid(t *testing.T) {
    65  	id := ident.StringID("foo")
    66  	tags := ident.NewTags(
    67  		ident.StringTag(string(convert.ReservedFieldNameID), "value"),
    68  	)
    69  	_, err := convert.FromSeriesIDAndTagIter(id, ident.NewTagsIterator(tags))
    70  	assert.Error(t, err)
    71  }
    72  
    73  func TestFromSeriesIDAndTagsValid(t *testing.T) {
    74  	id := ident.StringID("foo")
    75  	tags := ident.NewTags(
    76  		ident.StringTag("bar", "baz"),
    77  	)
    78  	d, err := convert.FromSeriesIDAndTags(id, tags)
    79  	assert.NoError(t, err)
    80  	assertContentsMatch(t, id, tags.Values(), d)
    81  	assert.False(t, test.ByteSlicesBackedBySameData(id.Bytes(), d.ID))
    82  }
    83  
    84  func TestFromSeriesIDAndTagsReuseBytesFromSeriesId(t *testing.T) {
    85  	tests := []struct {
    86  		name string
    87  		id   string
    88  	}{
    89  		{
    90  			name: "tags in ID",
    91  			id:   "bar=baz,quip=quix",
    92  		},
    93  		{
    94  			name: "tags in ID with specific format",
    95  			id:   `{bar="baz",quip="quix"}`,
    96  		},
    97  		{
    98  			name: "tags in ID with specific format reverse order",
    99  			id:   `{quip="quix",bar="baz"}`,
   100  		},
   101  		{
   102  			name: "inexact tag occurrence in ID",
   103  			id:   "quixquip_bazillion_barometers",
   104  		},
   105  	}
   106  	tags := ident.NewTags(
   107  		ident.StringTag("bar", "baz"),
   108  		ident.StringTag("quip", "quix"),
   109  	)
   110  
   111  	for _, tt := range tests {
   112  		t.Run(tt.name, func(t *testing.T) {
   113  			seriesID := ident.StringID(tt.id)
   114  			d, err := convert.FromSeriesIDAndTags(seriesID, tags)
   115  			assert.NoError(t, err)
   116  			assertContentsMatch(t, seriesID, tags.Values(), d)
   117  			assert.False(t, test.ByteSlicesBackedBySameData(seriesID.Bytes(), d.ID))
   118  			for i := range d.Fields {
   119  				assertBackedBySameData(t, d.ID, d.Fields[i].Name)
   120  				assertBackedBySameData(t, d.ID, d.Fields[i].Value)
   121  			}
   122  		})
   123  	}
   124  }
   125  
   126  func TestFromSeriesIDAndTagIterValid(t *testing.T) {
   127  	id := ident.StringID("foo")
   128  	tags := ident.NewTags(
   129  		ident.StringTag("bar", "baz"),
   130  	)
   131  	d, err := convert.FromSeriesIDAndTagIter(id, ident.NewTagsIterator(tags))
   132  	assert.NoError(t, err)
   133  	assertContentsMatch(t, id, tags.Values(), d)
   134  	assert.False(t, test.ByteSlicesBackedBySameData(id.Bytes(), d.ID))
   135  }
   136  
   137  func TestFromSeriesIDAndTagIterReuseBytesFromSeriesId(t *testing.T) {
   138  	tests := []struct {
   139  		name string
   140  		id   string
   141  	}{
   142  		{
   143  			name: "tags in ID",
   144  			id:   "bar=baz,quip=quix",
   145  		},
   146  		{
   147  			name: "tags in ID with specific format",
   148  			id:   `{bar="baz",quip="quix"}`,
   149  		},
   150  		{
   151  			name: "tags in ID with specific format reverse order",
   152  			id:   `{quip="quix",bar="baz"}`,
   153  		},
   154  		{
   155  			name: "inexact tag occurrence in ID",
   156  			id:   "quixquip_bazillion_barometers",
   157  		},
   158  	}
   159  	tags := ident.NewTags(
   160  		ident.StringTag("bar", "baz"),
   161  		ident.StringTag("quip", "quix"),
   162  	)
   163  
   164  	for _, tt := range tests {
   165  		t.Run(tt.name, func(t *testing.T) {
   166  			seriesID := ident.StringID(tt.id)
   167  			d, err := convert.FromSeriesIDAndTagIter(seriesID, ident.NewTagsIterator(tags))
   168  			assert.NoError(t, err)
   169  			assertContentsMatch(t, seriesID, tags.Values(), d)
   170  			assert.False(t, test.ByteSlicesBackedBySameData(seriesID.Bytes(), d.ID))
   171  			for i := range d.Fields {
   172  				assertBackedBySameData(t, d.ID, d.Fields[i].Name)
   173  				assertBackedBySameData(t, d.ID, d.Fields[i].Value)
   174  			}
   175  		})
   176  	}
   177  }
   178  
   179  func TestFromSeriesIDAndEncodedTags(t *testing.T) {
   180  	tests := []struct {
   181  		name string
   182  		id   string
   183  	}{
   184  		{
   185  			name: "no tags in ID",
   186  			id:   "foo",
   187  		},
   188  		{
   189  			name: "tags in ID",
   190  			id:   "bar=baz,quip=quix",
   191  		},
   192  		{
   193  			name: "tags in ID with specific format",
   194  			id:   `{bar="baz",quip="quix"}`,
   195  		},
   196  		{
   197  			name: "tags in ID with specific format reverse order",
   198  			id:   `{quip="quix",bar="baz"}`,
   199  		},
   200  		{
   201  			name: "inexact tag occurrence in ID",
   202  			id:   "quixquip_bazillion_barometers",
   203  		},
   204  	}
   205  	var (
   206  		tags = ident.NewTags(
   207  			ident.StringTag("bar", "baz"),
   208  			ident.StringTag("quip", "quix"),
   209  		)
   210  		encodedTags = toEncodedTags(t, tags)
   211  	)
   212  
   213  	for _, tt := range tests {
   214  		t.Run(tt.name, func(t *testing.T) {
   215  			seriesID := ident.BytesID(tt.id)
   216  			d, err := convert.FromSeriesIDAndEncodedTags(seriesID, encodedTags)
   217  			assert.NoError(t, err)
   218  			assertContentsMatch(t, seriesID, tags.Values(), d)
   219  			assert.False(t, test.ByteSlicesBackedBySameData(seriesID.Bytes(), d.ID))
   220  			for i := range d.Fields {
   221  				assertBackedBySameData(t, d.ID, d.Fields[i].Name)
   222  				assertBackedBySameData(t, d.ID, d.Fields[i].Value)
   223  			}
   224  		})
   225  	}
   226  }
   227  
   228  func TestFromSeriesIDAndEncodedTags_EmptyEncodedTags(t *testing.T) {
   229  	tests := []struct {
   230  		name        string
   231  		encodedTags []byte
   232  	}{
   233  		{
   234  			name:        "nil slice",
   235  			encodedTags: nil,
   236  		},
   237  		{
   238  			name:        "empty slice",
   239  			encodedTags: make([]byte, 0),
   240  		},
   241  	}
   242  
   243  	var (
   244  		seriesID = ident.BytesID("foo")
   245  		expected = doc.Metadata{
   246  			ID:     seriesID,
   247  			Fields: nil,
   248  		}
   249  	)
   250  
   251  	for _, tt := range tests {
   252  		t.Run(tt.name, func(t *testing.T) {
   253  			d, err := convert.FromSeriesIDAndEncodedTags(seriesID, tt.encodedTags)
   254  			assert.NoError(t, err)
   255  			assert.Equal(t, expected, d)
   256  			assert.False(t, test.ByteSlicesBackedBySameData(seriesID.Bytes(), d.ID))
   257  		})
   258  	}
   259  }
   260  
   261  func TestFromSeriesIDAndEncodedTagsInvalid(t *testing.T) {
   262  	var (
   263  		validEncodedTags     = []byte{117, 39, 1, 0, 3, 0, 98, 97, 114, 3, 0, 98, 97, 122}
   264  		tagsWithReservedName = toEncodedTags(t, ident.NewTags(
   265  			ident.StringTag(string(convert.ReservedFieldNameID), "some_value"),
   266  		))
   267  	)
   268  
   269  	tests := []struct {
   270  		name        string
   271  		encodedTags []byte
   272  	}{
   273  		{
   274  			name:        "reserved tag name",
   275  			encodedTags: tagsWithReservedName,
   276  		},
   277  		{
   278  			name:        "incomplete header",
   279  			encodedTags: validEncodedTags[:3],
   280  		},
   281  		{
   282  			name:        "incomplete tag name length",
   283  			encodedTags: validEncodedTags[:5],
   284  		},
   285  		{
   286  			name:        "incomplete tag value length",
   287  			encodedTags: validEncodedTags[:10],
   288  		},
   289  		{
   290  			name:        "invalid magic number",
   291  			encodedTags: []byte{42, 42, 0, 0},
   292  		},
   293  		{
   294  			name:        "empty tag name",
   295  			encodedTags: []byte{117, 39, 1, 0, 0, 0, 3, 0, 98, 97, 122},
   296  		},
   297  	}
   298  	seriesID := ident.BytesID("foo")
   299  
   300  	for _, tt := range tests {
   301  		t.Run(tt.name, func(t *testing.T) {
   302  			_, err := convert.FromSeriesIDAndEncodedTags(seriesID, tt.encodedTags)
   303  			assert.Error(t, err)
   304  		})
   305  	}
   306  }
   307  
   308  func TestToSeriesValid(t *testing.T) {
   309  	d := doc.Metadata{
   310  		ID: []byte("foo"),
   311  		Fields: []doc.Field{
   312  			{Name: []byte("bar"), Value: []byte("baz")},
   313  			{Name: []byte("some"), Value: []byte("others")},
   314  		},
   315  	}
   316  	id, tags, err := convert.ToSeries(d, testOpts)
   317  	assert.NoError(t, err)
   318  	assert.Equal(t, 2, tags.Remaining())
   319  	assert.Equal(t, "foo", id.String())
   320  	assert.True(t, ident.NewTagIterMatcher(
   321  		ident.MustNewTagStringsIterator("bar", "baz", "some", "others")).Matches(tags))
   322  }
   323  
   324  func TestTagsFromTagsIter(t *testing.T) {
   325  	var (
   326  		id           = ident.StringID("foo")
   327  		expectedTags = ident.NewTags(
   328  			ident.StringTag("bar", "baz"),
   329  			ident.StringTag("foo", "m3"),
   330  		)
   331  		tagsIter = ident.NewTagsIterator(expectedTags)
   332  	)
   333  
   334  	tags, err := convert.TagsFromTagsIter(id, tagsIter, testOpts.IdentPool)
   335  	require.NoError(t, err)
   336  	require.True(t, true, expectedTags.Equal(tags))
   337  }
   338  
   339  func TestTagsFromTagsIterNoPool(t *testing.T) {
   340  	var (
   341  		id           = ident.StringID("foo")
   342  		expectedTags = ident.NewTags(
   343  			ident.StringTag("bar", "baz"),
   344  			ident.StringTag("foo", "m3"),
   345  		)
   346  		tagsIter = ident.NewTagsIterator(expectedTags)
   347  	)
   348  
   349  	tags, err := convert.TagsFromTagsIter(id, tagsIter, nil)
   350  	require.NoError(t, err)
   351  	require.True(t, true, expectedTags.Equal(tags))
   352  }
   353  
   354  func TestToSeriesInvalidID(t *testing.T) {
   355  	d := doc.Metadata{
   356  		Fields: []doc.Field{
   357  			{Name: []byte("bar"), Value: []byte("baz")},
   358  		},
   359  	}
   360  	_, _, err := convert.ToSeries(d, testOpts)
   361  	assert.Error(t, err)
   362  }
   363  
   364  func TestToSeriesInvalidTag(t *testing.T) {
   365  	d := doc.Metadata{
   366  		ID: []byte("foo"),
   367  		Fields: []doc.Field{
   368  			{Name: convert.ReservedFieldNameID, Value: []byte("baz")},
   369  		},
   370  	}
   371  	_, tags, err := convert.ToSeries(d, testOpts)
   372  	assert.NoError(t, err)
   373  	assert.False(t, tags.Next())
   374  	assert.Error(t, tags.Err())
   375  }
   376  
   377  func invalidUTF8Bytes(t *testing.T) []byte {
   378  	bytes, err := hex.DecodeString("bf")
   379  	require.NoError(t, err)
   380  	require.False(t, utf8.Valid(bytes))
   381  	return bytes
   382  }
   383  
   384  func TestValidateSeries(t *testing.T) {
   385  	invalidBytes := checked.NewBytes(invalidUTF8Bytes(t), nil)
   386  
   387  	t.Run("id non-utf8", func(t *testing.T) {
   388  		err := convert.ValidateSeries(ident.BinaryID(invalidBytes),
   389  			ident.NewTags(ident.Tag{
   390  				Name:  ident.StringID("bar"),
   391  				Value: ident.StringID("baz"),
   392  			}))
   393  		require.Error(t, err)
   394  		assert.Contains(t, err.Error(), "invalid non-UTF8 ID")
   395  	})
   396  
   397  	t.Run("tag name reserved", func(t *testing.T) {
   398  		reservedName := checked.NewBytes(convert.ReservedFieldNameID, nil)
   399  		err := convert.ValidateSeries(ident.StringID("foo"),
   400  			ident.NewTags(ident.Tag{
   401  				Name:  ident.BinaryID(reservedName),
   402  				Value: ident.StringID("bar"),
   403  			}))
   404  		require.Error(t, err)
   405  		assert.Contains(t, err.Error(), "reserved field name")
   406  	})
   407  
   408  	t.Run("tag name non-utf8", func(t *testing.T) {
   409  		err := convert.ValidateSeries(ident.StringID("foo"),
   410  			ident.NewTags(ident.Tag{
   411  				Name:  ident.BinaryID(invalidBytes),
   412  				Value: ident.StringID("bar"),
   413  			}))
   414  		require.Error(t, err)
   415  		assert.Contains(t, err.Error(), "invalid non-UTF8 field name")
   416  	})
   417  
   418  	t.Run("tag value non-utf8", func(t *testing.T) {
   419  		err := convert.ValidateSeries(ident.StringID("foo"),
   420  			ident.NewTags(ident.Tag{
   421  				Name:  ident.StringID("bar"),
   422  				Value: ident.BinaryID(invalidBytes),
   423  			}))
   424  		require.Error(t, err)
   425  		assert.Contains(t, err.Error(), "invalid non-UTF8 field value")
   426  	})
   427  }
   428  
   429  // TODO(prateek): add a test to ensure we're interacting with the Pools as expected
   430  
   431  func assertContentsMatch(t *testing.T, seriesID ident.ID, tags []ident.Tag, doc doc.Metadata) {
   432  	assert.Equal(t, seriesID.String(), string(doc.ID))
   433  	assert.Len(t, doc.Fields, len(tags))
   434  	for i, f := range doc.Fields { //nolint:gocritic
   435  		assert.Equal(t, tags[i].Name.String(), string(f.Name))
   436  		assert.Equal(t, tags[i].Value.String(), string(f.Value))
   437  	}
   438  }
   439  
   440  func assertBackedBySameData(t *testing.T, outer, inner []byte) {
   441  	if idx := bytes.Index(outer, inner); idx != -1 {
   442  		subslice := outer[idx : idx+len(inner)]
   443  		assert.True(t, test.ByteSlicesBackedBySameData(subslice, inner))
   444  	}
   445  }
   446  
   447  func toEncodedTags(t *testing.T, tags ident.Tags) []byte {
   448  	pool := serialize.NewTagEncoderPool(serialize.NewTagEncoderOptions(), nil)
   449  	pool.Init()
   450  	encoder := pool.Get()
   451  	defer encoder.Finalize()
   452  
   453  	require.NoError(t, encoder.Encode(ident.NewTagsIterator(tags)))
   454  	data, ok := encoder.Data()
   455  	require.True(t, ok)
   456  	return append([]byte(nil), data.Bytes()...)
   457  }