// Source: github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/convert/convert_test.go

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 package convert_test 21 22 import ( 23 "bytes" 24 "encoding/hex" 25 "testing" 26 "unicode/utf8" 27 28 "github.com/m3db/m3/src/dbnode/storage/index/convert" 29 "github.com/m3db/m3/src/m3ninx/doc" 30 "github.com/m3db/m3/src/x/checked" 31 "github.com/m3db/m3/src/x/ident" 32 "github.com/m3db/m3/src/x/pool" 33 "github.com/m3db/m3/src/x/serialize" 34 "github.com/m3db/m3/src/x/test" 35 36 "github.com/stretchr/testify/assert" 37 "github.com/stretchr/testify/require" 38 ) 39 40 var ( 41 testOpts convert.Opts 42 ) 43 44 func init() { 45 // NB: allocating once to save memory in tests 46 bytesPool := pool.NewCheckedBytesPool(nil, nil, func(s []pool.Bucket) pool.BytesPool { 47 return pool.NewBytesPool(s, nil) 48 }) 49 bytesPool.Init() 50 idPool := ident.NewPool(bytesPool, ident.PoolOptions{}) 51 testOpts.CheckedBytesPool = bytesPool 52 testOpts.IdentPool = idPool 53 } 54 55 func TestFromSeriesIDAndTagsInvalid(t *testing.T) { 56 id := ident.StringID("foo") 57 tags := ident.NewTags( 58 ident.StringTag(string(convert.ReservedFieldNameID), "value"), 59 ) 60 _, err := convert.FromSeriesIDAndTags(id, tags) 61 assert.Error(t, err) 62 } 63 64 func TestFromSeriesIDAndTagIteratorInvalid(t *testing.T) { 65 id := ident.StringID("foo") 66 tags := ident.NewTags( 67 ident.StringTag(string(convert.ReservedFieldNameID), "value"), 68 ) 69 _, err := convert.FromSeriesIDAndTagIter(id, ident.NewTagsIterator(tags)) 70 assert.Error(t, err) 71 } 72 73 func TestFromSeriesIDAndTagsValid(t *testing.T) { 74 id := ident.StringID("foo") 75 tags := ident.NewTags( 76 ident.StringTag("bar", "baz"), 77 ) 78 d, err := convert.FromSeriesIDAndTags(id, tags) 79 assert.NoError(t, err) 80 assertContentsMatch(t, id, tags.Values(), d) 81 assert.False(t, test.ByteSlicesBackedBySameData(id.Bytes(), d.ID)) 82 } 83 84 func TestFromSeriesIDAndTagsReuseBytesFromSeriesId(t *testing.T) { 85 tests := []struct { 86 name string 87 id string 88 }{ 89 { 90 name: "tags in ID", 91 id: "bar=baz,quip=quix", 92 }, 93 { 94 name: "tags 
in ID with specific format", 95 id: `{bar="baz",quip="quix"}`, 96 }, 97 { 98 name: "tags in ID with specific format reverse order", 99 id: `{quip="quix",bar="baz"}`, 100 }, 101 { 102 name: "inexact tag occurrence in ID", 103 id: "quixquip_bazillion_barometers", 104 }, 105 } 106 tags := ident.NewTags( 107 ident.StringTag("bar", "baz"), 108 ident.StringTag("quip", "quix"), 109 ) 110 111 for _, tt := range tests { 112 t.Run(tt.name, func(t *testing.T) { 113 seriesID := ident.StringID(tt.id) 114 d, err := convert.FromSeriesIDAndTags(seriesID, tags) 115 assert.NoError(t, err) 116 assertContentsMatch(t, seriesID, tags.Values(), d) 117 assert.False(t, test.ByteSlicesBackedBySameData(seriesID.Bytes(), d.ID)) 118 for i := range d.Fields { 119 assertBackedBySameData(t, d.ID, d.Fields[i].Name) 120 assertBackedBySameData(t, d.ID, d.Fields[i].Value) 121 } 122 }) 123 } 124 } 125 126 func TestFromSeriesIDAndTagIterValid(t *testing.T) { 127 id := ident.StringID("foo") 128 tags := ident.NewTags( 129 ident.StringTag("bar", "baz"), 130 ) 131 d, err := convert.FromSeriesIDAndTagIter(id, ident.NewTagsIterator(tags)) 132 assert.NoError(t, err) 133 assertContentsMatch(t, id, tags.Values(), d) 134 assert.False(t, test.ByteSlicesBackedBySameData(id.Bytes(), d.ID)) 135 } 136 137 func TestFromSeriesIDAndTagIterReuseBytesFromSeriesId(t *testing.T) { 138 tests := []struct { 139 name string 140 id string 141 }{ 142 { 143 name: "tags in ID", 144 id: "bar=baz,quip=quix", 145 }, 146 { 147 name: "tags in ID with specific format", 148 id: `{bar="baz",quip="quix"}`, 149 }, 150 { 151 name: "tags in ID with specific format reverse order", 152 id: `{quip="quix",bar="baz"}`, 153 }, 154 { 155 name: "inexact tag occurrence in ID", 156 id: "quixquip_bazillion_barometers", 157 }, 158 } 159 tags := ident.NewTags( 160 ident.StringTag("bar", "baz"), 161 ident.StringTag("quip", "quix"), 162 ) 163 164 for _, tt := range tests { 165 t.Run(tt.name, func(t *testing.T) { 166 seriesID := ident.StringID(tt.id) 167 d, 
err := convert.FromSeriesIDAndTagIter(seriesID, ident.NewTagsIterator(tags)) 168 assert.NoError(t, err) 169 assertContentsMatch(t, seriesID, tags.Values(), d) 170 assert.False(t, test.ByteSlicesBackedBySameData(seriesID.Bytes(), d.ID)) 171 for i := range d.Fields { 172 assertBackedBySameData(t, d.ID, d.Fields[i].Name) 173 assertBackedBySameData(t, d.ID, d.Fields[i].Value) 174 } 175 }) 176 } 177 } 178 179 func TestFromSeriesIDAndEncodedTags(t *testing.T) { 180 tests := []struct { 181 name string 182 id string 183 }{ 184 { 185 name: "no tags in ID", 186 id: "foo", 187 }, 188 { 189 name: "tags in ID", 190 id: "bar=baz,quip=quix", 191 }, 192 { 193 name: "tags in ID with specific format", 194 id: `{bar="baz",quip="quix"}`, 195 }, 196 { 197 name: "tags in ID with specific format reverse order", 198 id: `{quip="quix",bar="baz"}`, 199 }, 200 { 201 name: "inexact tag occurrence in ID", 202 id: "quixquip_bazillion_barometers", 203 }, 204 } 205 var ( 206 tags = ident.NewTags( 207 ident.StringTag("bar", "baz"), 208 ident.StringTag("quip", "quix"), 209 ) 210 encodedTags = toEncodedTags(t, tags) 211 ) 212 213 for _, tt := range tests { 214 t.Run(tt.name, func(t *testing.T) { 215 seriesID := ident.BytesID(tt.id) 216 d, err := convert.FromSeriesIDAndEncodedTags(seriesID, encodedTags) 217 assert.NoError(t, err) 218 assertContentsMatch(t, seriesID, tags.Values(), d) 219 assert.False(t, test.ByteSlicesBackedBySameData(seriesID.Bytes(), d.ID)) 220 for i := range d.Fields { 221 assertBackedBySameData(t, d.ID, d.Fields[i].Name) 222 assertBackedBySameData(t, d.ID, d.Fields[i].Value) 223 } 224 }) 225 } 226 } 227 228 func TestFromSeriesIDAndEncodedTags_EmptyEncodedTags(t *testing.T) { 229 tests := []struct { 230 name string 231 encodedTags []byte 232 }{ 233 { 234 name: "nil slice", 235 encodedTags: nil, 236 }, 237 { 238 name: "empty slice", 239 encodedTags: make([]byte, 0), 240 }, 241 } 242 243 var ( 244 seriesID = ident.BytesID("foo") 245 expected = doc.Metadata{ 246 ID: seriesID, 247 
Fields: nil, 248 } 249 ) 250 251 for _, tt := range tests { 252 t.Run(tt.name, func(t *testing.T) { 253 d, err := convert.FromSeriesIDAndEncodedTags(seriesID, tt.encodedTags) 254 assert.NoError(t, err) 255 assert.Equal(t, expected, d) 256 assert.False(t, test.ByteSlicesBackedBySameData(seriesID.Bytes(), d.ID)) 257 }) 258 } 259 } 260 261 func TestFromSeriesIDAndEncodedTagsInvalid(t *testing.T) { 262 var ( 263 validEncodedTags = []byte{117, 39, 1, 0, 3, 0, 98, 97, 114, 3, 0, 98, 97, 122} 264 tagsWithReservedName = toEncodedTags(t, ident.NewTags( 265 ident.StringTag(string(convert.ReservedFieldNameID), "some_value"), 266 )) 267 ) 268 269 tests := []struct { 270 name string 271 encodedTags []byte 272 }{ 273 { 274 name: "reserved tag name", 275 encodedTags: tagsWithReservedName, 276 }, 277 { 278 name: "incomplete header", 279 encodedTags: validEncodedTags[:3], 280 }, 281 { 282 name: "incomplete tag name length", 283 encodedTags: validEncodedTags[:5], 284 }, 285 { 286 name: "incomplete tag value length", 287 encodedTags: validEncodedTags[:10], 288 }, 289 { 290 name: "invalid magic number", 291 encodedTags: []byte{42, 42, 0, 0}, 292 }, 293 { 294 name: "empty tag name", 295 encodedTags: []byte{117, 39, 1, 0, 0, 0, 3, 0, 98, 97, 122}, 296 }, 297 } 298 seriesID := ident.BytesID("foo") 299 300 for _, tt := range tests { 301 t.Run(tt.name, func(t *testing.T) { 302 _, err := convert.FromSeriesIDAndEncodedTags(seriesID, tt.encodedTags) 303 assert.Error(t, err) 304 }) 305 } 306 } 307 308 func TestToSeriesValid(t *testing.T) { 309 d := doc.Metadata{ 310 ID: []byte("foo"), 311 Fields: []doc.Field{ 312 {Name: []byte("bar"), Value: []byte("baz")}, 313 {Name: []byte("some"), Value: []byte("others")}, 314 }, 315 } 316 id, tags, err := convert.ToSeries(d, testOpts) 317 assert.NoError(t, err) 318 assert.Equal(t, 2, tags.Remaining()) 319 assert.Equal(t, "foo", id.String()) 320 assert.True(t, ident.NewTagIterMatcher( 321 ident.MustNewTagStringsIterator("bar", "baz", "some", 
"others")).Matches(tags)) 322 } 323 324 func TestTagsFromTagsIter(t *testing.T) { 325 var ( 326 id = ident.StringID("foo") 327 expectedTags = ident.NewTags( 328 ident.StringTag("bar", "baz"), 329 ident.StringTag("foo", "m3"), 330 ) 331 tagsIter = ident.NewTagsIterator(expectedTags) 332 ) 333 334 tags, err := convert.TagsFromTagsIter(id, tagsIter, testOpts.IdentPool) 335 require.NoError(t, err) 336 require.True(t, true, expectedTags.Equal(tags)) 337 } 338 339 func TestTagsFromTagsIterNoPool(t *testing.T) { 340 var ( 341 id = ident.StringID("foo") 342 expectedTags = ident.NewTags( 343 ident.StringTag("bar", "baz"), 344 ident.StringTag("foo", "m3"), 345 ) 346 tagsIter = ident.NewTagsIterator(expectedTags) 347 ) 348 349 tags, err := convert.TagsFromTagsIter(id, tagsIter, nil) 350 require.NoError(t, err) 351 require.True(t, true, expectedTags.Equal(tags)) 352 } 353 354 func TestToSeriesInvalidID(t *testing.T) { 355 d := doc.Metadata{ 356 Fields: []doc.Field{ 357 {Name: []byte("bar"), Value: []byte("baz")}, 358 }, 359 } 360 _, _, err := convert.ToSeries(d, testOpts) 361 assert.Error(t, err) 362 } 363 364 func TestToSeriesInvalidTag(t *testing.T) { 365 d := doc.Metadata{ 366 ID: []byte("foo"), 367 Fields: []doc.Field{ 368 {Name: convert.ReservedFieldNameID, Value: []byte("baz")}, 369 }, 370 } 371 _, tags, err := convert.ToSeries(d, testOpts) 372 assert.NoError(t, err) 373 assert.False(t, tags.Next()) 374 assert.Error(t, tags.Err()) 375 } 376 377 func invalidUTF8Bytes(t *testing.T) []byte { 378 bytes, err := hex.DecodeString("bf") 379 require.NoError(t, err) 380 require.False(t, utf8.Valid(bytes)) 381 return bytes 382 } 383 384 func TestValidateSeries(t *testing.T) { 385 invalidBytes := checked.NewBytes(invalidUTF8Bytes(t), nil) 386 387 t.Run("id non-utf8", func(t *testing.T) { 388 err := convert.ValidateSeries(ident.BinaryID(invalidBytes), 389 ident.NewTags(ident.Tag{ 390 Name: ident.StringID("bar"), 391 Value: ident.StringID("baz"), 392 })) 393 require.Error(t, err) 394 
assert.Contains(t, err.Error(), "invalid non-UTF8 ID") 395 }) 396 397 t.Run("tag name reserved", func(t *testing.T) { 398 reservedName := checked.NewBytes(convert.ReservedFieldNameID, nil) 399 err := convert.ValidateSeries(ident.StringID("foo"), 400 ident.NewTags(ident.Tag{ 401 Name: ident.BinaryID(reservedName), 402 Value: ident.StringID("bar"), 403 })) 404 require.Error(t, err) 405 assert.Contains(t, err.Error(), "reserved field name") 406 }) 407 408 t.Run("tag name non-utf8", func(t *testing.T) { 409 err := convert.ValidateSeries(ident.StringID("foo"), 410 ident.NewTags(ident.Tag{ 411 Name: ident.BinaryID(invalidBytes), 412 Value: ident.StringID("bar"), 413 })) 414 require.Error(t, err) 415 assert.Contains(t, err.Error(), "invalid non-UTF8 field name") 416 }) 417 418 t.Run("tag value non-utf8", func(t *testing.T) { 419 err := convert.ValidateSeries(ident.StringID("foo"), 420 ident.NewTags(ident.Tag{ 421 Name: ident.StringID("bar"), 422 Value: ident.BinaryID(invalidBytes), 423 })) 424 require.Error(t, err) 425 assert.Contains(t, err.Error(), "invalid non-UTF8 field value") 426 }) 427 } 428 429 // TODO(prateek): add a test to ensure we're interacting with the Pools as expected 430 431 func assertContentsMatch(t *testing.T, seriesID ident.ID, tags []ident.Tag, doc doc.Metadata) { 432 assert.Equal(t, seriesID.String(), string(doc.ID)) 433 assert.Len(t, doc.Fields, len(tags)) 434 for i, f := range doc.Fields { //nolint:gocritic 435 assert.Equal(t, tags[i].Name.String(), string(f.Name)) 436 assert.Equal(t, tags[i].Value.String(), string(f.Value)) 437 } 438 } 439 440 func assertBackedBySameData(t *testing.T, outer, inner []byte) { 441 if idx := bytes.Index(outer, inner); idx != -1 { 442 subslice := outer[idx : idx+len(inner)] 443 assert.True(t, test.ByteSlicesBackedBySameData(subslice, inner)) 444 } 445 } 446 447 func toEncodedTags(t *testing.T, tags ident.Tags) []byte { 448 pool := serialize.NewTagEncoderPool(serialize.NewTagEncoderOptions(), nil) 449 pool.Init() 450 
encoder := pool.Get() 451 defer encoder.Finalize() 452 453 require.NoError(t, encoder.Encode(ident.NewTagsIterator(tags))) 454 data, ok := encoder.Data() 455 require.True(t, ok) 456 return append([]byte(nil), data.Bytes()...) 457 }