github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/merger_test.go

// Copyright (c) 2019 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package fs

import (
	"io"
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/golang/mock/gomock"
	"github.com/m3db/m3/src/dbnode/digest"
	"github.com/m3db/m3/src/dbnode/encoding"
	"github.com/m3db/m3/src/dbnode/encoding/m3tsz"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/storage/block"
	"github.com/m3db/m3/src/dbnode/ts"
	"github.com/m3db/m3/src/dbnode/x/xio"
	"github.com/m3db/m3/src/m3ninx/doc"
	"github.com/m3db/m3/src/x/checked"
	"github.com/m3db/m3/src/x/context"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/pool"
	xtime "github.com/m3db/m3/src/x/time"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

const (
	blockSize = time.Hour
)

var (
	srPool        xio.SegmentReaderPool
	multiIterPool encoding.MultiReaderIteratorPool
	identPool     ident.Pool
	encoderPool   encoding.EncoderPool
	contextPool   context.Pool
	bytesPool     pool.CheckedBytesPool

	startTime = xtime.Now().Truncate(blockSize)

	id0 = ident.StringID("id0")
	id1 = ident.StringID("id1")
	id2 = ident.StringID("id2")
	id3 = ident.StringID("id3")
	id4 = ident.StringID("id4")
	id5 = ident.StringID("id5")
)

// init resources _except_ the fsReader, which should be configured on a
// per-test basis with NewMockDataFileSetReader.
func init() {
	poolOpts := pool.NewObjectPoolOptions().SetSize(1)
	srPool = xio.NewSegmentReaderPool(poolOpts)
	srPool.Init()
	multiIterPool = encoding.NewMultiReaderIteratorPool(poolOpts)
	multiIterPool.Init(m3tsz.DefaultReaderIteratorAllocFn(encoding.NewOptions()))
	bytesPool := pool.NewCheckedBytesPool(nil, poolOpts, func(s []pool.Bucket) pool.BytesPool {
		return pool.NewBytesPool(s, poolOpts)
	})
	bytesPool.Init()
	identPool = ident.NewPool(bytesPool, ident.PoolOptions{})
	encoderPool = encoding.NewEncoderPool(poolOpts)
	encoderPool.Init(func() encoding.Encoder {
		return m3tsz.NewEncoder(startTime, nil, true, encoding.NewOptions())
	})
	contextPool = context.NewPool(context.NewOptions().
		SetContextPoolOptions(poolOpts).
		SetFinalizerPoolOptions(poolOpts))
	bytesPool = pool.NewCheckedBytesPool(nil, poolOpts, func(s []pool.Bucket) pool.BytesPool {
		return pool.NewBytesPool(s, poolOpts)
	})
	bytesPool.Init()
}

func TestMergeWithIntersection(t *testing.T) {
	// This test scenario is when there is an overlap in series data between
	// disk and the merge target.
	// id0-id3 are on disk, while the merge target has id1-id5.
	// Both have id1, but they don't have datapoints with overlapping
	// timestamps.
	// Both have id2, and some datapoints have overlapping timestamps.
	// Both have id3, and all datapoints have overlapping timestamps.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	diskData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	diskData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	diskData.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))
	diskData.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 13},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 14},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 15},
	}))

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	mergeTargetData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 16},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 17},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 18},
	}))
	mergeTargetData.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 19},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 20},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 21},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 22},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 23},
		{TimestampNanos: startTime.Add(13 * time.Second), Value: 24},
		{TimestampNanos: startTime.Add(16 * time.Second), Value: 25},
	}))
	mergeTargetData.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	mergeTargetData.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	mergeTargetData.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	expected.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 16},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 17},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 18},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	expected.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 19},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 20},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 21},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 22},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 23},
		{TimestampNanos: startTime.Add(13 * time.Second), Value: 24},
		{TimestampNanos: startTime.Add(16 * time.Second), Value: 25},
	}))
	expected.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	expected.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	expected.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithFullIntersection(t *testing.T) {
	// This test scenario is when the merge target contains exactly the series
	// and timestamps that are on disk, so every datapoint overlaps.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	diskData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	diskData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	mergeTargetData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 9},
	}))
	mergeTargetData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 12},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 13},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 14},
	}))

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 9},
	}))
	expected.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 12},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 13},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 14},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithNoIntersection(t *testing.T) {
	// This test scenario is when there is no overlap between disk data and
	// merge target data (series from one source do not exist in the other).
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	diskData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	diskData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	diskData.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	mergeTargetData.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	mergeTargetData.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	mergeTargetData.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	expected.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	expected.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))
	expected.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	expected.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	expected.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithNoMergeTargetData(t *testing.T) {
	// This test scenario is when there is no data in the merge target.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	diskData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	diskData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	diskData.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	expected.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	expected.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithNoDiskData(t *testing.T) {
	// This test scenario is when there is no data on disk.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	mergeTargetData.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	mergeTargetData.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	mergeTargetData.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	expected.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	expected.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithNoData(t *testing.T) {
	// This test scenario is when there is no data on disk or in the merge target.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	testMergeWith(t, diskData, mergeTargetData, expected)
}
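// TestCleanup writes a fileset volume to disk, merges it into the next volume
// index with MergeAndCleanup, and verifies that the original volume is removed
// while the new volume remains.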
func TestCleanup(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	// Write fileset to disk
	fsOpts := NewOptions().
		SetFilePathPrefix(filePathPrefix)

	md, err := namespace.NewMetadata(ident.StringID("foo"), namespace.NewOptions())
	require.NoError(t, err)

	blockStart := xtime.Now()
	var shard uint32 = 1
	fsID := FileSetFileIdentifier{
		Namespace:   md.ID(),
		Shard:       shard,
		BlockStart:  blockStart,
		VolumeIndex: 0,
	}
	writeFilesetToDisk(t, fsID, fsOpts)

	// Verify fileset exists
	exists, err := DataFileSetExists(filePathPrefix, md.ID(), shard, blockStart, 0)
	require.NoError(t, err)
	require.True(t, exists)

	// Initialize merger
	reader, err := NewReader(bytesPool, fsOpts)
	require.NoError(t, err)

	merger := NewMerger(reader, 0, srPool, multiIterPool, identPool, encoderPool, contextPool,
		filePathPrefix, namespace.NewOptions())

	// Run merger
	pm, err := NewPersistManager(fsOpts)
	require.NoError(t, err)

	preparer, err := pm.StartFlushPersist()
	require.NoError(t, err)

	err = merger.MergeAndCleanup(fsID, NewNoopMergeWith(), fsID.VolumeIndex+1, preparer,
		namespace.NewContextFrom(md), &persist.NoOpColdFlushNamespace{}, false)
	require.NoError(t, err)

	// Verify old fileset gone and new one present
	exists, err = DataFileSetExists(filePathPrefix, md.ID(), shard, blockStart, 0)
	require.NoError(t, err)
	require.False(t, exists)

	exists, err = DataFileSetExists(filePathPrefix, md.ID(), shard, blockStart, 1)
	require.NoError(t, err)
	require.True(t, exists)
}

func TestCleanupOnceBootstrapped(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	preparer := persist.NewMockFlushPreparer(ctrl)
	md, err := namespace.NewMetadata(ident.StringID("foo"), namespace.NewOptions())
	require.NoError(t, err)

	merger := merger{}
	err = merger.MergeAndCleanup(FileSetFileIdentifier{}, NewNoopMergeWith(), 1, preparer,
		namespace.NewContextFrom(md), &persist.NoOpColdFlushNamespace{}, true)
	require.Error(t, err)
}
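// writeFilesetToDisk writes a minimal single-entry data fileset for the given
// identifier so that the cleanup test has an on-disk volume to merge.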
func writeFilesetToDisk(t *testing.T, fsID FileSetFileIdentifier, fsOpts Options) {
	w, err := NewWriter(fsOpts)
	require.NoError(t, err)

	writerOpts := DataWriterOpenOptions{
		Identifier: fsID,
		BlockSize:  2 * time.Hour,
	}
	err = w.Open(writerOpts)
	require.NoError(t, err)

	entry := []byte{1, 2, 3}

	chkdBytes := checked.NewBytes(entry, nil)
	chkdBytes.IncRef()
	metadata := persist.NewMetadataFromIDAndTags(ident.StringID("foo"),
		ident.Tags{}, persist.MetadataOptions{})
	err = w.Write(metadata, chkdBytes, digest.Checksum(entry))
	require.NoError(t, err)

	err = w.Close()
	require.NoError(t, err)
}
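// testMergeWith drives Merger.Merge with a mocked fileset reader serving
// diskData and a mocked MergeWith serving mergeTargetData, then asserts that
// the persisted series and datapoints match expectedData.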
func testMergeWith(
	t *testing.T,
	diskData *checkedBytesMap,
	mergeTargetData *checkedBytesMap,
	expectedData *checkedBytesMap,
) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	reader := mockReaderFromData(ctrl, diskData)

	var persisted []persistedData
	var deferClosed bool
	preparer := persist.NewMockFlushPreparer(ctrl)
	preparer.EXPECT().PrepareData(gomock.Any()).Return(
		persist.PreparedDataPersist{
			Persist: func(metadata persist.Metadata, segment ts.Segment, checksum uint32) error {
				persisted = append(persisted, persistedData{
					metadata: metadata,
					// NB(bodu): Once data is persisted the `ts.Segment` gets finalized,
					// so reading from it afterwards would violate the read-after-free
					// invariant. So we `Clone` the segment here.
					segment: segment.Clone(nil),
				})
				return nil
			},
			DeferClose: func() (persist.DataCloser, error) {
				return func() error {
					require.False(t, deferClosed)
					deferClosed = true
					return nil
				}, nil
			},
		}, nil)
	nsCtx := namespace.Context{}

	nsOpts := namespace.NewOptions()
	merger := NewMerger(reader, 0, srPool, multiIterPool,
		identPool, encoderPool, contextPool, NewOptions().FilePathPrefix(), nsOpts)
	fsID := FileSetFileIdentifier{
		Namespace:  ident.StringID("test-ns"),
		Shard:      uint32(8),
		BlockStart: startTime,
	}
	mergeWith := mockMergeWithFromData(t, ctrl, diskData, mergeTargetData)
	close, err := merger.Merge(fsID, mergeWith, 1, preparer, nsCtx, &persist.NoOpColdFlushNamespace{})
	require.NoError(t, err)
	require.False(t, deferClosed)
	require.NoError(t, close())
	require.True(t, deferClosed)

	assertPersistedAsExpected(t, persisted, expectedData)
}

func assertPersistedAsExpected(
	t *testing.T,
	persisted []persistedData,
	expectedData *checkedBytesMap,
) {
	// Assert same number of expected series IDs.
	require.Equal(t, expectedData.Len(), len(persisted))

	for _, actualData := range persisted {
		id := actualData.metadata.BytesID()
		data, exists := expectedData.Get(ident.StringID(string(id)))
		require.True(t, exists)
		seg := ts.NewSegment(data, nil, 0, ts.FinalizeHead)

		expectedDPs := datapointsFromSegment(t, seg)
		actualDPs := datapointsFromSegment(t, actualData.segment)
		// Assert same number of datapoints for this series.
		require.Equal(t, len(expectedDPs), len(actualDPs))
		for i := range expectedDPs {
			// Check each datapoint matches what's expected.
			assert.Equal(t, expectedDPs[i], actualDPs[i])
		}
	}
}

func datapointsToCheckedBytes(t *testing.T, dps []ts.Datapoint) checked.Bytes {
	encoder := encoderPool.Get()
	defer encoder.Close()
	for _, dp := range dps {
		encoder.Encode(dp, xtime.Second, nil)
	}

	ctx := context.NewBackground()
	defer ctx.Close()

	r, ok := encoder.Stream(ctx)
	require.True(t, ok)
	bytes, err := xio.ToBytes(r)
	require.Equal(t, io.EOF, err)

	copied := append([]byte(nil), bytes...)
	cb := checked.NewBytes(copied, nil)
	return cb
}

func mockReaderFromData(
	ctrl *gomock.Controller,
	diskData *checkedBytesMap,
) *MockDataFileSetReader {
	reader := NewMockDataFileSetReader(ctrl)
	reader.EXPECT().Open(gomock.Any()).Return(nil)
	reader.EXPECT().Close().Return(nil)
	tagIter := ident.NewTagsIterator(ident.NewTags(ident.StringTag("tag-key0", "tag-val0")))
	fakeChecksum := uint32(42)

	var inOrderCalls []*gomock.Call
	for _, val := range diskData.Iter() {
		id := val.Key()
		data := val.Value()
		inOrderCalls = append(inOrderCalls,
			reader.EXPECT().Read().Return(id, tagIter, data, fakeChecksum, nil))
	}
	// Make sure to return io.EOF at the end.
	inOrderCalls = append(inOrderCalls,
		reader.EXPECT().Read().Return(nil, nil, nil, uint32(0), io.EOF))
	gomock.InOrder(inOrderCalls...)

	return reader
}
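// mockMergeWithFromData builds a MockMergeWith backed by mergeTargetData:
// Read serves series that also exist on disk, and ForEachRemaining visits the
// series that exist only in the merge target.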
func mockMergeWithFromData(
	t *testing.T,
	ctrl *gomock.Controller,
	diskData *checkedBytesMap,
	mergeTargetData *checkedBytesMap,
) *MockMergeWith {
	mergeWith := NewMockMergeWith(ctrl)

	// Get the series IDs in the merge target that do not exist in disk data.
	// This logic is not tested here because it should be part of tests of the
	// mergeWith implementation.
	var remaining []ident.ID

	// Expect mergeWith.Read for all data points once. Go through all data on
	// disk, then go through remaining items from merge target.
	for _, val := range diskData.Iter() {
		id := val.Key()

		if mergeTargetData.Contains(id) {
			data, ok := mergeTargetData.Get(id)
			require.True(t, ok)
			segReader := srPool.Get()
			br := []xio.BlockReader{blockReaderFromData(data, segReader, startTime, blockSize)}
			mergeWith.EXPECT().Read(gomock.Any(), id, gomock.Any(), gomock.Any()).
				Return(br, true, nil)
		} else {
			mergeWith.EXPECT().Read(gomock.Any(), id, gomock.Any(), gomock.Any()).
				Return(nil, false, nil)
		}
	}
	for _, val := range mergeTargetData.Iter() {
		id := val.Key()
		if !diskData.Contains(id) {
			// Capture remaining items so that we can call the ForEachRemaining
			// fn on them later.
			remaining = append(remaining, id)
		}
	}

	mergeWith.EXPECT().
		ForEachRemaining(gomock.Any(), startTime, gomock.Any(), gomock.Any()).
		Return(nil).
		Do(func(ctx context.Context, blockStart xtime.UnixNano, fn ForEachRemainingFn, nsCtx namespace.Context) {
			for _, id := range remaining {
				data, ok := mergeTargetData.Get(id)
				if ok {
					segReader := srPool.Get()
					br := block.FetchBlockResult{
						Start:  startTime,
						Blocks: []xio.BlockReader{blockReaderFromData(data, segReader, startTime, blockSize)},
					}
					err := fn(doc.Metadata{ID: id.Bytes()}, br)
					require.NoError(t, err)
				}
			}
		})

	return mergeWith
}

type persistedData struct {
	metadata persist.Metadata
	segment  ts.Segment
}

func datapointsFromSegment(t *testing.T, seg ts.Segment) []ts.Datapoint {
	segReader := srPool.Get()
	segReader.Reset(seg)
	iter := multiIterPool.Get()
	iter.Reset([]xio.SegmentReader{segReader}, startTime, blockSize, nil)
	defer iter.Close()

	var dps []ts.Datapoint
	for iter.Next() {
		dp, _, _ := iter.Current()
		dps = append(dps, dp)
	}
	require.NoError(t, iter.Err())

	return dps
}

func blockReaderFromData(
	data checked.Bytes,
	segReader xio.SegmentReader,
	startTime xtime.UnixNano,
	blockSize time.Duration,
) xio.BlockReader {
	seg := ts.NewSegment(data, nil, 0, ts.FinalizeHead)
	segReader.Reset(seg)
	return xio.BlockReader{
		SegmentReader: segReader,
		Start:         startTime,
		BlockSize:     blockSize,
	}
}