github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/blockio/pipeline.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package blockio 16 17 import ( 18 "context" 19 "fmt" 20 "runtime" 21 "sync" 22 "sync/atomic" 23 "time" 24 25 "github.com/matrixorigin/matrixone/pkg/common/stopper" 26 "github.com/matrixorigin/matrixone/pkg/fileservice" 27 "github.com/matrixorigin/matrixone/pkg/objectio" 28 "github.com/matrixorigin/matrixone/pkg/util/metric/stats" 29 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common/utils" 30 w "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks/worker" 31 32 "github.com/matrixorigin/matrixone/pkg/logutil" 33 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/logstore/sm" 34 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/tasks" 35 ) 36 37 var ( 38 _jobPool = sync.Pool{ 39 New: func() any { 40 return new(tasks.Job) 41 }, 42 } 43 _readerPool = sync.Pool{ 44 New: func() any { 45 return new(objectio.ObjectReader) 46 }, 47 } 48 ) 49 50 func getJob( 51 ctx context.Context, 52 id string, 53 typ tasks.JobType, 54 exec tasks.JobExecutor) *tasks.Job { 55 job := _jobPool.Get().(*tasks.Job) 56 job.Init(ctx, id, typ, exec) 57 return job 58 } 59 60 func putJob(job *tasks.Job) { 61 job.Reset() 62 _jobPool.Put(job) 63 } 64 65 func getReader( 66 fs fileservice.FileService, 67 location objectio.Location) *objectio.ObjectReader { 68 job := _readerPool.Get().(*objectio.ObjectReader) 69 job.Init(location, fs) 70 return job 71 } 72 73 func putReader(reader *objectio.ObjectReader) { 74 reader.Reset() 75 _readerPool.Put(reader) 76 } 77 78 // At present, the read and write operations of all modules of mo-service use blockio. 79 // I have started/stopped IoPipeline when mo is initialized/stopped, but in order to 80 // be compatible with the UT of each module, I must add readColumns and noopPrefetch. 81 82 // Most UT cases do not call Start(), so in order to be compatible with these cases, 83 // the pipeline uses readColumns and noopPrefetch.In order to avoid the data race of UT, 84 // I did not switch pipeline.fetchFun and pipeline.prefetchFunc when 85 // I stopped, so I need to execute ResetPipeline again 86 87 var pipeline *IoPipeline 88 89 type IOJobFactory func(context.Context, fetchParams) *tasks.Job 90 91 func init() { 92 pipeline = NewIOPipeline() 93 } 94 95 func Start() { 96 pipeline.Start() 97 pipeline.fetchFun = pipeline.doFetch 98 pipeline.prefetchFunc = pipeline.doPrefetch 99 } 100 101 func Stop() { 102 pipeline.Stop() 103 } 104 105 func ResetPipeline() { 106 pipeline = NewIOPipeline() 107 } 108 109 func makeName(location string) string { 110 return fmt.Sprintf("%s-%d", location, time.Now().UTC().Nanosecond()) 111 } 112 113 // load data job 114 func jobFactory( 115 ctx context.Context, 116 params fetchParams, 117 ) *tasks.Job { 118 return getJob( 119 ctx, 120 makeName(params.reader.GetName()), 121 JTLoad, 122 func(_ context.Context) (res *tasks.JobResult) { 123 // TODO 124 res = &tasks.JobResult{} 125 ioVectors, err := readColumns(ctx, params) 126 if err == nil { 127 res.Res = ioVectors 128 } else { 129 res.Err = err 130 } 131 return 132 }, 133 ) 134 } 135 136 func fetchReader(params PrefetchParams) (reader *objectio.ObjectReader) { 137 if params.reader != nil { 138 reader = params.reader 139 } else { 140 reader = getReader(params.fs, params.key) 141 } 142 return 143 } 144 145 // prefetch data job 146 func prefetchJob(ctx context.Context, params PrefetchParams) *tasks.Job { 147 reader := fetchReader(params) 148 if params.prefetchFile { 149 return getJob( 150 ctx, 151 makeName(reader.GetName()), 152 JTLoad, 153 func(_ context.Context) (res *tasks.JobResult) { 154 // TODO 155 res = &tasks.JobResult{} 156 var name string 157 if params.reader == nil { 158 name = params.key.Name().String() 159 } else { 160 name = params.reader.GetName() 161 } 162 err := reader.GetFs().PrefetchFile(ctx, name) 163 if err != nil { 164 res.Err = err 165 return 166 } 167 // no further reads 168 if params.reader == nil { 169 putReader(reader) 170 } 171 return 172 }, 173 ) 174 } else { 175 return getJob( 176 ctx, 177 makeName(reader.GetName()), 178 JTLoad, 179 func(_ context.Context) (res *tasks.JobResult) { 180 // TODO 181 res = &tasks.JobResult{} 182 ioVectors, err := reader.ReadMultiBlocks(ctx, 183 params.ids, nil) 184 if err != nil { 185 res.Err = err 186 return 187 } 188 // no further reads 189 res.Res = nil 190 ioVectors.Release() 191 if params.reader == nil { 192 putReader(reader) 193 } 194 return 195 }, 196 ) 197 } 198 } 199 200 // prefetch metadata job 201 func prefetchMetaJob(ctx context.Context, params PrefetchParams) *tasks.Job { 202 name := params.key.Name().String() 203 return getJob( 204 ctx, 205 makeName(name), 206 JTLoad, 207 func(_ context.Context) (res *tasks.JobResult) { 208 res = &tasks.JobResult{} 209 _, err := objectio.FastLoadObjectMeta(ctx, ¶ms.key, true, params.fs) 210 if err != nil { 211 res.Err = err 212 return 213 } 214 return 215 }, 216 ) 217 } 218 219 type FetchFunc = func(ctx context.Context, params fetchParams) (any, error) 220 type PrefetchFunc = func(params PrefetchParams) error 221 222 func readColumns(ctx context.Context, params fetchParams) (any, error) { 223 return params.reader.ReadOneBlock(ctx, params.idxes, params.typs, params.blk, nil) 224 } 225 226 func noopPrefetch(params PrefetchParams) error { 227 // Synchronous prefetch does not need to do anything 228 return nil 229 } 230 231 type IoPipeline struct { 232 options struct { 233 fetchParallism int 234 prefetchParallism int 235 queueDepth int 236 } 237 // load queue 238 fetch struct { 239 queue sm.Queue 240 scheduler tasks.JobScheduler 241 } 242 243 // prefetch queue 244 prefetch struct { 245 queue sm.Queue 246 scheduler tasks.JobScheduler 247 } 248 249 waitQ sm.Queue 250 jobFactory IOJobFactory 251 252 active atomic.Bool 253 onceStart sync.Once 254 onceStop sync.Once 255 256 fetchFun FetchFunc 257 prefetchFunc PrefetchFunc 258 259 sensors struct { 260 prefetchDepth *utils.NumericSensor[int64] 261 } 262 263 stats struct { 264 selectivityStats *objectio.Stats 265 prefetchDropStats stats.Counter 266 } 267 printer *stopper.Stopper 268 } 269 270 func NewIOPipeline( 271 opts ...Option, 272 ) *IoPipeline { 273 p := new(IoPipeline) 274 for _, opt := range opts { 275 opt(p) 276 } 277 p.fillDefaults() 278 279 p.waitQ = sm.NewSafeQueue( 280 p.options.queueDepth, 281 100, 282 p.onWait) 283 284 // the prefetch queue is supposed to be an unblocking queue 285 p.prefetch.queue = sm.NewNonBlockingQueue(p.options.queueDepth, 64, p.onPrefetch) 286 p.prefetch.scheduler = tasks.NewParallelJobScheduler(p.options.prefetchParallism) 287 288 p.fetch.queue = sm.NewSafeQueue( 289 p.options.queueDepth, 290 64, 291 p.onFetch) 292 p.fetch.scheduler = tasks.NewParallelJobScheduler(p.options.fetchParallism) 293 294 p.fetchFun = readColumns 295 p.prefetchFunc = noopPrefetch 296 297 p.printer = stopper.NewStopper("IOPrinter") 298 return p 299 } 300 301 func (p *IoPipeline) fillDefaults() { 302 procs := runtime.GOMAXPROCS(0) 303 if p.options.fetchParallism <= 0 { 304 p.options.fetchParallism = procs * 4 305 } 306 if p.options.prefetchParallism <= 0 { 307 p.options.prefetchParallism = procs * 4 308 } 309 if p.options.queueDepth <= 0 { 310 p.options.queueDepth = 100000 311 } 312 if p.jobFactory == nil { 313 p.jobFactory = jobFactory 314 } 315 316 if p.stats.selectivityStats == nil { 317 p.stats.selectivityStats = objectio.NewStats() 318 } 319 320 if p.sensors.prefetchDepth == nil { 321 name := utils.MakeSensorName("IO", "PrefetchDepth") 322 sensor := utils.NewNumericSensor[int64]( 323 name, 324 utils.WithGetStateSensorOption( 325 func(v int64) utils.SensorState { 326 if float64(v) < 0.6*float64(p.options.queueDepth) { 327 return utils.SensorStateGreen 328 } else if float64(v) < 0.8*float64(p.options.queueDepth) { 329 return utils.SensorStateYellow 330 } else { 331 return utils.SensorStateRed 332 } 333 }, 334 ), 335 ) 336 utils.RegisterSensor(sensor) 337 p.sensors.prefetchDepth = sensor 338 } 339 } 340 341 func (p *IoPipeline) Start() { 342 p.onceStart.Do(func() { 343 p.active.Store(true) 344 p.waitQ.Start() 345 p.fetch.queue.Start() 346 p.prefetch.queue.Start() 347 if err := p.printer.RunNamedTask("io-printer-job", p.crontask); err != nil { 348 panic(err) 349 } 350 }) 351 } 352 353 func (p *IoPipeline) Stop() { 354 p.onceStop.Do(func() { 355 p.printer.Stop() 356 p.active.Store(false) 357 358 p.prefetch.queue.Stop() 359 p.fetch.queue.Stop() 360 361 p.prefetch.scheduler.Stop() 362 p.fetch.scheduler.Stop() 363 364 p.waitQ.Stop() 365 if p.sensors.prefetchDepth != nil { 366 utils.UnregisterSensor(p.sensors.prefetchDepth) 367 p.sensors.prefetchDepth = nil 368 } 369 }) 370 } 371 372 func (p *IoPipeline) Fetch( 373 ctx context.Context, 374 params fetchParams, 375 ) (res any, err error) { 376 return p.fetchFun(ctx, params) 377 } 378 379 func (p *IoPipeline) doAsyncFetch( 380 ctx context.Context, 381 params fetchParams, 382 ) (job *tasks.Job, err error) { 383 job = p.jobFactory( 384 ctx, 385 params, 386 ) 387 if _, err = p.fetch.queue.Enqueue(job); err != nil { 388 job.DoneWithErr(err) 389 putJob(job) 390 job = nil 391 } 392 return 393 } 394 395 func (p *IoPipeline) Prefetch(params PrefetchParams) (err error) { 396 return p.prefetchFunc(params) 397 } 398 399 func (p *IoPipeline) doFetch( 400 ctx context.Context, 401 params fetchParams, 402 ) (res any, err error) { 403 job, err := p.doAsyncFetch(ctx, params) 404 if err != nil { 405 return 406 } 407 result := job.WaitDone() 408 res, err = result.Res, result.Err 409 putJob(job) 410 return 411 } 412 413 func (p *IoPipeline) doPrefetch(params PrefetchParams) (err error) { 414 if _, err = p.prefetch.queue.Enqueue(params); err == sm.ErrFull { 415 p.stats.prefetchDropStats.Add(1) 416 } 417 // prefetch doesn't care about what type of err has occurred 418 return nil 419 } 420 421 func (p *IoPipeline) onFetch(jobs ...any) { 422 for _, j := range jobs { 423 job := j.(*tasks.Job) 424 if err := p.fetch.scheduler.Schedule(job); err != nil { 425 job.DoneWithErr(err) 426 } 427 } 428 } 429 430 func (p *IoPipeline) schedulerPrefetch(job *tasks.Job) { 431 if err := p.prefetch.scheduler.Schedule(job); err != nil { 432 job.DoneWithErr(err) 433 logutil.Debugf("err is %v", err.Error()) 434 putJob(job) 435 } else { 436 if _, err := p.waitQ.Enqueue(job); err != nil { 437 job.DoneWithErr(err) 438 logutil.Debugf("err is %v", err.Error()) 439 putJob(job) 440 } 441 } 442 } 443 444 func (p *IoPipeline) onPrefetch(items ...any) { 445 if len(items) == 0 { 446 return 447 } 448 if !p.active.Load() { 449 return 450 } 451 452 processes := make([]PrefetchParams, 0) 453 for _, item := range items { 454 option := item.(PrefetchParams) 455 if len(option.ids) == 0 { 456 job := prefetchMetaJob( 457 context.Background(), 458 item.(PrefetchParams), 459 ) 460 p.schedulerPrefetch(job) 461 continue 462 } 463 processes = append(processes, option) 464 } 465 if len(processes) == 0 { 466 return 467 } 468 merged := mergePrefetch(processes) 469 for _, option := range merged { 470 job := prefetchJob( 471 context.Background(), 472 option, 473 ) 474 p.schedulerPrefetch(job) 475 } 476 } 477 478 func (p *IoPipeline) onWait(jobs ...any) { 479 for _, j := range jobs { 480 job := j.(*tasks.Job) 481 res := job.WaitDone() 482 if res == nil { 483 logutil.Infof("job is %v", job.String()) 484 putJob(job) 485 return 486 } 487 if res.Err != nil { 488 logutil.Warnf("Prefetch %s err: %s", job.ID(), res.Err) 489 } 490 putJob(job) 491 } 492 } 493 494 func (p *IoPipeline) crontask(ctx context.Context) { 495 hb := w.NewHeartBeaterWithFunc(time.Second*10, func() { 496 logutil.Info(p.stats.selectivityStats.ExportString()) 497 // logutil.Info(p.sensors.prefetchDepth.String()) 498 // wdrops := p.stats.prefetchDropStats.SwapW(0) 499 // if wdrops > 0 { 500 // logutil.Infof("PrefetchDropStats: %d", wdrops) 501 // } 502 // logutil.Info(objectio.ExportCacheStats()) 503 }, nil) 504 hb.Start() 505 <-ctx.Done() 506 hb.Stop() 507 }