github.com/haalcala/mattermost-server-change-repo/v5@v5.33.2/services/searchengine/bleveengine/indexer/indexing_job.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See LICENSE.txt for license information. 3 4 package ebleveengine 5 6 import ( 7 "context" 8 "net/http" 9 "strconv" 10 "time" 11 12 "github.com/mattermost/mattermost-server/v5/app" 13 "github.com/mattermost/mattermost-server/v5/jobs" 14 tjobs "github.com/mattermost/mattermost-server/v5/jobs/interfaces" 15 "github.com/mattermost/mattermost-server/v5/mlog" 16 "github.com/mattermost/mattermost-server/v5/model" 17 "github.com/mattermost/mattermost-server/v5/services/searchengine/bleveengine" 18 ) 19 20 const ( 21 BatchSize = 1000 22 TimeBetweenBatches = 100 23 EstimatedPostCount = 10000000 24 EstimatedChannelCount = 100000 25 EstimatedUserCount = 10000 26 ) 27 28 func init() { 29 app.RegisterJobsBleveIndexerInterface(func(s *app.Server) tjobs.IndexerJobInterface { 30 return &BleveIndexerInterfaceImpl{s} 31 }) 32 } 33 34 type BleveIndexerInterfaceImpl struct { 35 Server *app.Server 36 } 37 38 type BleveIndexerWorker struct { 39 name string 40 stop chan bool 41 stopped chan bool 42 jobs chan model.Job 43 jobServer *jobs.JobServer 44 45 engine *bleveengine.BleveEngine 46 } 47 48 func (bi *BleveIndexerInterfaceImpl) MakeWorker() model.Worker { 49 if bi.Server.SearchEngine.BleveEngine == nil { 50 return nil 51 } 52 return &BleveIndexerWorker{ 53 name: "BleveIndexer", 54 stop: make(chan bool, 1), 55 stopped: make(chan bool, 1), 56 jobs: make(chan model.Job), 57 jobServer: bi.Server.Jobs, 58 59 engine: bi.Server.SearchEngine.BleveEngine.(*bleveengine.BleveEngine), 60 } 61 } 62 63 type IndexingProgress struct { 64 Now time.Time 65 StartAtTime int64 66 EndAtTime int64 67 LastEntityTime int64 68 TotalPostsCount int64 69 DonePostsCount int64 70 DonePosts bool 71 TotalChannelsCount int64 72 DoneChannelsCount int64 73 DoneChannels bool 74 TotalUsersCount int64 75 DoneUsersCount int64 76 DoneUsers bool 77 } 78 79 func (ip *IndexingProgress) CurrentProgress() int64 { 80 return (ip.DonePostsCount + ip.DoneChannelsCount + ip.DoneUsersCount) * 100 / (ip.TotalPostsCount + ip.TotalChannelsCount + ip.TotalUsersCount) 81 } 82 83 func (ip *IndexingProgress) IsDone() bool { 84 return ip.DonePosts && ip.DoneChannels && ip.DoneUsers 85 } 86 87 func (worker *BleveIndexerWorker) JobChannel() chan<- model.Job { 88 return worker.jobs 89 } 90 91 func (worker *BleveIndexerWorker) Run() { 92 mlog.Debug("Worker Started", mlog.String("workername", worker.name)) 93 94 defer func() { 95 mlog.Debug("Worker: Finished", mlog.String("workername", worker.name)) 96 worker.stopped <- true 97 }() 98 99 for { 100 select { 101 case <-worker.stop: 102 mlog.Debug("Worker: Received stop signal", mlog.String("workername", worker.name)) 103 return 104 case job := <-worker.jobs: 105 mlog.Debug("Worker: Received a new candidate job.", mlog.String("workername", worker.name)) 106 worker.DoJob(&job) 107 } 108 } 109 } 110 111 func (worker *BleveIndexerWorker) Stop() { 112 mlog.Debug("Worker Stopping", mlog.String("workername", worker.name)) 113 worker.stop <- true 114 <-worker.stopped 115 } 116 117 func (worker *BleveIndexerWorker) DoJob(job *model.Job) { 118 claimed, err := worker.jobServer.ClaimJob(job) 119 if err != nil { 120 mlog.Warn("Worker: Error occurred while trying to claim job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 121 return 122 } 123 if !claimed { 124 return 125 } 126 127 mlog.Info("Worker: Indexing job claimed by worker", mlog.String("workername", worker.name), mlog.String("job_id", job.Id)) 128 129 if !worker.engine.IsActive() { 130 appError := model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.do_job.engine_inactive", nil, "", http.StatusInternalServerError) 131 if err := worker.jobServer.SetJobError(job, appError); err != nil { 132 mlog.Error("Worker: Failed to run job as ") 133 } 134 return 135 } 136 137 progress := IndexingProgress{ 138 Now: time.Now(), 139 DonePosts: false, 140 DoneChannels: false, 141 DoneUsers: false, 142 StartAtTime: 0, 143 EndAtTime: model.GetMillis(), 144 } 145 146 // Extract the start and end times, if they are set. 147 if startString, ok := job.Data["start_time"]; ok { 148 startInt, err := strconv.ParseInt(startString, 10, 64) 149 if err != nil { 150 mlog.Error("Worker: Failed to parse start_time for job", mlog.String("workername", worker.name), mlog.String("start_time", startString), mlog.String("job_id", job.Id), mlog.Err(err)) 151 appError := model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.do_job.parse_start_time.error", nil, err.Error(), http.StatusInternalServerError) 152 if err := worker.jobServer.SetJobError(job, appError); err != nil { 153 mlog.Error("Worker: Failed to set job error", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err), mlog.NamedErr("set_error", appError)) 154 } 155 return 156 } 157 progress.StartAtTime = startInt 158 progress.LastEntityTime = progress.StartAtTime 159 } else { 160 // Set start time to oldest post in the database. 161 oldestPost, err := worker.jobServer.Store.Post().GetOldest() 162 if err != nil { 163 mlog.Error("Worker: Failed to fetch oldest post for job.", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.String("start_time", startString), mlog.Err(err)) 164 appError := model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.do_job.get_oldest_post.error", nil, err.Error(), http.StatusInternalServerError) 165 if err := worker.jobServer.SetJobError(job, appError); err != nil { 166 mlog.Error("Worker: Failed to set job error", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err), mlog.NamedErr("set_error", appError)) 167 } 168 return 169 } 170 progress.StartAtTime = oldestPost.CreateAt 171 progress.LastEntityTime = progress.StartAtTime 172 } 173 174 if endString, ok := job.Data["end_time"]; ok { 175 endInt, err := strconv.ParseInt(endString, 10, 64) 176 if err != nil { 177 mlog.Error("Worker: Failed to parse end_time for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.String("end_time", endString), mlog.Err(err)) 178 appError := model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.do_job.parse_end_time.error", nil, err.Error(), http.StatusInternalServerError) 179 if err := worker.jobServer.SetJobError(job, appError); err != nil { 180 mlog.Error("Worker: Failed to set job errorv", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err), mlog.NamedErr("set_error", appError)) 181 } 182 return 183 } 184 progress.EndAtTime = endInt 185 } 186 187 // Counting all posts may fail or timeout when the posts table is large. If this happens, log a warning, but carry 188 // on with the indexing job anyway. The only issue is that the progress % reporting will be inaccurate. 189 if count, err := worker.jobServer.Store.Post().AnalyticsPostCount("", false, false); err != nil { 190 mlog.Warn("Worker: Failed to fetch total post count for job. An estimated value will be used for progress reporting.", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 191 progress.TotalPostsCount = EstimatedPostCount 192 } else { 193 progress.TotalPostsCount = count 194 } 195 196 // Same possible fail as above can happen when counting channels 197 if count, err := worker.jobServer.Store.Channel().AnalyticsTypeCount("", "O"); err != nil { 198 mlog.Warn("Worker: Failed to fetch total channel count for job. An estimated value will be used for progress reporting.", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 199 progress.TotalChannelsCount = EstimatedChannelCount 200 } else { 201 progress.TotalChannelsCount = count 202 } 203 204 // Same possible fail as above can happen when counting users 205 if count, err := worker.jobServer.Store.User().Count(model.UserCountOptions{}); err != nil { 206 mlog.Warn("Worker: Failed to fetch total user count for job. An estimated value will be used for progress reporting.", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 207 progress.TotalUsersCount = EstimatedUserCount 208 } else { 209 progress.TotalUsersCount = count 210 } 211 212 cancelCtx, cancelCancelWatcher := context.WithCancel(context.Background()) 213 cancelWatcherChan := make(chan interface{}, 1) 214 go worker.jobServer.CancellationWatcher(cancelCtx, job.Id, cancelWatcherChan) 215 216 defer cancelCancelWatcher() 217 218 for { 219 select { 220 case <-cancelWatcherChan: 221 mlog.Info("Worker: Indexing job has been canceled via CancellationWatcher", mlog.String("workername", worker.name), mlog.String("job_id", job.Id)) 222 if err := worker.jobServer.SetJobCanceled(job); err != nil { 223 mlog.Error("Worker: Failed to mark job as cancelled", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 224 } 225 return 226 227 case <-worker.stop: 228 mlog.Info("Worker: Indexing has been canceled via Worker Stop", mlog.String("workername", worker.name), mlog.String("job_id", job.Id)) 229 if err := worker.jobServer.SetJobCanceled(job); err != nil { 230 mlog.Error("Worker: Failed to mark job as canceled", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 231 } 232 return 233 234 case <-time.After(TimeBetweenBatches * time.Millisecond): 235 var err *model.AppError 236 if progress, err = worker.IndexBatch(progress); err != nil { 237 mlog.Error("Worker: Failed to index batch for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 238 if err2 := worker.jobServer.SetJobError(job, err); err2 != nil { 239 mlog.Error("Worker: Failed to set job error", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err2), mlog.NamedErr("set_error", err)) 240 } 241 return 242 } 243 244 if err := worker.jobServer.SetJobProgress(job, progress.CurrentProgress()); err != nil { 245 mlog.Error("Worker: Failed to set progress for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 246 if err2 := worker.jobServer.SetJobError(job, err); err2 != nil { 247 mlog.Error("Worker: Failed to set error for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err2), mlog.NamedErr("set_error", err)) 248 } 249 return 250 } 251 252 if progress.IsDone() { 253 if err := worker.jobServer.SetJobSuccess(job); err != nil { 254 mlog.Error("Worker: Failed to set success for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 255 if err2 := worker.jobServer.SetJobError(job, err); err2 != nil { 256 mlog.Error("Worker: Failed to set error for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err2), mlog.NamedErr("set_error", err)) 257 } 258 } 259 mlog.Info("Worker: Indexing job finished successfully", mlog.String("workername", worker.name), mlog.String("job_id", job.Id)) 260 return 261 } 262 } 263 } 264 } 265 266 func (worker *BleveIndexerWorker) IndexBatch(progress IndexingProgress) (IndexingProgress, *model.AppError) { 267 if !progress.DonePosts { 268 return worker.IndexPostsBatch(progress) 269 } 270 if !progress.DoneChannels { 271 return worker.IndexChannelsBatch(progress) 272 } 273 if !progress.DoneUsers { 274 return worker.IndexUsersBatch(progress) 275 } 276 return progress, model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.index_batch.nothing_left_to_index.error", nil, "", http.StatusInternalServerError) 277 } 278 279 func (worker *BleveIndexerWorker) IndexPostsBatch(progress IndexingProgress) (IndexingProgress, *model.AppError) { 280 endTime := progress.LastEntityTime + int64(*worker.jobServer.Config().BleveSettings.BulkIndexingTimeWindowSeconds*1000) 281 282 var posts []*model.PostForIndexing 283 284 tries := 0 285 for posts == nil { 286 var err error 287 posts, err = worker.jobServer.Store.Post().GetPostsBatchForIndexing(progress.LastEntityTime, endTime, BatchSize) 288 if err != nil { 289 if tries >= 10 { 290 return progress, model.NewAppError("IndexPostsBatch", "app.post.get_posts_batch_for_indexing.get.app_error", nil, err.Error(), http.StatusInternalServerError) 291 } 292 mlog.Warn("Failed to get posts batch for indexing. Retrying.", mlog.Err(err)) 293 294 // Wait a bit before trying again. 295 time.Sleep(15 * time.Second) 296 } 297 298 tries++ 299 } 300 301 newLastMessageTime, err := worker.BulkIndexPosts(posts, progress) 302 if err != nil { 303 return progress, err 304 } 305 306 // Due to the "endTime" parameter in the store query, we might get an incomplete batch before the end. In this 307 // case, set the "newLastMessageTime" to the endTime so we don't get stuck running the same query in a loop. 308 if len(posts) < BatchSize { 309 newLastMessageTime = endTime 310 } 311 312 // When to Stop: we index either until we pass a batch of messages where the last 313 // message is created at or after the specified end time when setting up the batch 314 // index, or until two consecutive full batches have the same end time of their final 315 // messages. This second case is safe as long as the assumption that the database 316 // cannot contain more messages with the same CreateAt time than the batch size holds. 317 if progress.EndAtTime <= newLastMessageTime { 318 progress.DonePosts = true 319 progress.LastEntityTime = progress.StartAtTime 320 } else if progress.LastEntityTime == newLastMessageTime && len(posts) == BatchSize { 321 mlog.Warn("More posts with the same CreateAt time were detected than the permitted batch size. Aborting indexing job.", mlog.Int64("CreateAt", newLastMessageTime), mlog.Int("Batch Size", BatchSize)) 322 progress.DonePosts = true 323 progress.LastEntityTime = progress.StartAtTime 324 } else { 325 progress.LastEntityTime = newLastMessageTime 326 } 327 328 progress.DonePostsCount += int64(len(posts)) 329 330 return progress, nil 331 } 332 333 func (worker *BleveIndexerWorker) BulkIndexPosts(posts []*model.PostForIndexing, progress IndexingProgress) (int64, *model.AppError) { 334 lastCreateAt := int64(0) 335 batch := worker.engine.PostIndex.NewBatch() 336 337 for _, post := range posts { 338 if post.DeleteAt == 0 { 339 searchPost := bleveengine.BLVPostFromPostForIndexing(post) 340 batch.Index(searchPost.Id, searchPost) 341 } else { 342 batch.Delete(post.Id) 343 } 344 345 lastCreateAt = post.CreateAt 346 } 347 348 worker.engine.Mutex.RLock() 349 defer worker.engine.Mutex.RUnlock() 350 351 if err := worker.engine.PostIndex.Batch(batch); err != nil { 352 return 0, model.NewAppError("BleveIndexerWorker.BulkIndexPosts", "bleveengine.indexer.do_job.bulk_index_posts.batch_error", nil, err.Error(), http.StatusInternalServerError) 353 } 354 return lastCreateAt, nil 355 } 356 357 func (worker *BleveIndexerWorker) BulkIndexFiles(files []*model.FileForIndexing, progress IndexingProgress) (int64, *model.AppError) { 358 lastCreateAt := int64(0) 359 batch := worker.engine.FileIndex.NewBatch() 360 361 for _, file := range files { 362 if file.DeleteAt == 0 { 363 searchFile := bleveengine.BLVFileFromFileForIndexing(file) 364 batch.Index(searchFile.Id, searchFile) 365 } else { 366 batch.Delete(file.Id) 367 } 368 369 lastCreateAt = file.CreateAt 370 } 371 372 worker.engine.Mutex.RLock() 373 defer worker.engine.Mutex.RUnlock() 374 375 if err := worker.engine.FileIndex.Batch(batch); err != nil { 376 return 0, model.NewAppError("BleveIndexerWorker.BulkIndexPosts", "bleveengine.indexer.do_job.bulk_index_files.batch_error", nil, err.Error(), http.StatusInternalServerError) 377 } 378 return lastCreateAt, nil 379 } 380 381 func (worker *BleveIndexerWorker) IndexChannelsBatch(progress IndexingProgress) (IndexingProgress, *model.AppError) { 382 endTime := progress.LastEntityTime + int64(*worker.jobServer.Config().BleveSettings.BulkIndexingTimeWindowSeconds*1000) 383 384 var channels []*model.Channel 385 386 tries := 0 387 for channels == nil { 388 var nErr error 389 channels, nErr = worker.jobServer.Store.Channel().GetChannelsBatchForIndexing(progress.LastEntityTime, endTime, BatchSize) 390 if nErr != nil { 391 if tries >= 10 { 392 return progress, model.NewAppError("BleveIndexerWorker.IndexChannelsBatch", "app.channel.get_channels_batch_for_indexing.get.app_error", nil, nErr.Error(), http.StatusInternalServerError) 393 } 394 395 mlog.Warn("Failed to get channels batch for indexing. Retrying.", mlog.Err(nErr)) 396 397 // Wait a bit before trying again. 398 time.Sleep(15 * time.Second) 399 } 400 tries++ 401 } 402 403 newLastChannelTime, err := worker.BulkIndexChannels(channels, progress) 404 if err != nil { 405 return progress, err 406 } 407 408 // Due to the "endTime" parameter in the store query, we might get an incomplete batch before the end. In this 409 // case, set the "newLastChannelTime" to the endTime so we don't get stuck running the same query in a loop. 410 if len(channels) < BatchSize { 411 newLastChannelTime = endTime 412 } 413 414 // When to Stop: we index either until we pass a batch of channels where the last 415 // channel is created at or after the specified end time when setting up the batch 416 // index, or until two consecutive full batches have the same end time of their final 417 // channels. This second case is safe as long as the assumption that the database 418 // cannot contain more channels with the same CreateAt time than the batch size holds. 419 if progress.EndAtTime <= newLastChannelTime { 420 progress.DoneChannels = true 421 progress.LastEntityTime = progress.StartAtTime 422 } else if progress.LastEntityTime == newLastChannelTime && len(channels) == BatchSize { 423 mlog.Warn("More channels with the same CreateAt time were detected than the permitted batch size. Aborting indexing job.", mlog.Int64("CreateAt", newLastChannelTime), mlog.Int("Batch Size", BatchSize)) 424 progress.DoneChannels = true 425 progress.LastEntityTime = progress.StartAtTime 426 } else { 427 progress.LastEntityTime = newLastChannelTime 428 } 429 430 progress.DoneChannelsCount += int64(len(channels)) 431 432 return progress, nil 433 } 434 435 func (worker *BleveIndexerWorker) BulkIndexChannels(channels []*model.Channel, progress IndexingProgress) (int64, *model.AppError) { 436 lastCreateAt := int64(0) 437 batch := worker.engine.ChannelIndex.NewBatch() 438 439 for _, channel := range channels { 440 if channel.DeleteAt == 0 { 441 searchChannel := bleveengine.BLVChannelFromChannel(channel) 442 batch.Index(searchChannel.Id, searchChannel) 443 } else { 444 batch.Delete(channel.Id) 445 } 446 447 lastCreateAt = channel.CreateAt 448 } 449 450 worker.engine.Mutex.RLock() 451 defer worker.engine.Mutex.RUnlock() 452 453 if err := worker.engine.ChannelIndex.Batch(batch); err != nil { 454 return 0, model.NewAppError("BleveIndexerWorker.BulkIndexChannels", "bleveengine.indexer.do_job.bulk_index_channels.batch_error", nil, err.Error(), http.StatusInternalServerError) 455 } 456 return lastCreateAt, nil 457 } 458 459 func (worker *BleveIndexerWorker) IndexUsersBatch(progress IndexingProgress) (IndexingProgress, *model.AppError) { 460 endTime := progress.LastEntityTime + int64(*worker.jobServer.Config().BleveSettings.BulkIndexingTimeWindowSeconds*1000) 461 462 var users []*model.UserForIndexing 463 464 tries := 0 465 for users == nil { 466 if usersBatch, err := worker.jobServer.Store.User().GetUsersBatchForIndexing(progress.LastEntityTime, endTime, BatchSize); err != nil { 467 if tries >= 10 { 468 return progress, model.NewAppError("IndexUsersBatch", "app.user.get_users_batch_for_indexing.get_users.app_error", nil, err.Error(), http.StatusInternalServerError) 469 } 470 mlog.Warn("Failed to get users batch for indexing. Retrying.", mlog.Err(err)) 471 472 // Wait a bit before trying again. 473 time.Sleep(15 * time.Second) 474 } else { 475 users = usersBatch 476 } 477 478 tries++ 479 } 480 481 newLastUserTime, err := worker.BulkIndexUsers(users, progress) 482 if err != nil { 483 return progress, err 484 } 485 486 // Due to the "endTime" parameter in the store query, we might get an incomplete batch before the end. In this 487 // case, set the "newLastUserTime" to the endTime so we don't get stuck running the same query in a loop. 488 if len(users) < BatchSize { 489 newLastUserTime = endTime 490 } 491 492 // When to Stop: we index either until we pass a batch of users where the last 493 // user is created at or after the specified end time when setting up the batch 494 // index, or until two consecutive full batches have the same end time of their final 495 // users. This second case is safe as long as the assumption that the database 496 // cannot contain more users with the same CreateAt time than the batch size holds. 497 if progress.EndAtTime <= newLastUserTime { 498 progress.DoneUsers = true 499 progress.LastEntityTime = progress.StartAtTime 500 } else if progress.LastEntityTime == newLastUserTime && len(users) == BatchSize { 501 mlog.Warn("More users with the same CreateAt time were detected than the permitted batch size. Aborting indexing job.", mlog.Int64("CreateAt", newLastUserTime), mlog.Int("Batch Size", BatchSize)) 502 progress.DoneUsers = true 503 progress.LastEntityTime = progress.StartAtTime 504 } else { 505 progress.LastEntityTime = newLastUserTime 506 } 507 508 progress.DoneUsersCount += int64(len(users)) 509 510 return progress, nil 511 } 512 513 func (worker *BleveIndexerWorker) BulkIndexUsers(users []*model.UserForIndexing, progress IndexingProgress) (int64, *model.AppError) { 514 lastCreateAt := int64(0) 515 batch := worker.engine.UserIndex.NewBatch() 516 517 for _, user := range users { 518 if user.DeleteAt == 0 { 519 searchUser := bleveengine.BLVUserFromUserForIndexing(user) 520 batch.Index(searchUser.Id, searchUser) 521 } else { 522 batch.Delete(user.Id) 523 } 524 525 lastCreateAt = user.CreateAt 526 } 527 528 worker.engine.Mutex.RLock() 529 defer worker.engine.Mutex.RUnlock() 530 531 if err := worker.engine.UserIndex.Batch(batch); err != nil { 532 return 0, model.NewAppError("BleveIndexerWorker.BulkIndexUsers", "bleveengine.indexer.do_job.bulk_index_users.batch_error", nil, err.Error(), http.StatusInternalServerError) 533 } 534 return lastCreateAt, nil 535 }