github.com/adacta-ru/mattermost-server/v6@v6.0.0/services/searchengine/bleveengine/indexer/indexing_job.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See LICENSE.txt for license information. 3 4 package ebleveengine 5 6 import ( 7 "context" 8 "net/http" 9 "strconv" 10 "time" 11 12 "github.com/adacta-ru/mattermost-server/v6/app" 13 "github.com/adacta-ru/mattermost-server/v6/jobs" 14 tjobs "github.com/adacta-ru/mattermost-server/v6/jobs/interfaces" 15 "github.com/adacta-ru/mattermost-server/v6/mlog" 16 "github.com/adacta-ru/mattermost-server/v6/model" 17 "github.com/adacta-ru/mattermost-server/v6/services/searchengine/bleveengine" 18 ) 19 20 const ( 21 BATCH_SIZE = 1000 22 TIME_BETWEEN_BATCHES = 100 23 ESTIMATED_POST_COUNT = 10000000 24 ESTIMATED_CHANNEL_COUNT = 100000 25 ESTIMATED_USER_COUNT = 10000 26 ) 27 28 func init() { 29 app.RegisterJobsBleveIndexerInterface(func(s *app.Server) tjobs.IndexerJobInterface { 30 return &BleveIndexerInterfaceImpl{s} 31 }) 32 } 33 34 type BleveIndexerInterfaceImpl struct { 35 Server *app.Server 36 } 37 38 type BleveIndexerWorker struct { 39 name string 40 stop chan bool 41 stopped chan bool 42 jobs chan model.Job 43 jobServer *jobs.JobServer 44 45 engine *bleveengine.BleveEngine 46 } 47 48 func (bi *BleveIndexerInterfaceImpl) MakeWorker() model.Worker { 49 if bi.Server.SearchEngine.BleveEngine == nil { 50 return nil 51 } 52 return &BleveIndexerWorker{ 53 name: "BleveIndexer", 54 stop: make(chan bool, 1), 55 stopped: make(chan bool, 1), 56 jobs: make(chan model.Job), 57 jobServer: bi.Server.Jobs, 58 59 engine: bi.Server.SearchEngine.BleveEngine.(*bleveengine.BleveEngine), 60 } 61 } 62 63 type IndexingProgress struct { 64 Now time.Time 65 StartAtTime int64 66 EndAtTime int64 67 LastEntityTime int64 68 TotalPostsCount int64 69 DonePostsCount int64 70 DonePosts bool 71 TotalChannelsCount int64 72 DoneChannelsCount int64 73 DoneChannels bool 74 TotalUsersCount int64 75 DoneUsersCount int64 76 DoneUsers bool 77 } 78 79 func (ip *IndexingProgress) CurrentProgress() int64 { 80 return (ip.DonePostsCount + ip.DoneChannelsCount + ip.DoneUsersCount) * 100 / (ip.TotalPostsCount + ip.TotalChannelsCount + ip.TotalUsersCount) 81 } 82 83 func (ip *IndexingProgress) IsDone() bool { 84 return ip.DonePosts && ip.DoneChannels && ip.DoneUsers 85 } 86 87 func (worker *BleveIndexerWorker) JobChannel() chan<- model.Job { 88 return worker.jobs 89 } 90 91 func (worker *BleveIndexerWorker) Run() { 92 mlog.Debug("Worker Started", mlog.String("workername", worker.name)) 93 94 defer func() { 95 mlog.Debug("Worker: Finished", mlog.String("workername", worker.name)) 96 worker.stopped <- true 97 }() 98 99 for { 100 select { 101 case <-worker.stop: 102 mlog.Debug("Worker: Received stop signal", mlog.String("workername", worker.name)) 103 return 104 case job := <-worker.jobs: 105 mlog.Debug("Worker: Received a new candidate job.", mlog.String("workername", worker.name)) 106 worker.DoJob(&job) 107 } 108 } 109 } 110 111 func (worker *BleveIndexerWorker) Stop() { 112 mlog.Debug("Worker Stopping", mlog.String("workername", worker.name)) 113 worker.stop <- true 114 <-worker.stopped 115 } 116 117 func (worker *BleveIndexerWorker) DoJob(job *model.Job) { 118 claimed, err := worker.jobServer.ClaimJob(job) 119 if err != nil { 120 mlog.Warn("Worker: Error occurred while trying to claim job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 121 return 122 } 123 if !claimed { 124 return 125 } 126 127 mlog.Info("Worker: Indexing job claimed by worker", mlog.String("workername", worker.name), mlog.String("job_id", job.Id)) 128 129 if !worker.engine.IsActive() { 130 appError := model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.do_job.engine_inactive", nil, "", http.StatusInternalServerError) 131 if err := worker.jobServer.SetJobError(job, appError); err != nil { 132 mlog.Error("Worker: Failed to run job as ") 133 } 134 return 135 } 136 137 progress := IndexingProgress{ 138 Now: time.Now(), 139 DonePosts: false, 140 DoneChannels: false, 141 DoneUsers: false, 142 StartAtTime: 0, 143 EndAtTime: model.GetMillis(), 144 } 145 146 // Extract the start and end times, if they are set. 147 if startString, ok := job.Data["start_time"]; ok { 148 startInt, err := strconv.ParseInt(startString, 10, 64) 149 if err != nil { 150 mlog.Error("Worker: Failed to parse start_time for job", mlog.String("workername", worker.name), mlog.String("start_time", startString), mlog.String("job_id", job.Id), mlog.Err(err)) 151 appError := model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.do_job.parse_start_time.error", nil, err.Error(), http.StatusInternalServerError) 152 if err := worker.jobServer.SetJobError(job, appError); err != nil { 153 mlog.Error("Worker: Failed to set job error", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err), mlog.NamedErr("set_error", appError)) 154 } 155 return 156 } 157 progress.StartAtTime = startInt 158 progress.LastEntityTime = progress.StartAtTime 159 } else { 160 // Set start time to oldest post in the database. 161 oldestPost, err := worker.jobServer.Store.Post().GetOldest() 162 if err != nil { 163 mlog.Error("Worker: Failed to fetch oldest post for job.", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.String("start_time", startString), mlog.Err(err)) 164 appError := model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.do_job.get_oldest_post.error", nil, err.Error(), http.StatusInternalServerError) 165 if err := worker.jobServer.SetJobError(job, appError); err != nil { 166 mlog.Error("Worker: Failed to set job error", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err), mlog.NamedErr("set_error", appError)) 167 } 168 return 169 } 170 progress.StartAtTime = oldestPost.CreateAt 171 progress.LastEntityTime = progress.StartAtTime 172 } 173 174 if endString, ok := job.Data["end_time"]; ok { 175 endInt, err := strconv.ParseInt(endString, 10, 64) 176 if err != nil { 177 mlog.Error("Worker: Failed to parse end_time for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.String("end_time", endString), mlog.Err(err)) 178 appError := model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.do_job.parse_end_time.error", nil, err.Error(), http.StatusInternalServerError) 179 if err := worker.jobServer.SetJobError(job, appError); err != nil { 180 mlog.Error("Worker: Failed to set job errorv", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err), mlog.NamedErr("set_error", appError)) 181 } 182 return 183 } 184 progress.EndAtTime = endInt 185 } 186 187 // Counting all posts may fail or timeout when the posts table is large. If this happens, log a warning, but carry 188 // on with the indexing job anyway. The only issue is that the progress % reporting will be inaccurate. 189 if count, err := worker.jobServer.Store.Post().AnalyticsPostCount("", false, false); err != nil { 190 mlog.Warn("Worker: Failed to fetch total post count for job. An estimated value will be used for progress reporting.", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 191 progress.TotalPostsCount = ESTIMATED_POST_COUNT 192 } else { 193 progress.TotalPostsCount = count 194 } 195 196 // Same possible fail as above can happen when counting channels 197 if count, err := worker.jobServer.Store.Channel().AnalyticsTypeCount("", "O"); err != nil { 198 mlog.Warn("Worker: Failed to fetch total channel count for job. An estimated value will be used for progress reporting.", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 199 progress.TotalChannelsCount = ESTIMATED_CHANNEL_COUNT 200 } else { 201 progress.TotalChannelsCount = count 202 } 203 204 // Same possible fail as above can happen when counting users 205 if count, err := worker.jobServer.Store.User().Count(model.UserCountOptions{}); err != nil { 206 mlog.Warn("Worker: Failed to fetch total user count for job. An estimated value will be used for progress reporting.", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 207 progress.TotalUsersCount = ESTIMATED_USER_COUNT 208 } else { 209 progress.TotalUsersCount = count 210 } 211 212 cancelCtx, cancelCancelWatcher := context.WithCancel(context.Background()) 213 cancelWatcherChan := make(chan interface{}, 1) 214 go worker.jobServer.CancellationWatcher(cancelCtx, job.Id, cancelWatcherChan) 215 216 defer cancelCancelWatcher() 217 218 for { 219 select { 220 case <-cancelWatcherChan: 221 mlog.Info("Worker: Indexing job has been canceled via CancellationWatcher", mlog.String("workername", worker.name), mlog.String("job_id", job.Id)) 222 if err := worker.jobServer.SetJobCanceled(job); err != nil { 223 mlog.Error("Worker: Failed to mark job as cancelled", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 224 } 225 return 226 227 case <-worker.stop: 228 mlog.Info("Worker: Indexing has been canceled via Worker Stop", mlog.String("workername", worker.name), mlog.String("job_id", job.Id)) 229 if err := worker.jobServer.SetJobCanceled(job); err != nil { 230 mlog.Error("Worker: Failed to mark job as canceled", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 231 } 232 return 233 234 case <-time.After(TIME_BETWEEN_BATCHES * time.Millisecond): 235 var err *model.AppError 236 if progress, err = worker.IndexBatch(progress); err != nil { 237 mlog.Error("Worker: Failed to index batch for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 238 if err2 := worker.jobServer.SetJobError(job, err); err2 != nil { 239 mlog.Error("Worker: Failed to set job error", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err2), mlog.NamedErr("set_error", err)) 240 } 241 return 242 } 243 244 if err := worker.jobServer.SetJobProgress(job, progress.CurrentProgress()); err != nil { 245 mlog.Error("Worker: Failed to set progress for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 246 if err2 := worker.jobServer.SetJobError(job, err); err2 != nil { 247 mlog.Error("Worker: Failed to set error for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err2), mlog.NamedErr("set_error", err)) 248 } 249 return 250 } 251 252 if progress.IsDone() { 253 if err := worker.jobServer.SetJobSuccess(job); err != nil { 254 mlog.Error("Worker: Failed to set success for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err)) 255 if err2 := worker.jobServer.SetJobError(job, err); err2 != nil { 256 mlog.Error("Worker: Failed to set error for job", mlog.String("workername", worker.name), mlog.String("job_id", job.Id), mlog.Err(err2), mlog.NamedErr("set_error", err)) 257 } 258 } 259 mlog.Info("Worker: Indexing job finished successfully", mlog.String("workername", worker.name), mlog.String("job_id", job.Id)) 260 return 261 } 262 } 263 } 264 } 265 266 func (worker *BleveIndexerWorker) IndexBatch(progress IndexingProgress) (IndexingProgress, *model.AppError) { 267 if !progress.DonePosts { 268 return worker.IndexPostsBatch(progress) 269 } 270 if !progress.DoneChannels { 271 return worker.IndexChannelsBatch(progress) 272 } 273 if !progress.DoneUsers { 274 return worker.IndexUsersBatch(progress) 275 } 276 return progress, model.NewAppError("BleveIndexerWorker", "bleveengine.indexer.index_batch.nothing_left_to_index.error", nil, "", http.StatusInternalServerError) 277 } 278 279 func (worker *BleveIndexerWorker) IndexPostsBatch(progress IndexingProgress) (IndexingProgress, *model.AppError) { 280 endTime := progress.LastEntityTime + int64(*worker.jobServer.Config().BleveSettings.BulkIndexingTimeWindowSeconds*1000) 281 282 var posts []*model.PostForIndexing 283 284 tries := 0 285 for posts == nil { 286 var err error 287 posts, err = worker.jobServer.Store.Post().GetPostsBatchForIndexing(progress.LastEntityTime, endTime, BATCH_SIZE) 288 if err != nil { 289 if tries >= 10 { 290 return progress, model.NewAppError("IndexPostsBatch", "app.post.get_posts_batch_for_indexing.get.app_error", nil, err.Error(), http.StatusInternalServerError) 291 } 292 mlog.Warn("Failed to get posts batch for indexing. Retrying.", mlog.Err(err)) 293 294 // Wait a bit before trying again. 295 time.Sleep(15 * time.Second) 296 } 297 298 tries++ 299 } 300 301 newLastMessageTime, err := worker.BulkIndexPosts(posts, progress) 302 if err != nil { 303 return progress, err 304 } 305 306 // Due to the "endTime" parameter in the store query, we might get an incomplete batch before the end. In this 307 // case, set the "newLastMessageTime" to the endTime so we don't get stuck running the same query in a loop. 308 if len(posts) < BATCH_SIZE { 309 newLastMessageTime = endTime 310 } 311 312 // When to Stop: we index either until we pass a batch of messages where the last 313 // message is created at or after the specified end time when setting up the batch 314 // index, or until two consecutive full batches have the same end time of their final 315 // messages. This second case is safe as long as the assumption that the database 316 // cannot contain more messages with the same CreateAt time than the batch size holds. 317 if progress.EndAtTime <= newLastMessageTime { 318 progress.DonePosts = true 319 progress.LastEntityTime = progress.StartAtTime 320 } else if progress.LastEntityTime == newLastMessageTime && len(posts) == BATCH_SIZE { 321 mlog.Error("More posts with the same CreateAt time were detected than the permitted batch size. Aborting indexing job.", mlog.Int64("CreateAt", newLastMessageTime), mlog.Int("Batch Size", BATCH_SIZE)) 322 progress.DonePosts = true 323 progress.LastEntityTime = progress.StartAtTime 324 } else { 325 progress.LastEntityTime = newLastMessageTime 326 } 327 328 progress.DonePostsCount += int64(len(posts)) 329 330 return progress, nil 331 } 332 333 func (worker *BleveIndexerWorker) BulkIndexPosts(posts []*model.PostForIndexing, progress IndexingProgress) (int64, *model.AppError) { 334 lastCreateAt := int64(0) 335 batch := worker.engine.PostIndex.NewBatch() 336 337 for _, post := range posts { 338 if post.DeleteAt == 0 { 339 searchPost := bleveengine.BLVPostFromPostForIndexing(post) 340 batch.Index(searchPost.Id, searchPost) 341 } else { 342 batch.Delete(post.Id) 343 } 344 345 lastCreateAt = post.CreateAt 346 } 347 348 worker.engine.Mutex.RLock() 349 defer worker.engine.Mutex.RUnlock() 350 351 if err := worker.engine.PostIndex.Batch(batch); err != nil { 352 return 0, model.NewAppError("BleveIndexerWorker.BulkIndexPosts", "bleveengine.indexer.do_job.bulk_index_posts.batch_error", nil, err.Error(), http.StatusInternalServerError) 353 } 354 return lastCreateAt, nil 355 } 356 357 func (worker *BleveIndexerWorker) IndexChannelsBatch(progress IndexingProgress) (IndexingProgress, *model.AppError) { 358 endTime := progress.LastEntityTime + int64(*worker.jobServer.Config().BleveSettings.BulkIndexingTimeWindowSeconds*1000) 359 360 var channels []*model.Channel 361 362 tries := 0 363 for channels == nil { 364 var nErr error 365 channels, nErr = worker.jobServer.Store.Channel().GetChannelsBatchForIndexing(progress.LastEntityTime, endTime, BATCH_SIZE) 366 if nErr != nil { 367 if tries >= 10 { 368 return progress, model.NewAppError("BleveIndexerWorker.IndexChannelsBatch", "app.channel.get_channels_batch_for_indexing.get.app_error", nil, nErr.Error(), http.StatusInternalServerError) 369 } 370 371 mlog.Warn("Failed to get channels batch for indexing. Retrying.", mlog.Err(nErr)) 372 373 // Wait a bit before trying again. 374 time.Sleep(15 * time.Second) 375 } 376 tries++ 377 } 378 379 newLastChannelTime, err := worker.BulkIndexChannels(channels, progress) 380 if err != nil { 381 return progress, err 382 } 383 384 // Due to the "endTime" parameter in the store query, we might get an incomplete batch before the end. In this 385 // case, set the "newLastChannelTime" to the endTime so we don't get stuck running the same query in a loop. 386 if len(channels) < BATCH_SIZE { 387 newLastChannelTime = endTime 388 } 389 390 // When to Stop: we index either until we pass a batch of channels where the last 391 // channel is created at or after the specified end time when setting up the batch 392 // index, or until two consecutive full batches have the same end time of their final 393 // channels. This second case is safe as long as the assumption that the database 394 // cannot contain more channels with the same CreateAt time than the batch size holds. 395 if progress.EndAtTime <= newLastChannelTime { 396 progress.DoneChannels = true 397 progress.LastEntityTime = progress.StartAtTime 398 } else if progress.LastEntityTime == newLastChannelTime && len(channels) == BATCH_SIZE { 399 mlog.Error("More channels with the same CreateAt time were detected than the permitted batch size. Aborting indexing job.", mlog.Int64("CreateAt", newLastChannelTime), mlog.Int("Batch Size", BATCH_SIZE)) 400 progress.DoneChannels = true 401 progress.LastEntityTime = progress.StartAtTime 402 } else { 403 progress.LastEntityTime = newLastChannelTime 404 } 405 406 progress.DoneChannelsCount += int64(len(channels)) 407 408 return progress, nil 409 } 410 411 func (worker *BleveIndexerWorker) BulkIndexChannels(channels []*model.Channel, progress IndexingProgress) (int64, *model.AppError) { 412 lastCreateAt := int64(0) 413 batch := worker.engine.ChannelIndex.NewBatch() 414 415 for _, channel := range channels { 416 if channel.DeleteAt == 0 { 417 searchChannel := bleveengine.BLVChannelFromChannel(channel) 418 batch.Index(searchChannel.Id, searchChannel) 419 } else { 420 batch.Delete(channel.Id) 421 } 422 423 lastCreateAt = channel.CreateAt 424 } 425 426 worker.engine.Mutex.RLock() 427 defer worker.engine.Mutex.RUnlock() 428 429 if err := worker.engine.ChannelIndex.Batch(batch); err != nil { 430 return 0, model.NewAppError("BleveIndexerWorker.BulkIndexChannels", "bleveengine.indexer.do_job.bulk_index_channels.batch_error", nil, err.Error(), http.StatusInternalServerError) 431 } 432 return lastCreateAt, nil 433 } 434 435 func (worker *BleveIndexerWorker) IndexUsersBatch(progress IndexingProgress) (IndexingProgress, *model.AppError) { 436 endTime := progress.LastEntityTime + int64(*worker.jobServer.Config().BleveSettings.BulkIndexingTimeWindowSeconds*1000) 437 438 var users []*model.UserForIndexing 439 440 tries := 0 441 for users == nil { 442 if usersBatch, err := worker.jobServer.Store.User().GetUsersBatchForIndexing(progress.LastEntityTime, endTime, BATCH_SIZE); err != nil { 443 if tries >= 10 { 444 return progress, model.NewAppError("IndexUsersBatch", "app.user.get_users_batch_for_indexing.get_users.app_error", nil, err.Error(), http.StatusInternalServerError) 445 } 446 mlog.Warn("Failed to get users batch for indexing. Retrying.", mlog.Err(err)) 447 448 // Wait a bit before trying again. 449 time.Sleep(15 * time.Second) 450 } else { 451 users = usersBatch 452 } 453 454 tries++ 455 } 456 457 newLastUserTime, err := worker.BulkIndexUsers(users, progress) 458 if err != nil { 459 return progress, err 460 } 461 462 // Due to the "endTime" parameter in the store query, we might get an incomplete batch before the end. In this 463 // case, set the "newLastUserTime" to the endTime so we don't get stuck running the same query in a loop. 464 if len(users) < BATCH_SIZE { 465 newLastUserTime = endTime 466 } 467 468 // When to Stop: we index either until we pass a batch of users where the last 469 // user is created at or after the specified end time when setting up the batch 470 // index, or until two consecutive full batches have the same end time of their final 471 // users. This second case is safe as long as the assumption that the database 472 // cannot contain more users with the same CreateAt time than the batch size holds. 473 if progress.EndAtTime <= newLastUserTime { 474 progress.DoneUsers = true 475 progress.LastEntityTime = progress.StartAtTime 476 } else if progress.LastEntityTime == newLastUserTime && len(users) == BATCH_SIZE { 477 mlog.Error("More users with the same CreateAt time were detected than the permitted batch size. Aborting indexing job.", mlog.Int64("CreateAt", newLastUserTime), mlog.Int("Batch Size", BATCH_SIZE)) 478 progress.DoneUsers = true 479 progress.LastEntityTime = progress.StartAtTime 480 } else { 481 progress.LastEntityTime = newLastUserTime 482 } 483 484 progress.DoneUsersCount += int64(len(users)) 485 486 return progress, nil 487 } 488 489 func (worker *BleveIndexerWorker) BulkIndexUsers(users []*model.UserForIndexing, progress IndexingProgress) (int64, *model.AppError) { 490 lastCreateAt := int64(0) 491 batch := worker.engine.UserIndex.NewBatch() 492 493 for _, user := range users { 494 if user.DeleteAt == 0 { 495 searchUser := bleveengine.BLVUserFromUserForIndexing(user) 496 batch.Index(searchUser.Id, searchUser) 497 } else { 498 batch.Delete(user.Id) 499 } 500 501 lastCreateAt = user.CreateAt 502 } 503 504 worker.engine.Mutex.RLock() 505 defer worker.engine.Mutex.RUnlock() 506 507 if err := worker.engine.UserIndex.Batch(batch); err != nil { 508 return 0, model.NewAppError("BleveIndexerWorker.BulkIndexUsers", "bleveengine.indexer.do_job.bulk_index_users.batch_error", nil, err.Error(), http.StatusInternalServerError) 509 } 510 return lastCreateAt, nil 511 }