github.com/iikira/iikira-go-utils@v0.0.0-20230610031953-f2cb11cde33a/requester/downloader/monitor.go (about) 1 package downloader 2 3 import ( 4 "context" 5 "errors" 6 "github.com/iikira/iikira-go-utils/pcsverbose" 7 "github.com/iikira/iikira-go-utils/requester/transfer" 8 "sort" 9 "time" 10 ) 11 12 var ( 13 //ErrNoWokers no workers 14 ErrNoWokers = errors.New("no workers") 15 ) 16 17 type ( 18 //Monitor 线程监控器 19 Monitor struct { 20 workers WorkerList 21 status *transfer.DownloadStatus 22 instanceState *InstanceState 23 completed chan struct{} 24 err error 25 resetController *ResetController 26 isReloadWorker bool //是否重载worker, 单线程模式不重载 27 28 // 临时变量 29 lastAvaliableIndex int 30 } 31 32 // RangeWorkerFunc 遍历workers的函数 33 RangeWorkerFunc func(key int, worker *Worker) bool 34 ) 35 36 //NewMonitor 初始化Monitor 37 func NewMonitor() *Monitor { 38 monitor := &Monitor{} 39 return monitor 40 } 41 42 func (mt *Monitor) lazyInit() { 43 if mt.workers == nil { 44 mt.workers = make(WorkerList, 0, 100) 45 } 46 if mt.status == nil { 47 mt.status = transfer.NewDownloadStatus() 48 } 49 if mt.resetController == nil { 50 mt.resetController = NewResetController(80) 51 } 52 } 53 54 //InitMonitorCapacity 初始化workers, 用于Append 55 func (mt *Monitor) InitMonitorCapacity(capacity int) { 56 mt.workers = make(WorkerList, 0, capacity) 57 } 58 59 //Append 增加Worker 60 func (mt *Monitor) Append(worker *Worker) { 61 if worker == nil { 62 return 63 } 64 mt.workers = append(mt.workers, worker) 65 } 66 67 //SetWorkers 设置workers, 此操作会覆盖原有的workers 68 func (mt *Monitor) SetWorkers(workers WorkerList) { 69 mt.workers = workers 70 } 71 72 //SetStatus 设置DownloadStatus 73 func (mt *Monitor) SetStatus(status *transfer.DownloadStatus) { 74 mt.status = status 75 } 76 77 //SetInstanceState 设置状态 78 func (mt *Monitor) SetInstanceState(instanceState *InstanceState) { 79 mt.instanceState = instanceState 80 } 81 82 //Status 返回DownloadStatus 83 func (mt *Monitor) Status() *transfer.DownloadStatus { 84 return mt.status 85 } 86 87 //Err 返回遇到的错误 88 func (mt *Monitor) Err() error { 89 return mt.err 90 } 91 92 //CompletedChan 获取completed chan 93 func (mt *Monitor) CompletedChan() <-chan struct{} { 94 return mt.completed 95 } 96 97 //GetAvailableWorker 获取空闲的worker 98 func (mt *Monitor) GetAvailableWorker() *Worker { 99 workerCount := len(mt.workers) 100 for i := mt.lastAvaliableIndex; i < mt.lastAvaliableIndex+workerCount; i++ { 101 index := i % workerCount 102 worker := mt.workers[index] 103 if worker.Completed() { 104 mt.lastAvaliableIndex = index 105 return worker 106 } 107 } 108 return nil 109 } 110 111 //GetAllWorkersRange 获取所有worker的范围 112 func (mt *Monitor) GetAllWorkersRange() transfer.RangeList { 113 allWorkerRanges := make(transfer.RangeList, 0, len(mt.workers)) 114 for _, worker := range mt.workers { 115 allWorkerRanges = append(allWorkerRanges, worker.GetRange()) 116 } 117 return allWorkerRanges 118 } 119 120 //NumLeftWorkers 剩余的worker数量 121 func (mt *Monitor) NumLeftWorkers() (num int) { 122 for _, worker := range mt.workers { 123 if !worker.Completed() { 124 num++ 125 } 126 } 127 return 128 } 129 130 //SetReloadWorker 是否重载worker 131 func (mt *Monitor) SetReloadWorker(b bool) { 132 mt.isReloadWorker = b 133 } 134 135 //IsLeftWorkersAllFailed 剩下的线程是否全部失败 136 func (mt *Monitor) IsLeftWorkersAllFailed() bool { 137 failedNum := 0 138 for _, worker := range mt.workers { 139 if worker.Completed() { 140 continue 141 } 142 143 if !worker.Failed() { 144 failedNum++ 145 return false 146 } 147 } 148 return failedNum != 0 149 } 150 151 //registerAllCompleted 全部完成则发送消息 152 func (mt *Monitor) registerAllCompleted() { 153 mt.completed = make(chan struct{}, 0) 154 var ( 155 workerNum = len(mt.workers) 156 completeNum = 0 157 ) 158 159 go func() { 160 for { 161 time.Sleep(1 * time.Second) 162 163 completeNum = 0 164 for _, worker := range mt.workers { 165 switch worker.GetStatus().StatusCode() { 166 case StatusCodeInternalError: 167 // 检测到内部错误 168 // 马上停止执行 169 mt.err = worker.Err() 170 close(mt.completed) 171 return 172 case StatusCodeSuccessed, StatusCodeCanceled: 173 completeNum++ 174 } 175 } 176 // status 在 lazyInit 之后, 不可能为空 177 // 完成条件: 所有worker 都已经完成, 且 rangeGen 已生成完毕 178 gen := mt.status.RangeListGen() 179 if completeNum >= workerNum && (gen == nil || gen.IsDone()) { // 已完成 180 close(mt.completed) 181 return 182 } 183 } 184 }() 185 } 186 187 //ResetFailedAndNetErrorWorkers 重设部分网络错误的worker 188 func (mt *Monitor) ResetFailedAndNetErrorWorkers() { 189 for k := range mt.workers { 190 if !mt.resetController.CanReset() { 191 continue 192 } 193 194 switch mt.workers[k].GetStatus().StatusCode() { 195 case StatusCodeNetError: 196 pcsverbose.Verbosef("DEBUG: monitor: ResetFailedAndNetErrorWorkers: reset StatusCodeNetError worker, id: %d\n", mt.workers[k].id) 197 goto reset 198 case StatusCodeFailed: 199 pcsverbose.Verbosef("DEBUG: monitor: ResetFailedAndNetErrorWorkers: reset StatusCodeFailed worker, id: %d\n", mt.workers[k].id) 200 goto reset 201 default: 202 continue 203 } 204 205 reset: 206 mt.workers[k].Reset() 207 mt.resetController.AddResetNum() 208 } 209 } 210 211 //RangeWorker 遍历worker 212 func (mt *Monitor) RangeWorker(f RangeWorkerFunc) { 213 for k := range mt.workers { 214 if !f(k, mt.workers[k]) { 215 break 216 } 217 } 218 } 219 220 //Pause 暂停所有的下载 221 func (mt *Monitor) Pause() { 222 for k := range mt.workers { 223 mt.workers[k].Pause() 224 } 225 } 226 227 //Resume 恢复所有的下载 228 func (mt *Monitor) Resume() { 229 for k := range mt.workers { 230 mt.workers[k].Resume() 231 } 232 } 233 234 // TryAddNewWork 尝试加入新range 235 func (mt *Monitor) TryAddNewWork() { 236 if mt.status == nil { 237 return 238 } 239 gen := mt.status.RangeListGen() 240 if gen == nil || gen.IsDone() { 241 return 242 } 243 244 if !mt.resetController.CanReset() { //能否建立新连接 245 return 246 } 247 248 availableWorker := mt.GetAvailableWorker() 249 if availableWorker == nil { 250 return 251 } 252 253 // 有空闲的range, 执行 254 _, r := gen.GenRange() 255 if r == nil { 256 // 没有range了 257 return 258 } 259 260 availableWorker.SetRange(r) 261 availableWorker.ClearStatus() 262 263 mt.resetController.AddResetNum() 264 pcsverbose.Verbosef("MONITER: worker[%d] add new range: %s\n", availableWorker.ID(), r.ShowDetails()) 265 go availableWorker.Execute() 266 } 267 268 // DynamicSplitWorker 动态分配线程 269 func (mt *Monitor) DynamicSplitWorker(worker *Worker) { 270 if !mt.resetController.CanReset() { 271 return 272 } 273 274 switch worker.status.statusCode { 275 case StatusCodeDownloading, StatusCodeFailed, StatusCodeNetError: 276 //pass 277 default: 278 return 279 } 280 281 // 筛选空闲的Worker 282 availableWorker := mt.GetAvailableWorker() 283 if availableWorker == nil || worker == availableWorker { // 没有空的 284 return 285 } 286 287 workerRange := worker.GetRange() 288 289 end := workerRange.LoadEnd() 290 middle := (workerRange.LoadBegin() + end) / 2 291 292 if end-middle < MinParallelSize/5 { // 如果线程剩余的下载量太少, 不分配空闲线程 293 return 294 } 295 296 // 折半 297 availableWorkerRange := availableWorker.GetRange() 298 availableWorkerRange.StoreBegin(middle) // middle不能加1 299 availableWorkerRange.StoreEnd(end) 300 availableWorker.ClearStatus() 301 302 workerRange.StoreEnd(middle) 303 304 mt.resetController.AddResetNum() 305 pcsverbose.Verbosef("MONITOR: worker duplicated: %d <- %d\n", availableWorker.ID(), worker.ID()) 306 go availableWorker.Execute() 307 } 308 309 // ResetWorker 重设长时间无响应, 和下载速度为 0 的 Worker 310 func (mt *Monitor) ResetWorker(worker *Worker) { 311 if !mt.resetController.CanReset() { //达到最大重载次数 312 return 313 } 314 315 if worker.Completed() { 316 return 317 } 318 319 // 忽略正在写入数据到硬盘的 320 // 过滤速度有变化的线程 321 status := worker.GetStatus() 322 speeds := worker.GetSpeedsPerSecond() 323 if speeds != 0 { 324 return 325 } 326 327 switch status.StatusCode() { 328 case StatusCodePending, StatusCodeReseted: 329 fallthrough 330 case StatusCodeWaitToWrite: // 正在写入数据 331 fallthrough 332 case StatusCodePaused: // 已暂停 333 // 忽略, 返回 334 return 335 } 336 337 mt.resetController.AddResetNum() 338 339 // 重设连接 340 pcsverbose.Verbosef("MONITOR: worker[%d] reload\n", worker.ID()) 341 worker.Reset() 342 } 343 344 //Execute 执行任务 345 func (mt *Monitor) Execute(cancelCtx context.Context) { 346 if len(mt.workers) == 0 { 347 mt.err = ErrNoWokers 348 return 349 } 350 351 mt.lazyInit() 352 for _, worker := range mt.workers { 353 worker.SetDownloadStatus(mt.status) 354 go worker.Execute() 355 } 356 357 mt.registerAllCompleted() // 注册completed 358 ticker := time.NewTicker(990 * time.Millisecond) 359 defer ticker.Stop() 360 361 //开始监控 362 for { 363 select { 364 case <-cancelCtx.Done(): 365 for _, worker := range mt.workers { 366 err := worker.Cancel() 367 if err != nil { 368 pcsverbose.Verbosef("DEBUG: cancel failed, worker id: %d, err: %s\n", worker.ID(), err) 369 } 370 } 371 return 372 case <-mt.completed: 373 return 374 case <-ticker.C: 375 // 初始化监控工作 376 mt.ResetFailedAndNetErrorWorkers() 377 378 mt.status.UpdateSpeeds() // 更新速度 379 380 // 保存断点信息到文件 381 if mt.instanceState != nil { 382 mt.instanceState.Put(&transfer.DownloadInstanceInfo{ 383 DownloadStatus: mt.status, 384 Ranges: mt.GetAllWorkersRange(), 385 }) 386 } 387 388 // 加入新range 389 mt.TryAddNewWork() 390 391 // 不重载worker 392 if !mt.isReloadWorker { 393 continue 394 } 395 396 // 更新maxSpeeds 397 mt.status.SetMaxSpeeds(mt.status.SpeedsPerSecond()) 398 399 // 速度减慢或者全部失败, 开始监控 400 // 只有一个worker时不重设连接 401 isLeftWorkersAllFailed := mt.IsLeftWorkersAllFailed() 402 if mt.status.SpeedsPerSecond() < mt.status.MaxSpeeds()/6 || isLeftWorkersAllFailed { 403 if isLeftWorkersAllFailed { 404 pcsverbose.Verbosef("DEBUG: monitor: All workers failed\n") 405 } 406 mt.status.ClearMaxSpeeds() //清空最大速度的统计 407 408 // 先进行动态分配线程 409 pcsverbose.Verbosef("DEBUG: monitor: start duplicate.\n") 410 sort.Sort(ByLeftDesc{mt.workers}) 411 for _, worker := range mt.workers { 412 //动态分配线程 413 mt.DynamicSplitWorker(worker) 414 } 415 416 // 重设长时间无响应, 和下载速度为 0 的线程 417 pcsverbose.Verbosef("DEBUG: monitor: start reload.\n") 418 for _, worker := range mt.workers { 419 mt.ResetWorker(worker) 420 } 421 } // end if 2 422 } //end select 423 } //end for 424 }