github.com/iikira/iikira-go-utils@v0.0.0-20230610031953-f2cb11cde33a/requester/downloader/monitor.go (about)

     1  package downloader
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"github.com/iikira/iikira-go-utils/pcsverbose"
     7  	"github.com/iikira/iikira-go-utils/requester/transfer"
     8  	"sort"
     9  	"time"
    10  )
    11  
    12  var (
    13  	//ErrNoWokers no workers
    14  	ErrNoWokers = errors.New("no workers")
    15  )
    16  
    17  type (
    18  	//Monitor 线程监控器
    19  	Monitor struct {
    20  		workers         WorkerList
    21  		status          *transfer.DownloadStatus
    22  		instanceState   *InstanceState
    23  		completed       chan struct{}
    24  		err             error
    25  		resetController *ResetController
    26  		isReloadWorker  bool //是否重载worker, 单线程模式不重载
    27  
    28  		// 临时变量
    29  		lastAvaliableIndex int
    30  	}
    31  
    32  	// RangeWorkerFunc 遍历workers的函数
    33  	RangeWorkerFunc func(key int, worker *Worker) bool
    34  )
    35  
    36  //NewMonitor 初始化Monitor
    37  func NewMonitor() *Monitor {
    38  	monitor := &Monitor{}
    39  	return monitor
    40  }
    41  
    42  func (mt *Monitor) lazyInit() {
    43  	if mt.workers == nil {
    44  		mt.workers = make(WorkerList, 0, 100)
    45  	}
    46  	if mt.status == nil {
    47  		mt.status = transfer.NewDownloadStatus()
    48  	}
    49  	if mt.resetController == nil {
    50  		mt.resetController = NewResetController(80)
    51  	}
    52  }
    53  
    54  //InitMonitorCapacity 初始化workers, 用于Append
    55  func (mt *Monitor) InitMonitorCapacity(capacity int) {
    56  	mt.workers = make(WorkerList, 0, capacity)
    57  }
    58  
    59  //Append 增加Worker
    60  func (mt *Monitor) Append(worker *Worker) {
    61  	if worker == nil {
    62  		return
    63  	}
    64  	mt.workers = append(mt.workers, worker)
    65  }
    66  
    67  //SetWorkers 设置workers, 此操作会覆盖原有的workers
    68  func (mt *Monitor) SetWorkers(workers WorkerList) {
    69  	mt.workers = workers
    70  }
    71  
    72  //SetStatus 设置DownloadStatus
    73  func (mt *Monitor) SetStatus(status *transfer.DownloadStatus) {
    74  	mt.status = status
    75  }
    76  
    77  //SetInstanceState 设置状态
    78  func (mt *Monitor) SetInstanceState(instanceState *InstanceState) {
    79  	mt.instanceState = instanceState
    80  }
    81  
    82  //Status 返回DownloadStatus
    83  func (mt *Monitor) Status() *transfer.DownloadStatus {
    84  	return mt.status
    85  }
    86  
    87  //Err 返回遇到的错误
    88  func (mt *Monitor) Err() error {
    89  	return mt.err
    90  }
    91  
    92  //CompletedChan 获取completed chan
    93  func (mt *Monitor) CompletedChan() <-chan struct{} {
    94  	return mt.completed
    95  }
    96  
    97  //GetAvailableWorker 获取空闲的worker
    98  func (mt *Monitor) GetAvailableWorker() *Worker {
    99  	workerCount := len(mt.workers)
   100  	for i := mt.lastAvaliableIndex; i < mt.lastAvaliableIndex+workerCount; i++ {
   101  		index := i % workerCount
   102  		worker := mt.workers[index]
   103  		if worker.Completed() {
   104  			mt.lastAvaliableIndex = index
   105  			return worker
   106  		}
   107  	}
   108  	return nil
   109  }
   110  
   111  //GetAllWorkersRange 获取所有worker的范围
   112  func (mt *Monitor) GetAllWorkersRange() transfer.RangeList {
   113  	allWorkerRanges := make(transfer.RangeList, 0, len(mt.workers))
   114  	for _, worker := range mt.workers {
   115  		allWorkerRanges = append(allWorkerRanges, worker.GetRange())
   116  	}
   117  	return allWorkerRanges
   118  }
   119  
   120  //NumLeftWorkers 剩余的worker数量
   121  func (mt *Monitor) NumLeftWorkers() (num int) {
   122  	for _, worker := range mt.workers {
   123  		if !worker.Completed() {
   124  			num++
   125  		}
   126  	}
   127  	return
   128  }
   129  
   130  //SetReloadWorker 是否重载worker
   131  func (mt *Monitor) SetReloadWorker(b bool) {
   132  	mt.isReloadWorker = b
   133  }
   134  
   135  //IsLeftWorkersAllFailed 剩下的线程是否全部失败
   136  func (mt *Monitor) IsLeftWorkersAllFailed() bool {
   137  	failedNum := 0
   138  	for _, worker := range mt.workers {
   139  		if worker.Completed() {
   140  			continue
   141  		}
   142  
   143  		if !worker.Failed() {
   144  			failedNum++
   145  			return false
   146  		}
   147  	}
   148  	return failedNum != 0
   149  }
   150  
   151  //registerAllCompleted 全部完成则发送消息
   152  func (mt *Monitor) registerAllCompleted() {
   153  	mt.completed = make(chan struct{}, 0)
   154  	var (
   155  		workerNum   = len(mt.workers)
   156  		completeNum = 0
   157  	)
   158  
   159  	go func() {
   160  		for {
   161  			time.Sleep(1 * time.Second)
   162  
   163  			completeNum = 0
   164  			for _, worker := range mt.workers {
   165  				switch worker.GetStatus().StatusCode() {
   166  				case StatusCodeInternalError:
   167  					// 检测到内部错误
   168  					// 马上停止执行
   169  					mt.err = worker.Err()
   170  					close(mt.completed)
   171  					return
   172  				case StatusCodeSuccessed, StatusCodeCanceled:
   173  					completeNum++
   174  				}
   175  			}
   176  			// status 在 lazyInit 之后, 不可能为空
   177  			// 完成条件: 所有worker 都已经完成, 且 rangeGen 已生成完毕
   178  			gen := mt.status.RangeListGen()
   179  			if completeNum >= workerNum && (gen == nil || gen.IsDone()) { // 已完成
   180  				close(mt.completed)
   181  				return
   182  			}
   183  		}
   184  	}()
   185  }
   186  
   187  //ResetFailedAndNetErrorWorkers 重设部分网络错误的worker
   188  func (mt *Monitor) ResetFailedAndNetErrorWorkers() {
   189  	for k := range mt.workers {
   190  		if !mt.resetController.CanReset() {
   191  			continue
   192  		}
   193  
   194  		switch mt.workers[k].GetStatus().StatusCode() {
   195  		case StatusCodeNetError:
   196  			pcsverbose.Verbosef("DEBUG: monitor: ResetFailedAndNetErrorWorkers: reset StatusCodeNetError worker, id: %d\n", mt.workers[k].id)
   197  			goto reset
   198  		case StatusCodeFailed:
   199  			pcsverbose.Verbosef("DEBUG: monitor: ResetFailedAndNetErrorWorkers: reset StatusCodeFailed worker, id: %d\n", mt.workers[k].id)
   200  			goto reset
   201  		default:
   202  			continue
   203  		}
   204  
   205  	reset:
   206  		mt.workers[k].Reset()
   207  		mt.resetController.AddResetNum()
   208  	}
   209  }
   210  
   211  //RangeWorker 遍历worker
   212  func (mt *Monitor) RangeWorker(f RangeWorkerFunc) {
   213  	for k := range mt.workers {
   214  		if !f(k, mt.workers[k]) {
   215  			break
   216  		}
   217  	}
   218  }
   219  
   220  //Pause 暂停所有的下载
   221  func (mt *Monitor) Pause() {
   222  	for k := range mt.workers {
   223  		mt.workers[k].Pause()
   224  	}
   225  }
   226  
   227  //Resume 恢复所有的下载
   228  func (mt *Monitor) Resume() {
   229  	for k := range mt.workers {
   230  		mt.workers[k].Resume()
   231  	}
   232  }
   233  
   234  // TryAddNewWork 尝试加入新range
   235  func (mt *Monitor) TryAddNewWork() {
   236  	if mt.status == nil {
   237  		return
   238  	}
   239  	gen := mt.status.RangeListGen()
   240  	if gen == nil || gen.IsDone() {
   241  		return
   242  	}
   243  
   244  	if !mt.resetController.CanReset() { //能否建立新连接
   245  		return
   246  	}
   247  
   248  	availableWorker := mt.GetAvailableWorker()
   249  	if availableWorker == nil {
   250  		return
   251  	}
   252  
   253  	// 有空闲的range, 执行
   254  	_, r := gen.GenRange()
   255  	if r == nil {
   256  		// 没有range了
   257  		return
   258  	}
   259  
   260  	availableWorker.SetRange(r)
   261  	availableWorker.ClearStatus()
   262  
   263  	mt.resetController.AddResetNum()
   264  	pcsverbose.Verbosef("MONITER: worker[%d] add new range: %s\n", availableWorker.ID(), r.ShowDetails())
   265  	go availableWorker.Execute()
   266  }
   267  
   268  // DynamicSplitWorker 动态分配线程
   269  func (mt *Monitor) DynamicSplitWorker(worker *Worker) {
   270  	if !mt.resetController.CanReset() {
   271  		return
   272  	}
   273  
   274  	switch worker.status.statusCode {
   275  	case StatusCodeDownloading, StatusCodeFailed, StatusCodeNetError:
   276  	//pass
   277  	default:
   278  		return
   279  	}
   280  
   281  	// 筛选空闲的Worker
   282  	availableWorker := mt.GetAvailableWorker()
   283  	if availableWorker == nil || worker == availableWorker { // 没有空的
   284  		return
   285  	}
   286  
   287  	workerRange := worker.GetRange()
   288  
   289  	end := workerRange.LoadEnd()
   290  	middle := (workerRange.LoadBegin() + end) / 2
   291  
   292  	if end-middle < MinParallelSize/5 { // 如果线程剩余的下载量太少, 不分配空闲线程
   293  		return
   294  	}
   295  
   296  	// 折半
   297  	availableWorkerRange := availableWorker.GetRange()
   298  	availableWorkerRange.StoreBegin(middle) // middle不能加1
   299  	availableWorkerRange.StoreEnd(end)
   300  	availableWorker.ClearStatus()
   301  
   302  	workerRange.StoreEnd(middle)
   303  
   304  	mt.resetController.AddResetNum()
   305  	pcsverbose.Verbosef("MONITOR: worker duplicated: %d <- %d\n", availableWorker.ID(), worker.ID())
   306  	go availableWorker.Execute()
   307  }
   308  
   309  // ResetWorker 重设长时间无响应, 和下载速度为 0 的 Worker
   310  func (mt *Monitor) ResetWorker(worker *Worker) {
   311  	if !mt.resetController.CanReset() { //达到最大重载次数
   312  		return
   313  	}
   314  
   315  	if worker.Completed() {
   316  		return
   317  	}
   318  
   319  	// 忽略正在写入数据到硬盘的
   320  	// 过滤速度有变化的线程
   321  	status := worker.GetStatus()
   322  	speeds := worker.GetSpeedsPerSecond()
   323  	if speeds != 0 {
   324  		return
   325  	}
   326  
   327  	switch status.StatusCode() {
   328  	case StatusCodePending, StatusCodeReseted:
   329  		fallthrough
   330  	case StatusCodeWaitToWrite: // 正在写入数据
   331  		fallthrough
   332  	case StatusCodePaused: // 已暂停
   333  		// 忽略, 返回
   334  		return
   335  	}
   336  
   337  	mt.resetController.AddResetNum()
   338  
   339  	// 重设连接
   340  	pcsverbose.Verbosef("MONITOR: worker[%d] reload\n", worker.ID())
   341  	worker.Reset()
   342  }
   343  
   344  //Execute 执行任务
   345  func (mt *Monitor) Execute(cancelCtx context.Context) {
   346  	if len(mt.workers) == 0 {
   347  		mt.err = ErrNoWokers
   348  		return
   349  	}
   350  
   351  	mt.lazyInit()
   352  	for _, worker := range mt.workers {
   353  		worker.SetDownloadStatus(mt.status)
   354  		go worker.Execute()
   355  	}
   356  
   357  	mt.registerAllCompleted() // 注册completed
   358  	ticker := time.NewTicker(990 * time.Millisecond)
   359  	defer ticker.Stop()
   360  
   361  	//开始监控
   362  	for {
   363  		select {
   364  		case <-cancelCtx.Done():
   365  			for _, worker := range mt.workers {
   366  				err := worker.Cancel()
   367  				if err != nil {
   368  					pcsverbose.Verbosef("DEBUG: cancel failed, worker id: %d, err: %s\n", worker.ID(), err)
   369  				}
   370  			}
   371  			return
   372  		case <-mt.completed:
   373  			return
   374  		case <-ticker.C:
   375  			// 初始化监控工作
   376  			mt.ResetFailedAndNetErrorWorkers()
   377  
   378  			mt.status.UpdateSpeeds() // 更新速度
   379  
   380  			// 保存断点信息到文件
   381  			if mt.instanceState != nil {
   382  				mt.instanceState.Put(&transfer.DownloadInstanceInfo{
   383  					DownloadStatus: mt.status,
   384  					Ranges:         mt.GetAllWorkersRange(),
   385  				})
   386  			}
   387  
   388  			// 加入新range
   389  			mt.TryAddNewWork()
   390  
   391  			// 不重载worker
   392  			if !mt.isReloadWorker {
   393  				continue
   394  			}
   395  
   396  			// 更新maxSpeeds
   397  			mt.status.SetMaxSpeeds(mt.status.SpeedsPerSecond())
   398  
   399  			// 速度减慢或者全部失败, 开始监控
   400  			// 只有一个worker时不重设连接
   401  			isLeftWorkersAllFailed := mt.IsLeftWorkersAllFailed()
   402  			if mt.status.SpeedsPerSecond() < mt.status.MaxSpeeds()/6 || isLeftWorkersAllFailed {
   403  				if isLeftWorkersAllFailed {
   404  					pcsverbose.Verbosef("DEBUG: monitor: All workers failed\n")
   405  				}
   406  				mt.status.ClearMaxSpeeds() //清空最大速度的统计
   407  
   408  				// 先进行动态分配线程
   409  				pcsverbose.Verbosef("DEBUG: monitor: start duplicate.\n")
   410  				sort.Sort(ByLeftDesc{mt.workers})
   411  				for _, worker := range mt.workers {
   412  					//动态分配线程
   413  					mt.DynamicSplitWorker(worker)
   414  				}
   415  
   416  				// 重设长时间无响应, 和下载速度为 0 的线程
   417  				pcsverbose.Verbosef("DEBUG: monitor: start reload.\n")
   418  				for _, worker := range mt.workers {
   419  					mt.ResetWorker(worker)
   420  				}
   421  			} // end if 2
   422  		} //end select
   423  	} //end for
   424  }