github.com/jfrog/jfrog-cli-core/v2@v2.51.0/artifactory/commands/transferfiles/manager.go

package transferfiles

import (
    "context"
    "errors"
    "fmt"
    "sync"
    "time"

    "github.com/jfrog/gofrog/datastructures"
    "github.com/jfrog/gofrog/parallel"
    "github.com/jfrog/jfrog-cli-core/v2/artifactory/commands/transfer"
    "github.com/jfrog/jfrog-cli-core/v2/artifactory/commands/transferfiles/api"
    "github.com/jfrog/jfrog-cli-core/v2/artifactory/commands/transferfiles/state"
    clientUtils "github.com/jfrog/jfrog-client-go/utils"
    "github.com/jfrog/jfrog-client-go/utils/errorutils"
    "github.com/jfrog/jfrog-client-go/utils/log"
)

const (
    totalNumberPollingGoRoutines = 2
    tasksMaxCapacity             = 5000000
)

type transferManager struct {
    phaseBase
    delayUploadComparisonFunctions []shouldDelayUpload
}

func newTransferManager(base phaseBase, delayUploadComparisonFunctions []shouldDelayUpload) *transferManager {
    return &transferManager{phaseBase: base, delayUploadComparisonFunctions: delayUploadComparisonFunctions}
}

type transferActionWithProducerConsumerType func(
    pcWrapper *producerConsumerWrapper,
    uploadChunkChan chan UploadedChunk,
    delayHelper delayUploadHelper,
    errorsChannelMng *ErrorsChannelMng) error

type transferDelayAction func(phase phaseBase, addedDelayFiles []string) error

// Transfer files using the 'producer-consumer' mechanism and apply a delay action.
func (ftm *transferManager) doTransferWithProducerConsumer(transferAction transferActionWithProducerConsumerType, delayAction transferDelayAction) error {
    // Set the producer-consumer value into the referenced value. This allows the Graceful Stop mechanism to access ftm.pcDetails when needed to stop the transfer.
    *ftm.pcDetails = newProducerConsumerWrapper()
    return ftm.doTransfer(ftm.pcDetails, transferAction, delayAction)
}
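// Illustrative sketch, not part of the original file: how a caller inside this package
// could satisfy the two callback types above. The function name and both closure bodies
// are hypothetical; real phases in this package wire in their own chunk-building and
// delayed-artifacts logic.
func exampleWireTransferAction(ftm *transferManager) error {
    var action transferActionWithProducerConsumerType = func(
        pcWrapper *producerConsumerWrapper,
        uploadChunkChan chan UploadedChunk,
        delayHelper delayUploadHelper,
        errorsChannelMng *ErrorsChannelMng) error {
        // A real action would enqueue chunk-building tasks on pcWrapper here.
        return nil
    }
    var onDelayed transferDelayAction = func(phase phaseBase, addedDelayFiles []string) error {
        // A real delay action would process the listed delayed-artifact files.
        return nil
    }
    return ftm.doTransferWithProducerConsumer(action, onDelayed)
}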
// This function handles a transfer process as part of a phase.
// As part of the process, the transferAction gets executed. It may or may not utilize a producer-consumer.
// The transferAction collects artifacts to be uploaded into chunks, and sends them to the source Artifactory instance to handle asynchronously.
// A UUID token will be returned and sent on a channel, to be polled on for status in pollUploads.
// In some repositories the order of deployment is important. In these cases, any artifacts that should be delayed will be collected by
// the delayedArtifactsMng and will later be handled by handleDelayedArtifactsFiles.
// Any deployment failures will be written to a file by the transferErrorsMng, to be handled on the next run.
// The number of threads affects the producer-consumer (if used) and limits the number of uploaded chunks. The number can be modified externally,
// and will be updated at runtime by periodicallyUpdateThreads.
func (ftm *transferManager) doTransfer(pcWrapper *producerConsumerWrapper, transferAction transferActionWithProducerConsumerType, delayAction transferDelayAction) error {
    uploadChunkChan := make(chan UploadedChunk, transfer.MaxThreadsLimit)
    var runWaitGroup sync.WaitGroup
    var writersWaitGroup sync.WaitGroup

    // Manager for the transfer's error-statuses writing mechanism
    errorsChannelMng := createErrorsChannelMng()
    transferErrorsMng, err := newTransferErrorsToFile(ftm.repoKey, ftm.phaseId, state.ConvertTimeToEpochMilliseconds(ftm.startTime), &errorsChannelMng, ftm.progressBar, ftm.stateManager)
    if err != nil {
        return err
    }
    writersWaitGroup.Add(1)
    go func() {
        defer writersWaitGroup.Done()
        errorsChannelMng.err = transferErrorsMng.start()
    }()

    // Manager for the transfer's delayed artifacts writing mechanism
    delayedArtifactsChannelMng := createdDelayedArtifactsChannelMng()
    delayedArtifactsMng, err := newTransferDelayedArtifactsManager(&delayedArtifactsChannelMng, ftm.repoKey, state.ConvertTimeToEpochMilliseconds(ftm.startTime))
    if err != nil {
        return err
    }
    if len(ftm.delayUploadComparisonFunctions) > 0 {
        writersWaitGroup.Add(1)
        go func() {
            defer writersWaitGroup.Done()
            delayedArtifactsChannelMng.err = delayedArtifactsMng.start()
        }()
    }

    pollingTasksManager := newPollingTasksManager(totalNumberPollingGoRoutines)
    err = pollingTasksManager.start(&ftm.phaseBase, &runWaitGroup, pcWrapper, uploadChunkChan, &errorsChannelMng)
    if err != nil {
        pollingTasksManager.stop()
        return err
    }
    // Transfer action to execute.
    runWaitGroup.Add(1)
    var actionErr error
    var delayUploadHelper = delayUploadHelper{
        ftm.delayUploadComparisonFunctions,
        &delayedArtifactsChannelMng,
    }
    go func() {
        defer runWaitGroup.Done()
        actionErr = transferAction(pcWrapper, uploadChunkChan, delayUploadHelper, &errorsChannelMng)
        if pcWrapper == nil {
            pollingTasksManager.stop()
        }
    }()

    // Run the producer-consumers. This is a blocking call, which makes sure the producer-consumers are closed before anything else.
    executionErr := runProducerConsumers(pcWrapper)
    pollingTasksManager.stop()
    // Wait for the transferAction, the producer-consumers and the polling go routines to exit.
    runWaitGroup.Wait()
    // Close the writer channels.
    errorsChannelMng.close()
    delayedArtifactsChannelMng.close()
    // Wait for the writer channels to exit. Writers must exit last.
    writersWaitGroup.Wait()

    var returnedError error
    for _, err := range []error{actionErr, errorsChannelMng.err, delayedArtifactsChannelMng.err, executionErr, ftm.getInterruptionErr()} {
        if err != nil {
            log.Error(err)
            returnedError = err
        }
    }

    // If a delay action was provided, handle it now.
    if returnedError == nil && delayAction != nil {
        var addedDelayFiles []string
        // If the transfer generated new delay files, provide them.
        if delayedArtifactsMng.delayedWriter != nil {
            addedDelayFiles = delayedArtifactsMng.delayedWriter.contentFiles
        }
        returnedError = delayAction(ftm.phaseBase, addedDelayFiles)
    }
    return returnedError
}
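// Illustrative sketch, not part of the original file: the shutdown ordering that
// doTransfer relies on, reduced to plain channels and WaitGroups. Workers are waited
// on first; only then is the channel the writer consumes closed, so the writer can
// drain everything before its own WaitGroup is released.
func exampleShutdownOrdering() {
    work := make(chan int, 8)
    var workers, writers sync.WaitGroup

    writers.Add(1)
    go func() {
        defer writers.Done()
        for range work { // Drains until the channel is closed.
        }
    }()

    workers.Add(1)
    go func() {
        defer workers.Done()
        for i := 0; i < 3; i++ {
            work <- i
        }
    }()

    workers.Wait() // 1. No more producers.
    close(work)    // 2. Signal the writer to finish draining.
    writers.Wait() // 3. Writers exit last, exactly as in doTransfer.
}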
type PollingTasksManager struct {
    // Done channel notifies the polling go routines that no more tasks are expected.
    doneChannel chan bool
    // Number of go routines expected to write to the doneChannel
    totalGoRoutines int
    // The actual number of running go routines
    totalRunningGoRoutines int
}

func newPollingTasksManager(totalGoRoutines int) PollingTasksManager {
    // The channel's size is 'totalGoRoutines', since there is a limited number of routines that need to be signaled to stop by 'doneChannel'.
    return PollingTasksManager{doneChannel: make(chan bool, totalGoRoutines), totalGoRoutines: totalGoRoutines}
}

// Runs 2 go routines:
// 1. Periodically update the worker threads count & check whether the process should be stopped.
// 2. Poll for uploaded chunks.
func (ptm *PollingTasksManager) start(phaseBase *phaseBase, runWaitGroup *sync.WaitGroup, pcWrapper *producerConsumerWrapper, uploadChunkChan chan UploadedChunk, errorsChannelMng *ErrorsChannelMng) error {
    // Update threads by polling on the settings file.
    runWaitGroup.Add(1)
    err := ptm.addGoRoutine()
    if err != nil {
        return err
    }
    go func() {
        defer runWaitGroup.Done()
        periodicallyUpdateThreadsAndStopStatus(pcWrapper, ptm.doneChannel, phaseBase.buildInfoRepo, phaseBase.stopSignal)
    }()

    // Check the status of uploaded chunks.
    runWaitGroup.Add(1)
    err = ptm.addGoRoutine()
    if err != nil {
        return err
    }
    go func() {
        defer runWaitGroup.Done()
        pollUploads(pcWrapper, phaseBase, phaseBase.srcUpService, uploadChunkChan, ptm.doneChannel, errorsChannelMng)
    }()
    return nil
}

func (ptm *PollingTasksManager) addGoRoutine() error {
    if ptm.totalGoRoutines < ptm.totalRunningGoRoutines+1 {
        return errorutils.CheckErrorf("can't create another polling go routine. maximum number of go routines is: %d", ptm.totalGoRoutines)
    }
    ptm.totalRunningGoRoutines++
    return nil
}

func (ptm *PollingTasksManager) stop() {
    // Notify the other go routines that work is done.
    for i := 0; i < ptm.totalRunningGoRoutines; i++ {
        ptm.doneChannel <- true
    }
}
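// Illustrative sketch, not part of the original file: why the buffered doneChannel
// above works as a stop broadcast. stop() sends one value per running goroutine, and
// the buffer guarantees those sends never block, even if a receiver has already
// returned for another reason.
func exampleDoneChannelBroadcast() {
    const goroutines = 2
    done := make(chan bool, goroutines) // Buffered, like PollingTasksManager.doneChannel.
    var wg sync.WaitGroup

    for i := 0; i < goroutines; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            <-done // Each goroutine consumes exactly one stop signal.
        }()
    }

    // The equivalent of PollingTasksManager.stop(): one non-blocking send per goroutine.
    for i := 0; i < goroutines; i++ {
        done <- true
    }
    wg.Wait()
}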
func newProducerConsumerWrapper() producerConsumerWrapper {
    chunkUploaderProducerConsumer := parallel.NewRunner(GetChunkUploaderThreads(), tasksMaxCapacity, false)
    chunkBuilderProducerConsumer := parallel.NewRunner(GetChunkBuilderThreads(), tasksMaxCapacity, false)
    chunkUploaderProducerConsumer.SetFinishedNotification(true)
    chunkBuilderProducerConsumer.SetFinishedNotification(true)
    errorsQueue := clientUtils.NewErrorsQueue(1)

    return producerConsumerWrapper{
        chunkUploaderProducerConsumer: chunkUploaderProducerConsumer,
        chunkBuilderProducerConsumer:  chunkBuilderProducerConsumer,
        errorsQueue:                   errorsQueue,
    }
}

// Run the two producer-consumers that run the transfer.
// When a producer-consumer is idle for assumeProducerConsumerDoneWhenIdleForSeconds (no tasks are being handled),
// the work is assumed to be done.
// Order in this function matters! We want to make sure chunkUploaderProducerConsumer is only done after chunkBuilderProducerConsumer is done.
func runProducerConsumers(pcWrapper *producerConsumerWrapper) (executionErr error) {
    go func() {
        pcWrapper.chunkUploaderProducerConsumer.Run()
    }()
    go func() {
        // Wait till notified that the builder has no additional tasks, and close the builder producer-consumer.
        <-pcWrapper.chunkBuilderProducerConsumer.GetFinishedNotification()
        log.Debug("Chunk builder producer consumer has completed all tasks. " +
            "All files relevant to this phase were found and added to chunks that are being uploaded...")
        pcWrapper.chunkBuilderProducerConsumer.Done()
    }()

    // Run() is a blocking method, so once all chunk builders are idle, the tasks queue closes and Run() stops running.
    pcWrapper.chunkBuilderProducerConsumer.Run()
    if pcWrapper.chunkUploaderProducerConsumer.IsStarted() {
        // There might be a moment when the chunk uploader has no upload tasks.
        // This circumstance might lead to setting the finish notification before completing all file uploads.
        // To address this, we reset the finish notification to ensure no upload tasks remain after the next finish notification.
        pcWrapper.chunkUploaderProducerConsumer.ResetFinishNotificationIfActive()
        // Wait till notified that the uploader finished its tasks, and it will not receive new tasks from the builder.
        <-pcWrapper.chunkUploaderProducerConsumer.GetFinishedNotification()
        log.Debug("Chunk uploader producer consumer has completed all tasks. All files relevant to this phase have been uploaded.")
    }
    // Close the tasks queue with Done().
    pcWrapper.chunkUploaderProducerConsumer.Done()
    executionErr = pcWrapper.errorsQueue.GetError()
    return
}
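// Illustrative sketch, not part of the original file: the "uploader finishes only
// after the builder" ordering from runProducerConsumers, reduced to two stages
// connected by a channel. Closing the stage-1 output is what lets stage 2 observe a
// genuine end of input, mirroring builder.Done() preceding uploader.Done().
func exampleStageOrdering() {
    built := make(chan string, 4) // Builder output feeding the uploader.
    uploaded := make(chan struct{})

    // Stage 2: the "uploader" drains until the builder closes the channel.
    go func() {
        for range built {
            // An upload would happen here.
        }
        close(uploaded)
    }()

    // Stage 1: the "builder" produces everything, then signals completion by closing.
    for _, f := range []string{"a.jar", "b.jar"} {
        built <- f
    }
    close(built) // Plays the role of chunkBuilderProducerConsumer.Done().

    <-uploaded // Only now is the uploader definitively finished.
}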
// This function polls on chunks of files that were uploaded during one of the phases.
// It does so by requesting the status of each chunk, by sending the UUID token that was returned when the chunk was uploaded.
// The number of chunks is limited by the number of threads.
// Whenever a chunk's status is received as DONE, its token is removed from the tokens batch, making room for a new chunk to be uploaded
// and a new token to be polled on.
func pollUploads(pcWrapper *producerConsumerWrapper, phaseBase *phaseBase, srcUpService *srcUserPluginService, uploadChunkChan chan UploadedChunk, doneChan chan bool, errorsChannelMng *ErrorsChannelMng) {
    curTokensBatch := api.UploadChunksStatusBody{}
    chunksLifeCycleManager := ChunksLifeCycleManager{
        deletedChunksSet: datastructures.MakeSet[api.ChunkId](),
        nodeToChunksMap:  make(map[api.NodeId]map[api.ChunkId]UploadedChunkData),
    }
    var timeEstMng *state.TimeEstimationManager
    if phaseBase != nil {
        timeEstMng = &phaseBase.stateManager.TimeEstimationManager
    }
    for i := 0; ; i++ {
        if ShouldStop(phaseBase, nil, errorsChannelMng) {
            log.Debug("Stop signal received while polling on uploads...")
            return
        }
        time.Sleep(waitTimeBetweenChunkStatusSeconds * time.Second)

        // Runs once every 5 minutes.
        if i%60 == 0 {
            // 'Working threads' are determined by how many upload chunks are currently being processed by the source Artifactory instance.
            if err := phaseBase.stateManager.SetWorkingThreads(pcWrapper.totalProcessedUploadChunks); err != nil {
                log.Error("Couldn't set the current number of working threads:", err.Error())
            }
            log.Debug("There are", len(phaseBase.stateManager.StaleChunks), "chunks in transit for more than 30 minutes")
            log.Debug(fmt.Sprintf("Chunks in transit: %v", chunksLifeCycleManager.GetNodeIdToChunkIdsMap()))
        }

        // Each uploading thread receives a token and a node id from the source via the uploadChunkChan, so this go routine can poll on its status.
        activeChunks := fillChunkDataBatch(&chunksLifeCycleManager, uploadChunkChan)
        if err := chunksLifeCycleManager.StoreStaleChunks(phaseBase.stateManager); err != nil {
            log.Error("Couldn't store the stale chunks:", err.Error())
        }
        // When the active chunks count is zero, all tokens have been handled:
        // we received 'DONE' for all of them, and we notified the source that they can be deleted from memory.
        // If some chunk data was lost during polling due to network issues, either on the client or on the source,
        // it will be written to the error channel.
        if activeChunks == 0 {
            if shouldStopPolling(doneChan) {
                log.Debug("Stopping polling on uploads...")
                return
            }
            log.Debug("Active chunks counter is 0, but the 'done' signal hasn't been received yet")
            continue
        }

        chunksStatus, err := sendSyncChunksRequest(curTokensBatch, &chunksLifeCycleManager, srcUpService)
        if err != nil {
            continue
        }
        // Clear the body for the next request.
        curTokensBatch = api.UploadChunksStatusBody{}
        removeDeletedChunksFromSet(chunksStatus.DeletedChunks, chunksLifeCycleManager.deletedChunksSet)
        toStop := handleChunksStatuses(pcWrapper, phaseBase, &chunksStatus, &chunksLifeCycleManager, timeEstMng, errorsChannelMng)
        if toStop {
            return
        }
    }
}

// Fill the chunk data batch until it is full. Return early if no new chunk data is available.
func fillChunkDataBatch(chunksLifeCycleManager *ChunksLifeCycleManager, uploadChunkChan chan UploadedChunk) (activeChunks int) {
    for _, activeNodeChunks := range chunksLifeCycleManager.nodeToChunksMap {
        activeChunks += len(activeNodeChunks)
    }
    for ; activeChunks < GetChunkUploaderThreads(); activeChunks++ {
        select {
        case data := <-uploadChunkChan:
            currentNodeId := api.NodeId(data.NodeId)
            currentChunkId := api.ChunkId(data.UuidToken)
            if _, exist := chunksLifeCycleManager.nodeToChunksMap[currentNodeId]; !exist {
                chunksLifeCycleManager.nodeToChunksMap[currentNodeId] = make(map[api.ChunkId]UploadedChunkData)
            }
            chunksLifeCycleManager.nodeToChunksMap[currentNodeId][currentChunkId] = data.UploadedChunkData
        default:
            // No new tokens are waiting.
            return
        }
    }
    return
}

func shouldStopPolling(doneChan chan bool) bool {
    select {
    case done := <-doneChan:
        return done
    default:
    }
    return false
}

// Send the sync-chunks request to the source, and handle the returned error, if any.
func sendSyncChunksRequest(curTokensBatch api.UploadChunksStatusBody, chunksLifeCycleManager *ChunksLifeCycleManager, srcUpService *srcUserPluginService) (api.UploadChunksStatusResponse, error) {
    curTokensBatch.AwaitingStatusChunks = chunksLifeCycleManager.GetInProgressTokensSlice()
    curTokensBatch.ChunksToDelete = chunksLifeCycleManager.deletedChunksSet.ToSlice()
    chunksStatus, err := srcUpService.syncChunks(curTokensBatch)
    // Log the error only if the transfer wasn't interrupted by the user.
    if err != nil && !errors.Is(err, context.Canceled) {
        log.Error("error returned when getting upload chunks statuses: " + err.Error())
    }
    return chunksStatus, err
}
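// Illustrative sketch, not part of the original file: the non-blocking
// select-with-default idiom used by both fillChunkDataBatch and shouldStopPolling.
// It takes whatever is immediately available on a channel, up to a capacity, and
// never blocks the polling loop while doing so.
func exampleNonBlockingDrain(in chan int, capacity int) (taken []int) {
    for len(taken) < capacity {
        select {
        case v := <-in:
            taken = append(taken, v)
        default:
            return // Nothing waiting right now; let the caller keep polling.
        }
    }
    return
}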
func removeDeletedChunksFromSet(deletedChunks []string, deletedChunksSet *datastructures.Set[api.ChunkId]) {
    // deletedChunks is an array received from the source, confirming which chunks were deleted from the source side.
    // In deletedChunksSet, we keep only chunks for which we have yet to receive confirmation.
    for _, deletedChunk := range deletedChunks {
        err := deletedChunksSet.Remove(api.ChunkId(deletedChunk))
        if err != nil {
            log.Error(err.Error())
            continue
        }
    }
}

// handleChunksStatuses handles the chunk statuses from the response received from the source Artifactory instance.
// It syncs the chunk status between the CLI and the source Artifactory instance.
// When a chunk is DONE, the progress bar is updated, and the number of working threads is decreased.
func handleChunksStatuses(pcWrapper *producerConsumerWrapper, phase *phaseBase, chunksStatus *api.UploadChunksStatusResponse,
    chunksLifeCycleManager *ChunksLifeCycleManager, timeEstMng *state.TimeEstimationManager, errorsChannelMng *ErrorsChannelMng) bool {
    checkChunkStatusSync(pcWrapper, chunksStatus, chunksLifeCycleManager, errorsChannelMng)
    for _, chunk := range chunksStatus.ChunksStatus {
        if chunk.UuidToken == "" {
            log.Error("Unexpected empty uuid token in status")
            continue
        }
        switch chunk.Status {
        case api.InProgress:
            continue
        case api.Done:
            pcWrapper.decProcessedChunks()
            log.Debug("Received status DONE for chunk '" + chunk.UuidToken + "'")

            chunkSentTime := chunksLifeCycleManager.nodeToChunksMap[api.NodeId(chunksStatus.NodeId)][api.ChunkId(chunk.UuidToken)].TimeSent
            err := updateProgress(phase, timeEstMng, chunk, chunkSentTime)
            if err != nil {
                log.Error("Unexpected error in progress update: " + err.Error())
                continue
            }
            delete(chunksLifeCycleManager.nodeToChunksMap[api.NodeId(chunksStatus.NodeId)], api.ChunkId(chunk.UuidToken))
            // Using the deletedChunksSet, we inform the source that the 'DONE' message has been received, and it no longer has to keep those chunks' UUIDs.
            chunksLifeCycleManager.deletedChunksSet.Add(api.ChunkId(chunk.UuidToken))
            stopped := handleFilesOfCompletedChunk(chunk.Files, errorsChannelMng)
            // If an error occurred while writing error statuses to the errors file, stop transferring.
            if stopped {
                log.Debug("Stop signal received while handling chunks statuses...")
                return true
            }
            err = setChunkCompletedInRepoSnapshot(phase.stateManager, chunk.Files)
            if err != nil {
                log.Error(err)
                continue
            }
        }
    }
    return false
}

func updateProgress(phase *phaseBase, timeEstMng *state.TimeEstimationManager,
    chunk api.ChunkStatus, chunkSentTime time.Time) error {
    if phase == nil {
        return nil
    }

    err := state.UpdateChunkInState(phase.stateManager, &chunk)
    if err != nil {
        return err
    }

    if timeEstMng != nil {
        timeEstMng.AddChunkStatus(chunk, time.Since(chunkSentTime).Milliseconds())
    }
    return nil
}
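// Illustrative sketch, not part of the original file: the chunk lifecycle bookkeeping
// that handleChunksStatuses performs, reduced to a plain map and a set-like map. The
// "uuid-1" token is made up. A DONE chunk moves from the in-flight map to the
// acknowledged set, and the acknowledgement is echoed back to the source on the next
// sync request as ChunksToDelete, letting the source drop it from memory.
func exampleChunkLifecycle() {
    inFlight := map[string]time.Time{"uuid-1": time.Now()} // Like nodeToChunksMap for one node.
    acknowledged := map[string]struct{}{}                  // Like deletedChunksSet.

    // A DONE status arrives for "uuid-1":
    delete(inFlight, "uuid-1")
    acknowledged["uuid-1"] = struct{}{}
}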
// Verify and handle in-progress chunks synchronization between the CLI and the source Artifactory instance.
func checkChunkStatusSync(pcWrapper *producerConsumerWrapper, chunkStatus *api.UploadChunksStatusResponse, chunksLifeCycleManager *ChunksLifeCycleManager, errorsChannelMng *ErrorsChannelMng) {
    // Compare the number of chunks received in the latest syncChunks response with the chunks data we handle locally in nodeToChunksMap.
    // If the number of in-progress chunks of a node within nodeToChunksMap differs from the received chunkStatus, data is missing on the source side.
    expectedChunksInNode := len(chunksLifeCycleManager.nodeToChunksMap[api.NodeId(chunkStatus.NodeId)])
    actualChunksInNode := len(chunkStatus.ChunksStatus)
    if actualChunksInNode != expectedChunksInNode {
        log.Info(fmt.Sprintf("NodeID %s: Missing chunks detected. Expected: %d, Received: %d. Storing absent chunks in error channels for later retry.",
            chunkStatus.NodeId, expectedChunksInNode, actualChunksInNode))
        // Collect all the chunk UUIDs reported by the Artifactory side into a set.
        chunksUuidsSetFromResponse := datastructures.MakeSet[api.ChunkId]()
        for _, chunk := range chunkStatus.ChunksStatus {
            chunksUuidsSetFromResponse.Add(api.ChunkId(chunk.UuidToken))
        }
        // Get all the chunk UUIDs on the CLI side.
        chunksUuidsSliceFromMap := chunksLifeCycleManager.GetInProgressTokensSliceByNodeId(api.NodeId(chunkStatus.NodeId))
        failedFile := api.FileUploadStatusResponse{
            Status:     api.Fail,
            StatusCode: SyncErrorStatusCode,
            Reason:     SyncErrorReason,
        }
        // Send all chunks missing from the source Artifactory instance to errorsChannelMng.
        // Missing chunks are those that are inside chunksUuidsSliceFromMap but not in chunksUuidsSetFromResponse.
        for _, chunkUuid := range chunksUuidsSliceFromMap {
            if !chunksUuidsSetFromResponse.Exists(chunkUuid) {
                for _, file := range chunksLifeCycleManager.nodeToChunksMap[api.NodeId(chunkStatus.NodeId)][chunkUuid].ChunkFiles {
                    failedFile.FileRepresentation = file
                    // errorsChannelMng will upload failed files again in phase 3 or in an additional transfer-files run.
                    addErrorToChannel(errorsChannelMng, failedFile)
                }
                delete(chunksLifeCycleManager.nodeToChunksMap[api.NodeId(chunkStatus.NodeId)], chunkUuid)
                pcWrapper.decProcessedChunks()
            }
        }
    }
}
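// Illustrative sketch, not part of the original file: the set-difference check at the
// heart of checkChunkStatusSync, with plain string slices and a map standing in for
// the gofrog set. Anything tracked locally but absent from the response is treated as
// lost and would be requeued through the errors channel.
func exampleMissingChunks(local, reported []string) (missing []string) {
    reportedSet := make(map[string]struct{}, len(reported))
    for _, id := range reported {
        reportedSet[id] = struct{}{}
    }
    for _, id := range local {
        if _, ok := reportedSet[id]; !ok {
            missing = append(missing, id) // Would be routed to errorsChannelMng.
        }
    }
    return
}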