github.com/jfrog/jfrog-cli-core/v2@v2.51.0/artifactory/commands/transferfiles/filesdiff.go (about) 1 package transferfiles 2 3 import ( 4 "fmt" 5 "path" 6 "time" 7 8 "github.com/jfrog/gofrog/parallel" 9 "github.com/jfrog/jfrog-cli-core/v2/artifactory/commands/transferfiles/api" 10 servicesUtils "github.com/jfrog/jfrog-client-go/artifactory/services/utils" 11 clientUtils "github.com/jfrog/jfrog-client-go/utils" 12 "github.com/jfrog/jfrog-client-go/utils/errorutils" 13 "github.com/jfrog/jfrog-client-go/utils/log" 14 ) 15 16 // When handling files diff, we split the whole time range being handled by searchTimeFramesMinutes in order to receive smaller results from the AQLs. 17 const searchTimeFramesMinutes = 15 18 19 // Manages the phase of fixing files diffs (files that were created/modified after they were transferred), 20 // and handling transfer failures that have been collected during previous runs and phases. 21 type filesDiffPhase struct { 22 phaseBase 23 } 24 25 func (f *filesDiffPhase) initProgressBar() error { 26 if f.progressBar != nil { 27 f.progressBar.AddPhase2() 28 } 29 return nil 30 } 31 32 func (f *filesDiffPhase) getPhaseName() string { 33 return "Files Diff Handling Phase" 34 } 35 36 func (f *filesDiffPhase) phaseStarted() error { 37 f.startTime = time.Now() 38 return f.stateManager.AddNewDiffToState(f.startTime) 39 } 40 41 func (f *filesDiffPhase) phaseDone() error { 42 // If the phase stopped gracefully, don't mark the phase as completed 43 if !f.ShouldStop() { 44 if err := f.stateManager.SetFilesDiffHandlingCompleted(); err != nil { 45 return err 46 } 47 } 48 49 if f.progressBar != nil { 50 return f.progressBar.DonePhase(f.phaseId) 51 } 52 return nil 53 } 54 55 func (f *filesDiffPhase) shouldSkipPhase() (bool, error) { 56 return false, nil 57 } 58 59 func (f *filesDiffPhase) run() error { 60 return f.handleDiffTimeFrames() 61 } 62 63 // Split the time range of fixing files diffs into smaller time frames and handle them separately with smaller AQLs. 64 // Diffs found (files created/modifies) are uploaded in chunks, then polled on for status. 65 func (f *filesDiffPhase) handleDiffTimeFrames() error { 66 log.Info("Starting to handle files diffs...") 67 diffRangeStart, diffRangeEnd, err := f.stateManager.GetDiffHandlingRange() 68 if err != nil { 69 return err 70 } 71 72 f.transferManager = newTransferManager(f.phaseBase, getDelayUploadComparisonFunctions(f.repoSummary.PackageType)) 73 action := func(pcWrapper *producerConsumerWrapper, uploadChunkChan chan UploadedChunk, delayHelper delayUploadHelper, errorsChannelMng *ErrorsChannelMng) error { 74 // Create tasks to handle files diffs in time frames of searchTimeFramesMinutes. 75 // In case an error occurred while handling errors/delayed artifacts files - stop transferring. 76 curDiffTimeFrame := diffRangeStart 77 for diffRangeEnd.Sub(curDiffTimeFrame) > 0 && !ShouldStop(&f.phaseBase, &delayHelper, errorsChannelMng) { 78 diffTimeFrameHandler := f.createDiffTimeFrameHandlerFunc(pcWrapper, uploadChunkChan, delayHelper, errorsChannelMng) 79 _, err = pcWrapper.chunkBuilderProducerConsumer.AddTaskWithError(diffTimeFrameHandler(timeFrameParams{fromTime: curDiffTimeFrame}), pcWrapper.errorsQueue.AddError) 80 if err != nil { 81 return err 82 } 83 curDiffTimeFrame = curDiffTimeFrame.Add(searchTimeFramesMinutes * time.Minute) 84 } 85 return nil 86 } 87 delayAction := consumeDelayFilesIfNoErrors 88 err = f.transferManager.doTransferWithProducerConsumer(action, delayAction) 89 if err == nil { 90 log.Info("Done handling files diffs.") 91 } 92 return err 93 } 94 95 type diffTimeFrameHandlerFunc func(params timeFrameParams) parallel.TaskFunc 96 97 type timeFrameParams struct { 98 fromTime time.Time 99 } 100 101 func (f *filesDiffPhase) createDiffTimeFrameHandlerFunc(pcWrapper *producerConsumerWrapper, uploadChunkChan chan UploadedChunk, delayHelper delayUploadHelper, errorsChannelMng *ErrorsChannelMng) diffTimeFrameHandlerFunc { 102 return func(params timeFrameParams) parallel.TaskFunc { 103 return func(threadId int) error { 104 logMsgPrefix := clientUtils.GetLogMsgPrefix(threadId, false) 105 return f.handleTimeFrameFilesDiff(pcWrapper, params, logMsgPrefix, uploadChunkChan, delayHelper, errorsChannelMng) 106 } 107 } 108 } 109 110 func (f *filesDiffPhase) handleTimeFrameFilesDiff(pcWrapper *producerConsumerWrapper, params timeFrameParams, logMsgPrefix string, uploadChunkChan chan UploadedChunk, delayHelper delayUploadHelper, errorsChannelMng *ErrorsChannelMng) error { 111 fromTimestamp := params.fromTime.Format(time.RFC3339) 112 toTimestamp := params.fromTime.Add(searchTimeFramesMinutes * time.Minute).Format(time.RFC3339) 113 log.Debug(logMsgPrefix + "Searching time frame: '" + fromTimestamp + "' to '" + toTimestamp + "'") 114 115 paginationI := 0 116 for { 117 result, lastPage, err := f.getTimeFrameFilesDiff(fromTimestamp, toTimestamp, paginationI) 118 if err != nil { 119 return err 120 } 121 if len(result) == 0 { 122 if paginationI == 0 { 123 log.Debug("No diffs were found in time frame: '" + fromTimestamp + "' to '" + toTimestamp + "'") 124 } 125 break 126 } 127 files := convertResultsToFileRepresentation(result) 128 totalSize := 0 129 for _, r := range files { 130 totalSize += int(r.Size) 131 } 132 133 err = f.transferManager.stateManager.IncTotalSizeAndFilesPhase2(int64(len(files)), int64(totalSize)) 134 if err != nil { 135 return err 136 } 137 storage, _, _, _, err := f.transferManager.stateManager.GetStorageAndFilesRepoPointers(f.phaseId) 138 if err != nil { 139 return err 140 } 141 if f.progressBar != nil { 142 f.progressBar.phases[f.phaseId].GetTasksProgressBar().SetGeneralProgressTotal(*storage) 143 } 144 shouldStop, err := uploadByChunks(files, uploadChunkChan, f.phaseBase, delayHelper, errorsChannelMng, pcWrapper) 145 if err != nil || shouldStop { 146 return err 147 } 148 149 if lastPage { 150 break 151 } 152 paginationI++ 153 } 154 155 if f.progressBar != nil { 156 err := f.progressBar.IncrementPhase(f.phaseId) 157 if err != nil { 158 return err 159 } 160 } 161 log.Debug(logMsgPrefix + "Done handling time frame: '" + fromTimestamp + "' to '" + toTimestamp + "'") 162 return nil 163 } 164 165 func convertResultsToFileRepresentation(results []servicesUtils.ResultItem) (files []api.FileRepresentation) { 166 for _, result := range results { 167 switch result.Type { 168 case "folder": 169 var pathInRepo string 170 if result.Path == "." { 171 pathInRepo = result.Name 172 } else { 173 pathInRepo = path.Join(result.Path, result.Name) 174 } 175 files = append(files, api.FileRepresentation{ 176 Repo: result.Repo, 177 Path: pathInRepo, 178 }) 179 default: 180 files = append(files, api.FileRepresentation{ 181 Repo: result.Repo, 182 Path: result.Path, 183 Name: result.Name, 184 Size: result.Size, 185 }) 186 } 187 } 188 return 189 } 190 191 // Get a list of changed files and folders between the input timestamps. 192 // fromTimestamp - Time in RFC3339 represents the start time 193 // toTimestamp - Time in RFC3339 represents the end time 194 // paginationOffset - Requested page 195 // Return values: 196 // result - The list of changed files and folders between the input timestamps 197 // lastPage - True if we are in the last AQL page and it is not needed to run another AQL requests 198 // err - The error, if any occurred 199 func (f *filesDiffPhase) getTimeFrameFilesDiff(fromTimestamp, toTimestamp string, paginationOffset int) (result []servicesUtils.ResultItem, lastPage bool, err error) { 200 var timeFrameFilesDiff *servicesUtils.AqlSearchResult 201 if f.packageType == docker { 202 // Handle Docker repositories. 203 timeFrameFilesDiff, err = f.getDockerTimeFrameFilesDiff(fromTimestamp, toTimestamp, paginationOffset) 204 } else { 205 // Handle all other (non docker) repository types. 206 timeFrameFilesDiff, err = f.getNonDockerTimeFrameFilesDiff(fromTimestamp, toTimestamp, paginationOffset) 207 } 208 if err != nil { 209 return []servicesUtils.ResultItem{}, true, err 210 } 211 lastPage = len(timeFrameFilesDiff.Results) < AqlPaginationLimit 212 result, err = f.locallyGeneratedFilter.FilterLocallyGenerated(timeFrameFilesDiff.Results) 213 return 214 } 215 216 func (f *filesDiffPhase) getNonDockerTimeFrameFilesDiff(fromTimestamp, toTimestamp string, paginationOffset int) (aqlResult *servicesUtils.AqlSearchResult, err error) { 217 query := generateDiffAqlQuery(f.repoKey, fromTimestamp, toTimestamp, paginationOffset, f.disabledDistinctiveAql) 218 return runAql(f.context, f.srcRtDetails, query) 219 } 220 221 // We handle docker repositories differently from other repositories. 222 // The reason is as follows. If a docker layer already exists in Artifactory, and we try to upload it again to a different repository or a different path, 223 // its creation time will be the time of the initial upload, and not the latest one. This means that the layer will not be picked up and transferred as part of Phase 2. 224 // To avoid this situation, we look for all "manifest.json" and "list.manifest.json" files, and for each "manifest.json", we will run a search AQL in Artifactory 225 // to get all artifacts in its path (that includes the "manifest.json" file itself and all its layouts). 226 func (f *filesDiffPhase) getDockerTimeFrameFilesDiff(fromTimestamp, toTimestamp string, paginationOffset int) (aqlResult *servicesUtils.AqlSearchResult, err error) { 227 // Get all newly created or modified manifest files ("manifest.json" and "list.manifest.json" files) 228 query := generateDockerManifestAqlQuery(f.repoKey, fromTimestamp, toTimestamp, paginationOffset, f.disabledDistinctiveAql) 229 manifestFilesResult, err := runAql(f.context, f.srcRtDetails, query) 230 if err != nil { 231 return 232 } 233 var result []servicesUtils.ResultItem 234 if len(manifestFilesResult.Results) > 0 { 235 var manifestPaths []string 236 // Add the "list.manifest.json" files to the result, skip "manifest.json" files and save their paths separately. 237 for _, file := range manifestFilesResult.Results { 238 switch file.Name { 239 case "manifest.json": 240 manifestPaths = append(manifestPaths, file.Path) 241 case "list.manifest.json": 242 default: 243 err = errorutils.CheckErrorf("unexpected file name returned from AQL query. Expecting either 'manifest.json' or 'list.manifest.json'. Received '%s'.", file.Name) 244 return 245 } 246 } 247 if manifestPaths != nil { 248 // Get all content of Artifactory folders containing a "manifest.json" file. 249 query = generateGetDirContentAqlQuery(f.repoKey, manifestPaths) 250 var pathsResult *servicesUtils.AqlSearchResult 251 pathsResult, err = runAql(f.context, f.srcRtDetails, query) 252 if err != nil { 253 return 254 } 255 // Merge "list.manifest.json" files with all other files. 256 result = append(result, pathsResult.Results...) 257 } 258 } 259 aqlResult = &servicesUtils.AqlSearchResult{} 260 aqlResult.Results = result 261 return 262 } 263 264 func generateDiffAqlQuery(repoKey, fromTimestamp, toTimestamp string, paginationOffset int, disabledDistinctiveAql bool) string { 265 query := fmt.Sprintf(`items.find({"$and":[{"modified":{"$gte":"%s"}},{"modified":{"$lt":"%s"}},{"repo":"%s","type":"any"}]})`, fromTimestamp, toTimestamp, repoKey) 266 query += `.include("repo","path","name","type","modified","size")` 267 return query + generateAqlSortingPart(paginationOffset, disabledDistinctiveAql) 268 } 269 270 // This function generates an AQL that searches for all the content in the list of provided Artifactory paths. 271 func generateGetDirContentAqlQuery(repoKey string, paths []string) string { 272 query := `items.find({"$or":[` 273 for i, path := range paths { 274 query += fmt.Sprintf(`{"$and":[{"repo":"%s","path":{"$match":"%s"},"name":{"$match":"*"}}]}`, repoKey, path) 275 // Add comma for all paths except for the last one. 276 if i != len(paths)-1 { 277 query += "," 278 } 279 } 280 query += `]}).include("name","repo","path","sha256","size","type","modified","created")` 281 return query 282 } 283 284 // This function generates an AQL that searches for all files named "manifest.json" and "list.manifest.json" in a specific repository. 285 func generateDockerManifestAqlQuery(repoKey, fromTimestamp, toTimestamp string, paginationOffset int, disabledDistinctiveAql bool) string { 286 query := `items.find({"$and":` 287 query += fmt.Sprintf(`[{"repo":"%s"},{"modified":{"$gte":"%s"}},{"modified":{"$lt":"%s"}},{"$or":[{"name":"manifest.json"},{"name":"list.manifest.json"}]}`, repoKey, fromTimestamp, toTimestamp) 288 query += `]}).include("repo","path","name","type","modified")` 289 return query + generateAqlSortingPart(paginationOffset, disabledDistinctiveAql) 290 } 291 292 func generateAqlSortingPart(paginationOffset int, disabledDistinctiveAql bool) string { 293 sortingPart := fmt.Sprintf(`.sort({"$asc":["name","path"]}).offset(%d).limit(%d)`, paginationOffset*AqlPaginationLimit, AqlPaginationLimit) 294 sortingPart += appendDistinctIfNeeded(disabledDistinctiveAql) 295 return sortingPart 296 }