github.com/jfrog/jfrog-cli-core/v2@v2.51.0/artifactory/commands/transferfiles/filesdiff.go (about)

     1  package transferfiles
     2  
     3  import (
     4  	"fmt"
     5  	"path"
     6  	"time"
     7  
     8  	"github.com/jfrog/gofrog/parallel"
     9  	"github.com/jfrog/jfrog-cli-core/v2/artifactory/commands/transferfiles/api"
    10  	servicesUtils "github.com/jfrog/jfrog-client-go/artifactory/services/utils"
    11  	clientUtils "github.com/jfrog/jfrog-client-go/utils"
    12  	"github.com/jfrog/jfrog-client-go/utils/errorutils"
    13  	"github.com/jfrog/jfrog-client-go/utils/log"
    14  )
    15  
// searchTimeFramesMinutes is the length, in minutes, of a single search time frame.
// When handling files diffs, the whole time range being handled is split into time frames of this length,
// in order to receive smaller results from the AQLs.
const searchTimeFramesMinutes = 15
    18  
// Manages the phase of fixing files diffs (files that were created/modified after they were transferred),
// and handling transfer failures that have been collected during previous runs and phases.
type filesDiffPhase struct {
	// Embedded base of the phase. The methods in this file reach their shared state through it
	// (for example stateManager, progressBar, transferManager, repoKey and srcRtDetails).
	phaseBase
}
    24  
    25  func (f *filesDiffPhase) initProgressBar() error {
    26  	if f.progressBar != nil {
    27  		f.progressBar.AddPhase2()
    28  	}
    29  	return nil
    30  }
    31  
    32  func (f *filesDiffPhase) getPhaseName() string {
    33  	return "Files Diff Handling Phase"
    34  }
    35  
    36  func (f *filesDiffPhase) phaseStarted() error {
    37  	f.startTime = time.Now()
    38  	return f.stateManager.AddNewDiffToState(f.startTime)
    39  }
    40  
    41  func (f *filesDiffPhase) phaseDone() error {
    42  	// If the phase stopped gracefully, don't mark the phase as completed
    43  	if !f.ShouldStop() {
    44  		if err := f.stateManager.SetFilesDiffHandlingCompleted(); err != nil {
    45  			return err
    46  		}
    47  	}
    48  
    49  	if f.progressBar != nil {
    50  		return f.progressBar.DonePhase(f.phaseId)
    51  	}
    52  	return nil
    53  }
    54  
    55  func (f *filesDiffPhase) shouldSkipPhase() (bool, error) {
    56  	return false, nil
    57  }
    58  
    59  func (f *filesDiffPhase) run() error {
    60  	return f.handleDiffTimeFrames()
    61  }
    62  
    63  // Split the time range of fixing files diffs into smaller time frames and handle them separately with smaller AQLs.
    64  // Diffs found (files created/modifies) are uploaded in chunks, then polled on for status.
    65  func (f *filesDiffPhase) handleDiffTimeFrames() error {
    66  	log.Info("Starting to handle files diffs...")
    67  	diffRangeStart, diffRangeEnd, err := f.stateManager.GetDiffHandlingRange()
    68  	if err != nil {
    69  		return err
    70  	}
    71  
    72  	f.transferManager = newTransferManager(f.phaseBase, getDelayUploadComparisonFunctions(f.repoSummary.PackageType))
    73  	action := func(pcWrapper *producerConsumerWrapper, uploadChunkChan chan UploadedChunk, delayHelper delayUploadHelper, errorsChannelMng *ErrorsChannelMng) error {
    74  		// Create tasks to handle files diffs in time frames of searchTimeFramesMinutes.
    75  		// In case an error occurred while handling errors/delayed artifacts files - stop transferring.
    76  		curDiffTimeFrame := diffRangeStart
    77  		for diffRangeEnd.Sub(curDiffTimeFrame) > 0 && !ShouldStop(&f.phaseBase, &delayHelper, errorsChannelMng) {
    78  			diffTimeFrameHandler := f.createDiffTimeFrameHandlerFunc(pcWrapper, uploadChunkChan, delayHelper, errorsChannelMng)
    79  			_, err = pcWrapper.chunkBuilderProducerConsumer.AddTaskWithError(diffTimeFrameHandler(timeFrameParams{fromTime: curDiffTimeFrame}), pcWrapper.errorsQueue.AddError)
    80  			if err != nil {
    81  				return err
    82  			}
    83  			curDiffTimeFrame = curDiffTimeFrame.Add(searchTimeFramesMinutes * time.Minute)
    84  		}
    85  		return nil
    86  	}
    87  	delayAction := consumeDelayFilesIfNoErrors
    88  	err = f.transferManager.doTransferWithProducerConsumer(action, delayAction)
    89  	if err == nil {
    90  		log.Info("Done handling files diffs.")
    91  	}
    92  	return err
    93  }
    94  
// diffTimeFrameHandlerFunc receives the parameters of a single time frame
// and returns the task (parallel.TaskFunc) that handles that time frame.
type diffTimeFrameHandlerFunc func(params timeFrameParams) parallel.TaskFunc
    96  
// timeFrameParams holds the parameters of a single time frame handled by the files diff phase.
type timeFrameParams struct {
	// Start time of the time frame. The end of the time frame is computed by the handler,
	// by adding searchTimeFramesMinutes minutes to this value.
	fromTime time.Time
}
   100  
   101  func (f *filesDiffPhase) createDiffTimeFrameHandlerFunc(pcWrapper *producerConsumerWrapper, uploadChunkChan chan UploadedChunk, delayHelper delayUploadHelper, errorsChannelMng *ErrorsChannelMng) diffTimeFrameHandlerFunc {
   102  	return func(params timeFrameParams) parallel.TaskFunc {
   103  		return func(threadId int) error {
   104  			logMsgPrefix := clientUtils.GetLogMsgPrefix(threadId, false)
   105  			return f.handleTimeFrameFilesDiff(pcWrapper, params, logMsgPrefix, uploadChunkChan, delayHelper, errorsChannelMng)
   106  		}
   107  	}
   108  }
   109  
   110  func (f *filesDiffPhase) handleTimeFrameFilesDiff(pcWrapper *producerConsumerWrapper, params timeFrameParams, logMsgPrefix string, uploadChunkChan chan UploadedChunk, delayHelper delayUploadHelper, errorsChannelMng *ErrorsChannelMng) error {
   111  	fromTimestamp := params.fromTime.Format(time.RFC3339)
   112  	toTimestamp := params.fromTime.Add(searchTimeFramesMinutes * time.Minute).Format(time.RFC3339)
   113  	log.Debug(logMsgPrefix + "Searching time frame: '" + fromTimestamp + "' to '" + toTimestamp + "'")
   114  
   115  	paginationI := 0
   116  	for {
   117  		result, lastPage, err := f.getTimeFrameFilesDiff(fromTimestamp, toTimestamp, paginationI)
   118  		if err != nil {
   119  			return err
   120  		}
   121  		if len(result) == 0 {
   122  			if paginationI == 0 {
   123  				log.Debug("No diffs were found in time frame: '" + fromTimestamp + "' to '" + toTimestamp + "'")
   124  			}
   125  			break
   126  		}
   127  		files := convertResultsToFileRepresentation(result)
   128  		totalSize := 0
   129  		for _, r := range files {
   130  			totalSize += int(r.Size)
   131  		}
   132  
   133  		err = f.transferManager.stateManager.IncTotalSizeAndFilesPhase2(int64(len(files)), int64(totalSize))
   134  		if err != nil {
   135  			return err
   136  		}
   137  		storage, _, _, _, err := f.transferManager.stateManager.GetStorageAndFilesRepoPointers(f.phaseId)
   138  		if err != nil {
   139  			return err
   140  		}
   141  		if f.progressBar != nil {
   142  			f.progressBar.phases[f.phaseId].GetTasksProgressBar().SetGeneralProgressTotal(*storage)
   143  		}
   144  		shouldStop, err := uploadByChunks(files, uploadChunkChan, f.phaseBase, delayHelper, errorsChannelMng, pcWrapper)
   145  		if err != nil || shouldStop {
   146  			return err
   147  		}
   148  
   149  		if lastPage {
   150  			break
   151  		}
   152  		paginationI++
   153  	}
   154  
   155  	if f.progressBar != nil {
   156  		err := f.progressBar.IncrementPhase(f.phaseId)
   157  		if err != nil {
   158  			return err
   159  		}
   160  	}
   161  	log.Debug(logMsgPrefix + "Done handling time frame: '" + fromTimestamp + "' to '" + toTimestamp + "'")
   162  	return nil
   163  }
   164  
   165  func convertResultsToFileRepresentation(results []servicesUtils.ResultItem) (files []api.FileRepresentation) {
   166  	for _, result := range results {
   167  		switch result.Type {
   168  		case "folder":
   169  			var pathInRepo string
   170  			if result.Path == "." {
   171  				pathInRepo = result.Name
   172  			} else {
   173  				pathInRepo = path.Join(result.Path, result.Name)
   174  			}
   175  			files = append(files, api.FileRepresentation{
   176  				Repo: result.Repo,
   177  				Path: pathInRepo,
   178  			})
   179  		default:
   180  			files = append(files, api.FileRepresentation{
   181  				Repo: result.Repo,
   182  				Path: result.Path,
   183  				Name: result.Name,
   184  				Size: result.Size,
   185  			})
   186  		}
   187  	}
   188  	return
   189  }
   190  
   191  // Get a list of changed files and folders between the input timestamps.
   192  // fromTimestamp - Time in RFC3339 represents the start time
   193  // toTimestamp - Time in RFC3339 represents the end time
   194  // paginationOffset - Requested page
   195  // Return values:
   196  // result - The list of changed files and folders between the input timestamps
   197  // lastPage - True if we are in the last AQL page and it is not needed to run another AQL requests
   198  // err - The error, if any occurred
   199  func (f *filesDiffPhase) getTimeFrameFilesDiff(fromTimestamp, toTimestamp string, paginationOffset int) (result []servicesUtils.ResultItem, lastPage bool, err error) {
   200  	var timeFrameFilesDiff *servicesUtils.AqlSearchResult
   201  	if f.packageType == docker {
   202  		// Handle Docker repositories.
   203  		timeFrameFilesDiff, err = f.getDockerTimeFrameFilesDiff(fromTimestamp, toTimestamp, paginationOffset)
   204  	} else {
   205  		// Handle all other (non docker) repository types.
   206  		timeFrameFilesDiff, err = f.getNonDockerTimeFrameFilesDiff(fromTimestamp, toTimestamp, paginationOffset)
   207  	}
   208  	if err != nil {
   209  		return []servicesUtils.ResultItem{}, true, err
   210  	}
   211  	lastPage = len(timeFrameFilesDiff.Results) < AqlPaginationLimit
   212  	result, err = f.locallyGeneratedFilter.FilterLocallyGenerated(timeFrameFilesDiff.Results)
   213  	return
   214  }
   215  
   216  func (f *filesDiffPhase) getNonDockerTimeFrameFilesDiff(fromTimestamp, toTimestamp string, paginationOffset int) (aqlResult *servicesUtils.AqlSearchResult, err error) {
   217  	query := generateDiffAqlQuery(f.repoKey, fromTimestamp, toTimestamp, paginationOffset, f.disabledDistinctiveAql)
   218  	return runAql(f.context, f.srcRtDetails, query)
   219  }
   220  
   221  // We handle docker repositories differently from other repositories.
   222  // The reason is as follows. If a docker layer already exists in Artifactory, and we try to upload it again to a different repository or a different path,
   223  // its creation time will be the time of the initial upload, and not the latest one. This means that the layer will not be picked up and transferred as part of Phase 2.
   224  // To avoid this situation, we look for all "manifest.json" and "list.manifest.json" files, and for each "manifest.json", we will run a search AQL in Artifactory
   225  // to get all artifacts in its path (that includes the "manifest.json" file itself and all its layouts).
   226  func (f *filesDiffPhase) getDockerTimeFrameFilesDiff(fromTimestamp, toTimestamp string, paginationOffset int) (aqlResult *servicesUtils.AqlSearchResult, err error) {
   227  	// Get all newly created or modified manifest files ("manifest.json" and "list.manifest.json" files)
   228  	query := generateDockerManifestAqlQuery(f.repoKey, fromTimestamp, toTimestamp, paginationOffset, f.disabledDistinctiveAql)
   229  	manifestFilesResult, err := runAql(f.context, f.srcRtDetails, query)
   230  	if err != nil {
   231  		return
   232  	}
   233  	var result []servicesUtils.ResultItem
   234  	if len(manifestFilesResult.Results) > 0 {
   235  		var manifestPaths []string
   236  		// Add the "list.manifest.json" files to the result, skip "manifest.json" files and save their paths separately.
   237  		for _, file := range manifestFilesResult.Results {
   238  			switch file.Name {
   239  			case "manifest.json":
   240  				manifestPaths = append(manifestPaths, file.Path)
   241  			case "list.manifest.json":
   242  			default:
   243  				err = errorutils.CheckErrorf("unexpected file name returned from AQL query. Expecting either 'manifest.json' or 'list.manifest.json'. Received '%s'.", file.Name)
   244  				return
   245  			}
   246  		}
   247  		if manifestPaths != nil {
   248  			// Get all content of Artifactory folders containing a "manifest.json" file.
   249  			query = generateGetDirContentAqlQuery(f.repoKey, manifestPaths)
   250  			var pathsResult *servicesUtils.AqlSearchResult
   251  			pathsResult, err = runAql(f.context, f.srcRtDetails, query)
   252  			if err != nil {
   253  				return
   254  			}
   255  			// Merge "list.manifest.json" files with all other files.
   256  			result = append(result, pathsResult.Results...)
   257  		}
   258  	}
   259  	aqlResult = &servicesUtils.AqlSearchResult{}
   260  	aqlResult.Results = result
   261  	return
   262  }
   263  
   264  func generateDiffAqlQuery(repoKey, fromTimestamp, toTimestamp string, paginationOffset int, disabledDistinctiveAql bool) string {
   265  	query := fmt.Sprintf(`items.find({"$and":[{"modified":{"$gte":"%s"}},{"modified":{"$lt":"%s"}},{"repo":"%s","type":"any"}]})`, fromTimestamp, toTimestamp, repoKey)
   266  	query += `.include("repo","path","name","type","modified","size")`
   267  	return query + generateAqlSortingPart(paginationOffset, disabledDistinctiveAql)
   268  }
   269  
   270  // This function generates an AQL that searches for all the content in the list of provided Artifactory paths.
   271  func generateGetDirContentAqlQuery(repoKey string, paths []string) string {
   272  	query := `items.find({"$or":[`
   273  	for i, path := range paths {
   274  		query += fmt.Sprintf(`{"$and":[{"repo":"%s","path":{"$match":"%s"},"name":{"$match":"*"}}]}`, repoKey, path)
   275  		// Add comma for all paths except for the last one.
   276  		if i != len(paths)-1 {
   277  			query += ","
   278  		}
   279  	}
   280  	query += `]}).include("name","repo","path","sha256","size","type","modified","created")`
   281  	return query
   282  }
   283  
   284  // This function generates an AQL that searches for all files named "manifest.json" and "list.manifest.json" in a specific repository.
   285  func generateDockerManifestAqlQuery(repoKey, fromTimestamp, toTimestamp string, paginationOffset int, disabledDistinctiveAql bool) string {
   286  	query := `items.find({"$and":`
   287  	query += fmt.Sprintf(`[{"repo":"%s"},{"modified":{"$gte":"%s"}},{"modified":{"$lt":"%s"}},{"$or":[{"name":"manifest.json"},{"name":"list.manifest.json"}]}`, repoKey, fromTimestamp, toTimestamp)
   288  	query += `]}).include("repo","path","name","type","modified")`
   289  	return query + generateAqlSortingPart(paginationOffset, disabledDistinctiveAql)
   290  }
   291  
   292  func generateAqlSortingPart(paginationOffset int, disabledDistinctiveAql bool) string {
   293  	sortingPart := fmt.Sprintf(`.sort({"$asc":["name","path"]}).offset(%d).limit(%d)`, paginationOffset*AqlPaginationLimit, AqlPaginationLimit)
   294  	sortingPart += appendDistinctIfNeeded(disabledDistinctiveAql)
   295  	return sortingPart
   296  }