code.gitea.io/gitea@v1.21.7/services/repository/lfs.go (about)

     1  // Copyright 2022 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package repository
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"time"
    11  
    12  	git_model "code.gitea.io/gitea/models/git"
    13  	repo_model "code.gitea.io/gitea/models/repo"
    14  	"code.gitea.io/gitea/modules/git"
    15  	"code.gitea.io/gitea/modules/lfs"
    16  	"code.gitea.io/gitea/modules/log"
    17  	"code.gitea.io/gitea/modules/setting"
    18  	"code.gitea.io/gitea/modules/timeutil"
    19  )
    20  
    21  // GarbageCollectLFSMetaObjectsOptions provides options for GarbageCollectLFSMetaObjects function
    22  type GarbageCollectLFSMetaObjectsOptions struct {
    23  	LogDetail                func(format string, v ...any)
    24  	AutoFix                  bool
    25  	OlderThan                time.Time
    26  	UpdatedLessRecentlyThan  time.Time
    27  	NumberToCheckPerRepo     int64
    28  	ProportionToCheckPerRepo float64
    29  }
    30  
    31  // GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
    32  func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMetaObjectsOptions) error {
    33  	log.Trace("Doing: GarbageCollectLFSMetaObjects")
    34  	defer log.Trace("Finished: GarbageCollectLFSMetaObjects")
    35  
    36  	if opts.LogDetail == nil {
    37  		opts.LogDetail = log.Debug
    38  	}
    39  
    40  	if !setting.LFS.StartServer {
    41  		opts.LogDetail("LFS support is disabled")
    42  		return nil
    43  	}
    44  
    45  	return git_model.IterateRepositoryIDsWithLFSMetaObjects(ctx, func(ctx context.Context, repoID, count int64) error {
    46  		repo, err := repo_model.GetRepositoryByID(ctx, repoID)
    47  		if err != nil {
    48  			return err
    49  		}
    50  
    51  		if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
    52  			opts.NumberToCheckPerRepo = newMinimum
    53  		}
    54  		return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
    55  	})
    56  }
    57  
    58  // GarbageCollectLFSMetaObjectsForRepo garbage collects LFS objects for a specific repository
    59  func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, opts GarbageCollectLFSMetaObjectsOptions) error {
    60  	opts.LogDetail("Checking %-v", repo)
    61  	total, orphaned, collected, deleted := int64(0), 0, 0, 0
    62  	defer func() {
    63  		if orphaned == 0 {
    64  			opts.LogDetail("Found %d total LFSMetaObjects in %-v", total, repo)
    65  		} else if !opts.AutoFix {
    66  			opts.LogDetail("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
    67  		} else {
    68  			opts.LogDetail("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
    69  		}
    70  	}()
    71  
    72  	gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
    73  	if err != nil {
    74  		log.Error("Unable to open git repository %-v: %v", repo, err)
    75  		return err
    76  	}
    77  	defer gitRepo.Close()
    78  
    79  	store := lfs.NewContentStore()
    80  	errStop := errors.New("STOPERR")
    81  
    82  	err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
    83  		if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
    84  			return errStop
    85  		}
    86  		total++
    87  		pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))
    88  
    89  		if gitRepo.IsObjectExist(pointerSha.String()) {
    90  			return git_model.MarkLFSMetaObject(ctx, metaObject.ID)
    91  		}
    92  		orphaned++
    93  
    94  		if !opts.AutoFix {
    95  			return nil
    96  		}
    97  		// Non-existent pointer file
    98  		_, err = git_model.RemoveLFSMetaObjectByOidFn(ctx, repo.ID, metaObject.Oid, func(count int64) error {
    99  			if count > 0 {
   100  				return nil
   101  			}
   102  
   103  			if err := store.Delete(metaObject.RelativePath()); err != nil {
   104  				log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, err)
   105  			}
   106  			deleted++
   107  			return nil
   108  		})
   109  		if err != nil {
   110  			return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err)
   111  		}
   112  		collected++
   113  
   114  		return nil
   115  	}, &git_model.IterateLFSMetaObjectsForRepoOptions{
   116  		// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
   117  		// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
   118  		// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
   119  		// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
   120  		// objects.
   121  		//
   122  		// It is likely that a week is potentially excessive but it should definitely be enough that any
   123  		// unassociated LFS object is genuinely unassociated.
   124  		OlderThan:                 timeutil.TimeStamp(opts.OlderThan.Unix()),
   125  		UpdatedLessRecentlyThan:   timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
   126  		OrderByUpdated:            true,
   127  		LoopFunctionAlwaysUpdates: true,
   128  	})
   129  
   130  	if err == errStop {
   131  		opts.LogDetail("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
   132  		return nil
   133  	} else if err != nil {
   134  		return err
   135  	}
   136  	return nil
   137  }