code.gitea.io/gitea@v1.21.7/services/repository/archiver/archiver.go (about)

     1  // Copyright 2020 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package archiver
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"regexp"
    13  	"strings"
    14  	"time"
    15  
    16  	"code.gitea.io/gitea/models/db"
    17  	repo_model "code.gitea.io/gitea/models/repo"
    18  	"code.gitea.io/gitea/modules/git"
    19  	"code.gitea.io/gitea/modules/graceful"
    20  	"code.gitea.io/gitea/modules/log"
    21  	"code.gitea.io/gitea/modules/process"
    22  	"code.gitea.io/gitea/modules/queue"
    23  	"code.gitea.io/gitea/modules/setting"
    24  	"code.gitea.io/gitea/modules/storage"
    25  )
    26  
    27  // ArchiveRequest defines the parameters of an archive request, which notably
    28  // includes the specific repository being archived as well as the commit, the
    29  // name by which it was requested, and the kind of archive being requested.
    30  // This is entirely opaque to external entities, though, and mostly used as a
    31  // handle elsewhere.
    32  type ArchiveRequest struct {
    33  	RepoID   int64
    34  	refName  string
    35  	Type     git.ArchiveType
    36  	CommitID string
    37  }
    38  
    39  // SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all
    40  // the way to 64.
    41  var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`)
    42  
    43  // ErrUnknownArchiveFormat request archive format is not supported
    44  type ErrUnknownArchiveFormat struct {
    45  	RequestFormat string
    46  }
    47  
    48  // Error implements error
    49  func (err ErrUnknownArchiveFormat) Error() string {
    50  	return fmt.Sprintf("unknown format: %s", err.RequestFormat)
    51  }
    52  
    53  // Is implements error
    54  func (ErrUnknownArchiveFormat) Is(err error) bool {
    55  	_, ok := err.(ErrUnknownArchiveFormat)
    56  	return ok
    57  }
    58  
    59  // RepoRefNotFoundError is returned when a requested reference (commit, tag) was not found.
    60  type RepoRefNotFoundError struct {
    61  	RefName string
    62  }
    63  
    64  // Error implements error.
    65  func (e RepoRefNotFoundError) Error() string {
    66  	return fmt.Sprintf("unrecognized repository reference: %s", e.RefName)
    67  }
    68  
    69  func (e RepoRefNotFoundError) Is(err error) bool {
    70  	_, ok := err.(RepoRefNotFoundError)
    71  	return ok
    72  }
    73  
    74  // NewRequest creates an archival request, based on the URI.  The
    75  // resulting ArchiveRequest is suitable for being passed to ArchiveRepository()
    76  // if it's determined that the request still needs to be satisfied.
    77  func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) {
    78  	r := &ArchiveRequest{
    79  		RepoID: repoID,
    80  	}
    81  
    82  	var ext string
    83  	switch {
    84  	case strings.HasSuffix(uri, ".zip"):
    85  		ext = ".zip"
    86  		r.Type = git.ZIP
    87  	case strings.HasSuffix(uri, ".tar.gz"):
    88  		ext = ".tar.gz"
    89  		r.Type = git.TARGZ
    90  	case strings.HasSuffix(uri, ".bundle"):
    91  		ext = ".bundle"
    92  		r.Type = git.BUNDLE
    93  	default:
    94  		return nil, ErrUnknownArchiveFormat{RequestFormat: uri}
    95  	}
    96  
    97  	r.refName = strings.TrimSuffix(uri, ext)
    98  
    99  	var err error
   100  	// Get corresponding commit.
   101  	if repo.IsBranchExist(r.refName) {
   102  		r.CommitID, err = repo.GetBranchCommitID(r.refName)
   103  		if err != nil {
   104  			return nil, err
   105  		}
   106  	} else if repo.IsTagExist(r.refName) {
   107  		r.CommitID, err = repo.GetTagCommitID(r.refName)
   108  		if err != nil {
   109  			return nil, err
   110  		}
   111  	} else if shaRegex.MatchString(r.refName) {
   112  		if repo.IsCommitExist(r.refName) {
   113  			r.CommitID = r.refName
   114  		} else {
   115  			return nil, git.ErrNotExist{
   116  				ID: r.refName,
   117  			}
   118  		}
   119  	} else {
   120  		return nil, RepoRefNotFoundError{RefName: r.refName}
   121  	}
   122  
   123  	return r, nil
   124  }
   125  
   126  // GetArchiveName returns the name of the caller, based on the ref used by the
   127  // caller to create this request.
   128  func (aReq *ArchiveRequest) GetArchiveName() string {
   129  	return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String()
   130  }
   131  
   132  // Await awaits the completion of an ArchiveRequest. If the archive has
   133  // already been prepared the method returns immediately. Otherwise an archiver
   134  // process will be started and its completion awaited. On success the returned
   135  // RepoArchiver may be used to download the archive. Note that even if the
   136  // context is cancelled/times out a started archiver will still continue to run
   137  // in the background.
   138  func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver, error) {
   139  	archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
   140  	if err != nil {
   141  		return nil, fmt.Errorf("models.GetRepoArchiver: %w", err)
   142  	}
   143  
   144  	if archiver != nil && archiver.Status == repo_model.ArchiverReady {
   145  		// Archive already generated, we're done.
   146  		return archiver, nil
   147  	}
   148  
   149  	if err := StartArchive(aReq); err != nil {
   150  		return nil, fmt.Errorf("archiver.StartArchive: %w", err)
   151  	}
   152  
   153  	poll := time.NewTicker(time.Second * 1)
   154  	defer poll.Stop()
   155  
   156  	for {
   157  		select {
   158  		case <-graceful.GetManager().HammerContext().Done():
   159  			// System stopped.
   160  			return nil, graceful.GetManager().HammerContext().Err()
   161  		case <-ctx.Done():
   162  			return nil, ctx.Err()
   163  		case <-poll.C:
   164  			archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
   165  			if err != nil {
   166  				return nil, fmt.Errorf("repo_model.GetRepoArchiver: %w", err)
   167  			}
   168  			if archiver != nil && archiver.Status == repo_model.ArchiverReady {
   169  				return archiver, nil
   170  			}
   171  		}
   172  	}
   173  }
   174  
   175  func doArchive(r *ArchiveRequest) (*repo_model.RepoArchiver, error) {
   176  	txCtx, committer, err := db.TxContext(db.DefaultContext)
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  	defer committer.Close()
   181  	ctx, _, finished := process.GetManager().AddContext(txCtx, fmt.Sprintf("ArchiveRequest[%d]: %s", r.RepoID, r.GetArchiveName()))
   182  	defer finished()
   183  
   184  	archiver, err := repo_model.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID)
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  
   189  	if archiver != nil {
   190  		// FIXME: If another process are generating it, we think it's not ready and just return
   191  		// Or we should wait until the archive generated.
   192  		if archiver.Status == repo_model.ArchiverGenerating {
   193  			return nil, nil
   194  		}
   195  	} else {
   196  		archiver = &repo_model.RepoArchiver{
   197  			RepoID:   r.RepoID,
   198  			Type:     r.Type,
   199  			CommitID: r.CommitID,
   200  			Status:   repo_model.ArchiverGenerating,
   201  		}
   202  		if err := repo_model.AddRepoArchiver(ctx, archiver); err != nil {
   203  			return nil, err
   204  		}
   205  	}
   206  
   207  	rPath := archiver.RelativePath()
   208  	_, err = storage.RepoArchives.Stat(rPath)
   209  	if err == nil {
   210  		if archiver.Status == repo_model.ArchiverGenerating {
   211  			archiver.Status = repo_model.ArchiverReady
   212  			if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
   213  				return nil, err
   214  			}
   215  		}
   216  		return archiver, committer.Commit()
   217  	}
   218  
   219  	if !errors.Is(err, os.ErrNotExist) {
   220  		return nil, fmt.Errorf("unable to stat archive: %w", err)
   221  	}
   222  
   223  	rd, w := io.Pipe()
   224  	defer func() {
   225  		w.Close()
   226  		rd.Close()
   227  	}()
   228  	done := make(chan error, 1) // Ensure that there is some capacity which will ensure that the goroutine below can always finish
   229  	repo, err := repo_model.GetRepositoryByID(ctx, archiver.RepoID)
   230  	if err != nil {
   231  		return nil, fmt.Errorf("archiver.LoadRepo failed: %w", err)
   232  	}
   233  
   234  	gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
   235  	if err != nil {
   236  		return nil, err
   237  	}
   238  	defer gitRepo.Close()
   239  
   240  	go func(done chan error, w *io.PipeWriter, archiver *repo_model.RepoArchiver, gitRepo *git.Repository) {
   241  		defer func() {
   242  			if r := recover(); r != nil {
   243  				done <- fmt.Errorf("%v", r)
   244  			}
   245  		}()
   246  
   247  		if archiver.Type == git.BUNDLE {
   248  			err = gitRepo.CreateBundle(
   249  				ctx,
   250  				archiver.CommitID,
   251  				w,
   252  			)
   253  		} else {
   254  			err = gitRepo.CreateArchive(
   255  				ctx,
   256  				archiver.Type,
   257  				w,
   258  				setting.Repository.PrefixArchiveFiles,
   259  				archiver.CommitID,
   260  			)
   261  		}
   262  		_ = w.CloseWithError(err)
   263  		done <- err
   264  	}(done, w, archiver, gitRepo)
   265  
   266  	// TODO: add lfs data to zip
   267  	// TODO: add submodule data to zip
   268  
   269  	if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil {
   270  		return nil, fmt.Errorf("unable to write archive: %w", err)
   271  	}
   272  
   273  	err = <-done
   274  	if err != nil {
   275  		return nil, err
   276  	}
   277  
   278  	if archiver.Status == repo_model.ArchiverGenerating {
   279  		archiver.Status = repo_model.ArchiverReady
   280  		if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
   281  			return nil, err
   282  		}
   283  	}
   284  
   285  	return archiver, committer.Commit()
   286  }
   287  
   288  // ArchiveRepository satisfies the ArchiveRequest being passed in.  Processing
   289  // will occur in a separate goroutine, as this phase may take a while to
   290  // complete.  If the archive already exists, ArchiveRepository will not do
   291  // anything.  In all cases, the caller should be examining the *ArchiveRequest
   292  // being returned for completion, as it may be different than the one they passed
   293  // in.
   294  func ArchiveRepository(request *ArchiveRequest) (*repo_model.RepoArchiver, error) {
   295  	return doArchive(request)
   296  }
   297  
   298  var archiverQueue *queue.WorkerPoolQueue[*ArchiveRequest]
   299  
   300  // Init initializes archiver
   301  func Init() error {
   302  	handler := func(items ...*ArchiveRequest) []*ArchiveRequest {
   303  		for _, archiveReq := range items {
   304  			log.Trace("ArchiverData Process: %#v", archiveReq)
   305  			if _, err := doArchive(archiveReq); err != nil {
   306  				log.Error("Archive %v failed: %v", archiveReq, err)
   307  			}
   308  		}
   309  		return nil
   310  	}
   311  
   312  	archiverQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo-archive", handler)
   313  	if archiverQueue == nil {
   314  		return errors.New("unable to create repo-archive queue")
   315  	}
   316  	go graceful.GetManager().RunWithCancel(archiverQueue)
   317  
   318  	return nil
   319  }
   320  
   321  // StartArchive push the archive request to the queue
   322  func StartArchive(request *ArchiveRequest) error {
   323  	has, err := archiverQueue.Has(request)
   324  	if err != nil {
   325  		return err
   326  	}
   327  	if has {
   328  		return nil
   329  	}
   330  	return archiverQueue.Push(request)
   331  }
   332  
   333  func deleteOldRepoArchiver(ctx context.Context, archiver *repo_model.RepoArchiver) error {
   334  	if err := repo_model.DeleteRepoArchiver(ctx, archiver); err != nil {
   335  		return err
   336  	}
   337  	p := archiver.RelativePath()
   338  	if err := storage.RepoArchives.Delete(p); err != nil {
   339  		log.Error("delete repo archive file failed: %v", err)
   340  	}
   341  	return nil
   342  }
   343  
   344  // DeleteOldRepositoryArchives deletes old repository archives.
   345  func DeleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration) error {
   346  	log.Trace("Doing: ArchiveCleanup")
   347  
   348  	for {
   349  		archivers, err := repo_model.FindRepoArchives(ctx, repo_model.FindRepoArchiversOption{
   350  			ListOptions: db.ListOptions{
   351  				PageSize: 100,
   352  				Page:     1,
   353  			},
   354  			OlderThan: olderThan,
   355  		})
   356  		if err != nil {
   357  			log.Trace("Error: ArchiveClean: %v", err)
   358  			return err
   359  		}
   360  
   361  		for _, archiver := range archivers {
   362  			if err := deleteOldRepoArchiver(ctx, archiver); err != nil {
   363  				return err
   364  			}
   365  		}
   366  		if len(archivers) < 100 {
   367  			break
   368  		}
   369  	}
   370  
   371  	log.Trace("Finished: ArchiveCleanup")
   372  	return nil
   373  }
   374  
   375  // DeleteRepositoryArchives deletes all repositories' archives.
   376  func DeleteRepositoryArchives(ctx context.Context) error {
   377  	if err := repo_model.DeleteAllRepoArchives(ctx); err != nil {
   378  		return err
   379  	}
   380  	return storage.Clean(storage.RepoArchives)
   381  }