code.gitea.io/gitea@v1.22.3/services/repository/archiver/archiver.go (about)

     1  // Copyright 2020 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package archiver
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"strings"
    13  	"time"
    14  
    15  	"code.gitea.io/gitea/models/db"
    16  	repo_model "code.gitea.io/gitea/models/repo"
    17  	"code.gitea.io/gitea/modules/git"
    18  	"code.gitea.io/gitea/modules/gitrepo"
    19  	"code.gitea.io/gitea/modules/graceful"
    20  	"code.gitea.io/gitea/modules/log"
    21  	"code.gitea.io/gitea/modules/process"
    22  	"code.gitea.io/gitea/modules/queue"
    23  	"code.gitea.io/gitea/modules/setting"
    24  	"code.gitea.io/gitea/modules/storage"
    25  )
    26  
// ArchiveRequest defines the parameters of an archive request, which notably
// includes the specific repository being archived as well as the commit, the
// name by which it was requested, and the kind of archive being requested.
// This is entirely opaque to external entities, though, and mostly used as a
// handle elsewhere.
type ArchiveRequest struct {
	// RepoID is the ID of the repository to archive.
	RepoID int64
	// refName is the reference as it was requested: NewRequest sets it to the
	// request URI with the archive extension trimmed.
	refName string
	// Type is the archive format; NewRequest derives it from the URI suffix.
	Type git.ArchiveType
	// CommitID is the string form of the commit ID refName resolved to.
	CommitID string
}
    38  
    39  // ErrUnknownArchiveFormat request archive format is not supported
    40  type ErrUnknownArchiveFormat struct {
    41  	RequestFormat string
    42  }
    43  
    44  // Error implements error
    45  func (err ErrUnknownArchiveFormat) Error() string {
    46  	return fmt.Sprintf("unknown format: %s", err.RequestFormat)
    47  }
    48  
    49  // Is implements error
    50  func (ErrUnknownArchiveFormat) Is(err error) bool {
    51  	_, ok := err.(ErrUnknownArchiveFormat)
    52  	return ok
    53  }
    54  
    55  // RepoRefNotFoundError is returned when a requested reference (commit, tag) was not found.
    56  type RepoRefNotFoundError struct {
    57  	RefName string
    58  }
    59  
    60  // Error implements error.
    61  func (e RepoRefNotFoundError) Error() string {
    62  	return fmt.Sprintf("unrecognized repository reference: %s", e.RefName)
    63  }
    64  
    65  func (e RepoRefNotFoundError) Is(err error) bool {
    66  	_, ok := err.(RepoRefNotFoundError)
    67  	return ok
    68  }
    69  
    70  // NewRequest creates an archival request, based on the URI.  The
    71  // resulting ArchiveRequest is suitable for being passed to ArchiveRepository()
    72  // if it's determined that the request still needs to be satisfied.
    73  func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) {
    74  	r := &ArchiveRequest{
    75  		RepoID: repoID,
    76  	}
    77  
    78  	var ext string
    79  	switch {
    80  	case strings.HasSuffix(uri, ".zip"):
    81  		ext = ".zip"
    82  		r.Type = git.ZIP
    83  	case strings.HasSuffix(uri, ".tar.gz"):
    84  		ext = ".tar.gz"
    85  		r.Type = git.TARGZ
    86  	case strings.HasSuffix(uri, ".bundle"):
    87  		ext = ".bundle"
    88  		r.Type = git.BUNDLE
    89  	default:
    90  		return nil, ErrUnknownArchiveFormat{RequestFormat: uri}
    91  	}
    92  
    93  	r.refName = strings.TrimSuffix(uri, ext)
    94  
    95  	// Get corresponding commit.
    96  	commitID, err := repo.ConvertToGitID(r.refName)
    97  	if err != nil {
    98  		return nil, RepoRefNotFoundError{RefName: r.refName}
    99  	}
   100  
   101  	r.CommitID = commitID.String()
   102  	return r, nil
   103  }
   104  
   105  // GetArchiveName returns the name of the caller, based on the ref used by the
   106  // caller to create this request.
   107  func (aReq *ArchiveRequest) GetArchiveName() string {
   108  	return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String()
   109  }
   110  
   111  // Await awaits the completion of an ArchiveRequest. If the archive has
   112  // already been prepared the method returns immediately. Otherwise an archiver
   113  // process will be started and its completion awaited. On success the returned
   114  // RepoArchiver may be used to download the archive. Note that even if the
   115  // context is cancelled/times out a started archiver will still continue to run
   116  // in the background.
   117  func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver, error) {
   118  	archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
   119  	if err != nil {
   120  		return nil, fmt.Errorf("models.GetRepoArchiver: %w", err)
   121  	}
   122  
   123  	if archiver != nil && archiver.Status == repo_model.ArchiverReady {
   124  		// Archive already generated, we're done.
   125  		return archiver, nil
   126  	}
   127  
   128  	if err := StartArchive(aReq); err != nil {
   129  		return nil, fmt.Errorf("archiver.StartArchive: %w", err)
   130  	}
   131  
   132  	poll := time.NewTicker(time.Second * 1)
   133  	defer poll.Stop()
   134  
   135  	for {
   136  		select {
   137  		case <-graceful.GetManager().HammerContext().Done():
   138  			// System stopped.
   139  			return nil, graceful.GetManager().HammerContext().Err()
   140  		case <-ctx.Done():
   141  			return nil, ctx.Err()
   142  		case <-poll.C:
   143  			archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
   144  			if err != nil {
   145  				return nil, fmt.Errorf("repo_model.GetRepoArchiver: %w", err)
   146  			}
   147  			if archiver != nil && archiver.Status == repo_model.ArchiverReady {
   148  				return archiver, nil
   149  			}
   150  		}
   151  	}
   152  }
   153  
   154  func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver, error) {
   155  	txCtx, committer, err := db.TxContext(ctx)
   156  	if err != nil {
   157  		return nil, err
   158  	}
   159  	defer committer.Close()
   160  	ctx, _, finished := process.GetManager().AddContext(txCtx, fmt.Sprintf("ArchiveRequest[%d]: %s", r.RepoID, r.GetArchiveName()))
   161  	defer finished()
   162  
   163  	archiver, err := repo_model.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID)
   164  	if err != nil {
   165  		return nil, err
   166  	}
   167  
   168  	if archiver != nil {
   169  		// FIXME: If another process are generating it, we think it's not ready and just return
   170  		// Or we should wait until the archive generated.
   171  		if archiver.Status == repo_model.ArchiverGenerating {
   172  			return nil, nil
   173  		}
   174  	} else {
   175  		archiver = &repo_model.RepoArchiver{
   176  			RepoID:   r.RepoID,
   177  			Type:     r.Type,
   178  			CommitID: r.CommitID,
   179  			Status:   repo_model.ArchiverGenerating,
   180  		}
   181  		if err := db.Insert(ctx, archiver); err != nil {
   182  			return nil, err
   183  		}
   184  	}
   185  
   186  	rPath := archiver.RelativePath()
   187  	_, err = storage.RepoArchives.Stat(rPath)
   188  	if err == nil {
   189  		if archiver.Status == repo_model.ArchiverGenerating {
   190  			archiver.Status = repo_model.ArchiverReady
   191  			if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
   192  				return nil, err
   193  			}
   194  		}
   195  		return archiver, committer.Commit()
   196  	}
   197  
   198  	if !errors.Is(err, os.ErrNotExist) {
   199  		return nil, fmt.Errorf("unable to stat archive: %w", err)
   200  	}
   201  
   202  	rd, w := io.Pipe()
   203  	defer func() {
   204  		w.Close()
   205  		rd.Close()
   206  	}()
   207  	done := make(chan error, 1) // Ensure that there is some capacity which will ensure that the goroutine below can always finish
   208  	repo, err := repo_model.GetRepositoryByID(ctx, archiver.RepoID)
   209  	if err != nil {
   210  		return nil, fmt.Errorf("archiver.LoadRepo failed: %w", err)
   211  	}
   212  
   213  	gitRepo, err := gitrepo.OpenRepository(ctx, repo)
   214  	if err != nil {
   215  		return nil, err
   216  	}
   217  	defer gitRepo.Close()
   218  
   219  	go func(done chan error, w *io.PipeWriter, archiver *repo_model.RepoArchiver, gitRepo *git.Repository) {
   220  		defer func() {
   221  			if r := recover(); r != nil {
   222  				done <- fmt.Errorf("%v", r)
   223  			}
   224  		}()
   225  
   226  		if archiver.Type == git.BUNDLE {
   227  			err = gitRepo.CreateBundle(
   228  				ctx,
   229  				archiver.CommitID,
   230  				w,
   231  			)
   232  		} else {
   233  			err = gitRepo.CreateArchive(
   234  				ctx,
   235  				archiver.Type,
   236  				w,
   237  				setting.Repository.PrefixArchiveFiles,
   238  				archiver.CommitID,
   239  			)
   240  		}
   241  		_ = w.CloseWithError(err)
   242  		done <- err
   243  	}(done, w, archiver, gitRepo)
   244  
   245  	// TODO: add lfs data to zip
   246  	// TODO: add submodule data to zip
   247  
   248  	if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil {
   249  		return nil, fmt.Errorf("unable to write archive: %w", err)
   250  	}
   251  
   252  	err = <-done
   253  	if err != nil {
   254  		return nil, err
   255  	}
   256  
   257  	if archiver.Status == repo_model.ArchiverGenerating {
   258  		archiver.Status = repo_model.ArchiverReady
   259  		if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil {
   260  			return nil, err
   261  		}
   262  	}
   263  
   264  	return archiver, committer.Commit()
   265  }
   266  
   267  // ArchiveRepository satisfies the ArchiveRequest being passed in.  Processing
   268  // will occur in a separate goroutine, as this phase may take a while to
   269  // complete.  If the archive already exists, ArchiveRepository will not do
   270  // anything.  In all cases, the caller should be examining the *ArchiveRequest
   271  // being returned for completion, as it may be different than the one they passed
   272  // in.
   273  func ArchiveRepository(ctx context.Context, request *ArchiveRequest) (*repo_model.RepoArchiver, error) {
   274  	return doArchive(ctx, request)
   275  }
   276  
// archiverQueue holds the pending archive requests. It is created by Init
// and fed by StartArchive.
var archiverQueue *queue.WorkerPoolQueue[*ArchiveRequest]
   278  
   279  // Init initializes archiver
   280  func Init(ctx context.Context) error {
   281  	handler := func(items ...*ArchiveRequest) []*ArchiveRequest {
   282  		for _, archiveReq := range items {
   283  			log.Trace("ArchiverData Process: %#v", archiveReq)
   284  			if _, err := doArchive(ctx, archiveReq); err != nil {
   285  				log.Error("Archive %v failed: %v", archiveReq, err)
   286  			}
   287  		}
   288  		return nil
   289  	}
   290  
   291  	archiverQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo-archive", handler)
   292  	if archiverQueue == nil {
   293  		return errors.New("unable to create repo-archive queue")
   294  	}
   295  	go graceful.GetManager().RunWithCancel(archiverQueue)
   296  
   297  	return nil
   298  }
   299  
   300  // StartArchive push the archive request to the queue
   301  func StartArchive(request *ArchiveRequest) error {
   302  	has, err := archiverQueue.Has(request)
   303  	if err != nil {
   304  		return err
   305  	}
   306  	if has {
   307  		return nil
   308  	}
   309  	return archiverQueue.Push(request)
   310  }
   311  
   312  func deleteOldRepoArchiver(ctx context.Context, archiver *repo_model.RepoArchiver) error {
   313  	if _, err := db.DeleteByID[repo_model.RepoArchiver](ctx, archiver.ID); err != nil {
   314  		return err
   315  	}
   316  	p := archiver.RelativePath()
   317  	if err := storage.RepoArchives.Delete(p); err != nil {
   318  		log.Error("delete repo archive file failed: %v", err)
   319  	}
   320  	return nil
   321  }
   322  
   323  // DeleteOldRepositoryArchives deletes old repository archives.
   324  func DeleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration) error {
   325  	log.Trace("Doing: ArchiveCleanup")
   326  
   327  	for {
   328  		archivers, err := db.Find[repo_model.RepoArchiver](ctx, repo_model.FindRepoArchiversOption{
   329  			ListOptions: db.ListOptions{
   330  				PageSize: 100,
   331  				Page:     1,
   332  			},
   333  			OlderThan: olderThan,
   334  		})
   335  		if err != nil {
   336  			log.Trace("Error: ArchiveClean: %v", err)
   337  			return err
   338  		}
   339  
   340  		for _, archiver := range archivers {
   341  			if err := deleteOldRepoArchiver(ctx, archiver); err != nil {
   342  				return err
   343  			}
   344  		}
   345  		if len(archivers) < 100 {
   346  			break
   347  		}
   348  	}
   349  
   350  	log.Trace("Finished: ArchiveCleanup")
   351  	return nil
   352  }
   353  
   354  // DeleteRepositoryArchives deletes all repositories' archives.
   355  func DeleteRepositoryArchives(ctx context.Context) error {
   356  	if err := repo_model.DeleteAllRepoArchives(ctx); err != nil {
   357  		return err
   358  	}
   359  	return storage.Clean(storage.RepoArchives)
   360  }