code.gitea.io/gitea@v1.21.7/services/repository/archiver/archiver.go (about) 1 // Copyright 2020 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package archiver 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "io" 11 "os" 12 "regexp" 13 "strings" 14 "time" 15 16 "code.gitea.io/gitea/models/db" 17 repo_model "code.gitea.io/gitea/models/repo" 18 "code.gitea.io/gitea/modules/git" 19 "code.gitea.io/gitea/modules/graceful" 20 "code.gitea.io/gitea/modules/log" 21 "code.gitea.io/gitea/modules/process" 22 "code.gitea.io/gitea/modules/queue" 23 "code.gitea.io/gitea/modules/setting" 24 "code.gitea.io/gitea/modules/storage" 25 ) 26 27 // ArchiveRequest defines the parameters of an archive request, which notably 28 // includes the specific repository being archived as well as the commit, the 29 // name by which it was requested, and the kind of archive being requested. 30 // This is entirely opaque to external entities, though, and mostly used as a 31 // handle elsewhere. 32 type ArchiveRequest struct { 33 RepoID int64 34 refName string 35 Type git.ArchiveType 36 CommitID string 37 } 38 39 // SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all 40 // the way to 64. 41 var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`) 42 43 // ErrUnknownArchiveFormat request archive format is not supported 44 type ErrUnknownArchiveFormat struct { 45 RequestFormat string 46 } 47 48 // Error implements error 49 func (err ErrUnknownArchiveFormat) Error() string { 50 return fmt.Sprintf("unknown format: %s", err.RequestFormat) 51 } 52 53 // Is implements error 54 func (ErrUnknownArchiveFormat) Is(err error) bool { 55 _, ok := err.(ErrUnknownArchiveFormat) 56 return ok 57 } 58 59 // RepoRefNotFoundError is returned when a requested reference (commit, tag) was not found. 60 type RepoRefNotFoundError struct { 61 RefName string 62 } 63 64 // Error implements error. 65 func (e RepoRefNotFoundError) Error() string { 66 return fmt.Sprintf("unrecognized repository reference: %s", e.RefName) 67 } 68 69 func (e RepoRefNotFoundError) Is(err error) bool { 70 _, ok := err.(RepoRefNotFoundError) 71 return ok 72 } 73 74 // NewRequest creates an archival request, based on the URI. The 75 // resulting ArchiveRequest is suitable for being passed to ArchiveRepository() 76 // if it's determined that the request still needs to be satisfied. 77 func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) { 78 r := &ArchiveRequest{ 79 RepoID: repoID, 80 } 81 82 var ext string 83 switch { 84 case strings.HasSuffix(uri, ".zip"): 85 ext = ".zip" 86 r.Type = git.ZIP 87 case strings.HasSuffix(uri, ".tar.gz"): 88 ext = ".tar.gz" 89 r.Type = git.TARGZ 90 case strings.HasSuffix(uri, ".bundle"): 91 ext = ".bundle" 92 r.Type = git.BUNDLE 93 default: 94 return nil, ErrUnknownArchiveFormat{RequestFormat: uri} 95 } 96 97 r.refName = strings.TrimSuffix(uri, ext) 98 99 var err error 100 // Get corresponding commit. 101 if repo.IsBranchExist(r.refName) { 102 r.CommitID, err = repo.GetBranchCommitID(r.refName) 103 if err != nil { 104 return nil, err 105 } 106 } else if repo.IsTagExist(r.refName) { 107 r.CommitID, err = repo.GetTagCommitID(r.refName) 108 if err != nil { 109 return nil, err 110 } 111 } else if shaRegex.MatchString(r.refName) { 112 if repo.IsCommitExist(r.refName) { 113 r.CommitID = r.refName 114 } else { 115 return nil, git.ErrNotExist{ 116 ID: r.refName, 117 } 118 } 119 } else { 120 return nil, RepoRefNotFoundError{RefName: r.refName} 121 } 122 123 return r, nil 124 } 125 126 // GetArchiveName returns the name of the caller, based on the ref used by the 127 // caller to create this request. 128 func (aReq *ArchiveRequest) GetArchiveName() string { 129 return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String() 130 } 131 132 // Await awaits the completion of an ArchiveRequest. If the archive has 133 // already been prepared the method returns immediately. Otherwise an archiver 134 // process will be started and its completion awaited. On success the returned 135 // RepoArchiver may be used to download the archive. Note that even if the 136 // context is cancelled/times out a started archiver will still continue to run 137 // in the background. 138 func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver, error) { 139 archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID) 140 if err != nil { 141 return nil, fmt.Errorf("models.GetRepoArchiver: %w", err) 142 } 143 144 if archiver != nil && archiver.Status == repo_model.ArchiverReady { 145 // Archive already generated, we're done. 146 return archiver, nil 147 } 148 149 if err := StartArchive(aReq); err != nil { 150 return nil, fmt.Errorf("archiver.StartArchive: %w", err) 151 } 152 153 poll := time.NewTicker(time.Second * 1) 154 defer poll.Stop() 155 156 for { 157 select { 158 case <-graceful.GetManager().HammerContext().Done(): 159 // System stopped. 160 return nil, graceful.GetManager().HammerContext().Err() 161 case <-ctx.Done(): 162 return nil, ctx.Err() 163 case <-poll.C: 164 archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID) 165 if err != nil { 166 return nil, fmt.Errorf("repo_model.GetRepoArchiver: %w", err) 167 } 168 if archiver != nil && archiver.Status == repo_model.ArchiverReady { 169 return archiver, nil 170 } 171 } 172 } 173 } 174 175 func doArchive(r *ArchiveRequest) (*repo_model.RepoArchiver, error) { 176 txCtx, committer, err := db.TxContext(db.DefaultContext) 177 if err != nil { 178 return nil, err 179 } 180 defer committer.Close() 181 ctx, _, finished := process.GetManager().AddContext(txCtx, fmt.Sprintf("ArchiveRequest[%d]: %s", r.RepoID, r.GetArchiveName())) 182 defer finished() 183 184 archiver, err := repo_model.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID) 185 if err != nil { 186 return nil, err 187 } 188 189 if archiver != nil { 190 // FIXME: If another process are generating it, we think it's not ready and just return 191 // Or we should wait until the archive generated. 192 if archiver.Status == repo_model.ArchiverGenerating { 193 return nil, nil 194 } 195 } else { 196 archiver = &repo_model.RepoArchiver{ 197 RepoID: r.RepoID, 198 Type: r.Type, 199 CommitID: r.CommitID, 200 Status: repo_model.ArchiverGenerating, 201 } 202 if err := repo_model.AddRepoArchiver(ctx, archiver); err != nil { 203 return nil, err 204 } 205 } 206 207 rPath := archiver.RelativePath() 208 _, err = storage.RepoArchives.Stat(rPath) 209 if err == nil { 210 if archiver.Status == repo_model.ArchiverGenerating { 211 archiver.Status = repo_model.ArchiverReady 212 if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil { 213 return nil, err 214 } 215 } 216 return archiver, committer.Commit() 217 } 218 219 if !errors.Is(err, os.ErrNotExist) { 220 return nil, fmt.Errorf("unable to stat archive: %w", err) 221 } 222 223 rd, w := io.Pipe() 224 defer func() { 225 w.Close() 226 rd.Close() 227 }() 228 done := make(chan error, 1) // Ensure that there is some capacity which will ensure that the goroutine below can always finish 229 repo, err := repo_model.GetRepositoryByID(ctx, archiver.RepoID) 230 if err != nil { 231 return nil, fmt.Errorf("archiver.LoadRepo failed: %w", err) 232 } 233 234 gitRepo, err := git.OpenRepository(ctx, repo.RepoPath()) 235 if err != nil { 236 return nil, err 237 } 238 defer gitRepo.Close() 239 240 go func(done chan error, w *io.PipeWriter, archiver *repo_model.RepoArchiver, gitRepo *git.Repository) { 241 defer func() { 242 if r := recover(); r != nil { 243 done <- fmt.Errorf("%v", r) 244 } 245 }() 246 247 if archiver.Type == git.BUNDLE { 248 err = gitRepo.CreateBundle( 249 ctx, 250 archiver.CommitID, 251 w, 252 ) 253 } else { 254 err = gitRepo.CreateArchive( 255 ctx, 256 archiver.Type, 257 w, 258 setting.Repository.PrefixArchiveFiles, 259 archiver.CommitID, 260 ) 261 } 262 _ = w.CloseWithError(err) 263 done <- err 264 }(done, w, archiver, gitRepo) 265 266 // TODO: add lfs data to zip 267 // TODO: add submodule data to zip 268 269 if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil { 270 return nil, fmt.Errorf("unable to write archive: %w", err) 271 } 272 273 err = <-done 274 if err != nil { 275 return nil, err 276 } 277 278 if archiver.Status == repo_model.ArchiverGenerating { 279 archiver.Status = repo_model.ArchiverReady 280 if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil { 281 return nil, err 282 } 283 } 284 285 return archiver, committer.Commit() 286 } 287 288 // ArchiveRepository satisfies the ArchiveRequest being passed in. Processing 289 // will occur in a separate goroutine, as this phase may take a while to 290 // complete. If the archive already exists, ArchiveRepository will not do 291 // anything. In all cases, the caller should be examining the *ArchiveRequest 292 // being returned for completion, as it may be different than the one they passed 293 // in. 294 func ArchiveRepository(request *ArchiveRequest) (*repo_model.RepoArchiver, error) { 295 return doArchive(request) 296 } 297 298 var archiverQueue *queue.WorkerPoolQueue[*ArchiveRequest] 299 300 // Init initializes archiver 301 func Init() error { 302 handler := func(items ...*ArchiveRequest) []*ArchiveRequest { 303 for _, archiveReq := range items { 304 log.Trace("ArchiverData Process: %#v", archiveReq) 305 if _, err := doArchive(archiveReq); err != nil { 306 log.Error("Archive %v failed: %v", archiveReq, err) 307 } 308 } 309 return nil 310 } 311 312 archiverQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo-archive", handler) 313 if archiverQueue == nil { 314 return errors.New("unable to create repo-archive queue") 315 } 316 go graceful.GetManager().RunWithCancel(archiverQueue) 317 318 return nil 319 } 320 321 // StartArchive push the archive request to the queue 322 func StartArchive(request *ArchiveRequest) error { 323 has, err := archiverQueue.Has(request) 324 if err != nil { 325 return err 326 } 327 if has { 328 return nil 329 } 330 return archiverQueue.Push(request) 331 } 332 333 func deleteOldRepoArchiver(ctx context.Context, archiver *repo_model.RepoArchiver) error { 334 if err := repo_model.DeleteRepoArchiver(ctx, archiver); err != nil { 335 return err 336 } 337 p := archiver.RelativePath() 338 if err := storage.RepoArchives.Delete(p); err != nil { 339 log.Error("delete repo archive file failed: %v", err) 340 } 341 return nil 342 } 343 344 // DeleteOldRepositoryArchives deletes old repository archives. 345 func DeleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration) error { 346 log.Trace("Doing: ArchiveCleanup") 347 348 for { 349 archivers, err := repo_model.FindRepoArchives(ctx, repo_model.FindRepoArchiversOption{ 350 ListOptions: db.ListOptions{ 351 PageSize: 100, 352 Page: 1, 353 }, 354 OlderThan: olderThan, 355 }) 356 if err != nil { 357 log.Trace("Error: ArchiveClean: %v", err) 358 return err 359 } 360 361 for _, archiver := range archivers { 362 if err := deleteOldRepoArchiver(ctx, archiver); err != nil { 363 return err 364 } 365 } 366 if len(archivers) < 100 { 367 break 368 } 369 } 370 371 log.Trace("Finished: ArchiveCleanup") 372 return nil 373 } 374 375 // DeleteRepositoryArchives deletes all repositories' archives. 376 func DeleteRepositoryArchives(ctx context.Context) error { 377 if err := repo_model.DeleteAllRepoArchives(ctx); err != nil { 378 return err 379 } 380 return storage.Clean(storage.RepoArchives) 381 }