code.gitea.io/gitea@v1.22.3/services/repository/archiver/archiver.go (about) 1 // Copyright 2020 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package archiver 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "io" 11 "os" 12 "strings" 13 "time" 14 15 "code.gitea.io/gitea/models/db" 16 repo_model "code.gitea.io/gitea/models/repo" 17 "code.gitea.io/gitea/modules/git" 18 "code.gitea.io/gitea/modules/gitrepo" 19 "code.gitea.io/gitea/modules/graceful" 20 "code.gitea.io/gitea/modules/log" 21 "code.gitea.io/gitea/modules/process" 22 "code.gitea.io/gitea/modules/queue" 23 "code.gitea.io/gitea/modules/setting" 24 "code.gitea.io/gitea/modules/storage" 25 ) 26 27 // ArchiveRequest defines the parameters of an archive request, which notably 28 // includes the specific repository being archived as well as the commit, the 29 // name by which it was requested, and the kind of archive being requested. 30 // This is entirely opaque to external entities, though, and mostly used as a 31 // handle elsewhere. 32 type ArchiveRequest struct { 33 RepoID int64 34 refName string 35 Type git.ArchiveType 36 CommitID string 37 } 38 39 // ErrUnknownArchiveFormat request archive format is not supported 40 type ErrUnknownArchiveFormat struct { 41 RequestFormat string 42 } 43 44 // Error implements error 45 func (err ErrUnknownArchiveFormat) Error() string { 46 return fmt.Sprintf("unknown format: %s", err.RequestFormat) 47 } 48 49 // Is implements error 50 func (ErrUnknownArchiveFormat) Is(err error) bool { 51 _, ok := err.(ErrUnknownArchiveFormat) 52 return ok 53 } 54 55 // RepoRefNotFoundError is returned when a requested reference (commit, tag) was not found. 56 type RepoRefNotFoundError struct { 57 RefName string 58 } 59 60 // Error implements error. 61 func (e RepoRefNotFoundError) Error() string { 62 return fmt.Sprintf("unrecognized repository reference: %s", e.RefName) 63 } 64 65 func (e RepoRefNotFoundError) Is(err error) bool { 66 _, ok := err.(RepoRefNotFoundError) 67 return ok 68 } 69 70 // NewRequest creates an archival request, based on the URI. The 71 // resulting ArchiveRequest is suitable for being passed to ArchiveRepository() 72 // if it's determined that the request still needs to be satisfied. 73 func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) { 74 r := &ArchiveRequest{ 75 RepoID: repoID, 76 } 77 78 var ext string 79 switch { 80 case strings.HasSuffix(uri, ".zip"): 81 ext = ".zip" 82 r.Type = git.ZIP 83 case strings.HasSuffix(uri, ".tar.gz"): 84 ext = ".tar.gz" 85 r.Type = git.TARGZ 86 case strings.HasSuffix(uri, ".bundle"): 87 ext = ".bundle" 88 r.Type = git.BUNDLE 89 default: 90 return nil, ErrUnknownArchiveFormat{RequestFormat: uri} 91 } 92 93 r.refName = strings.TrimSuffix(uri, ext) 94 95 // Get corresponding commit. 96 commitID, err := repo.ConvertToGitID(r.refName) 97 if err != nil { 98 return nil, RepoRefNotFoundError{RefName: r.refName} 99 } 100 101 r.CommitID = commitID.String() 102 return r, nil 103 } 104 105 // GetArchiveName returns the name of the caller, based on the ref used by the 106 // caller to create this request. 107 func (aReq *ArchiveRequest) GetArchiveName() string { 108 return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String() 109 } 110 111 // Await awaits the completion of an ArchiveRequest. If the archive has 112 // already been prepared the method returns immediately. Otherwise an archiver 113 // process will be started and its completion awaited. On success the returned 114 // RepoArchiver may be used to download the archive. Note that even if the 115 // context is cancelled/times out a started archiver will still continue to run 116 // in the background. 117 func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver, error) { 118 archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID) 119 if err != nil { 120 return nil, fmt.Errorf("models.GetRepoArchiver: %w", err) 121 } 122 123 if archiver != nil && archiver.Status == repo_model.ArchiverReady { 124 // Archive already generated, we're done. 125 return archiver, nil 126 } 127 128 if err := StartArchive(aReq); err != nil { 129 return nil, fmt.Errorf("archiver.StartArchive: %w", err) 130 } 131 132 poll := time.NewTicker(time.Second * 1) 133 defer poll.Stop() 134 135 for { 136 select { 137 case <-graceful.GetManager().HammerContext().Done(): 138 // System stopped. 139 return nil, graceful.GetManager().HammerContext().Err() 140 case <-ctx.Done(): 141 return nil, ctx.Err() 142 case <-poll.C: 143 archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID) 144 if err != nil { 145 return nil, fmt.Errorf("repo_model.GetRepoArchiver: %w", err) 146 } 147 if archiver != nil && archiver.Status == repo_model.ArchiverReady { 148 return archiver, nil 149 } 150 } 151 } 152 } 153 154 func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver, error) { 155 txCtx, committer, err := db.TxContext(ctx) 156 if err != nil { 157 return nil, err 158 } 159 defer committer.Close() 160 ctx, _, finished := process.GetManager().AddContext(txCtx, fmt.Sprintf("ArchiveRequest[%d]: %s", r.RepoID, r.GetArchiveName())) 161 defer finished() 162 163 archiver, err := repo_model.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID) 164 if err != nil { 165 return nil, err 166 } 167 168 if archiver != nil { 169 // FIXME: If another process are generating it, we think it's not ready and just return 170 // Or we should wait until the archive generated. 171 if archiver.Status == repo_model.ArchiverGenerating { 172 return nil, nil 173 } 174 } else { 175 archiver = &repo_model.RepoArchiver{ 176 RepoID: r.RepoID, 177 Type: r.Type, 178 CommitID: r.CommitID, 179 Status: repo_model.ArchiverGenerating, 180 } 181 if err := db.Insert(ctx, archiver); err != nil { 182 return nil, err 183 } 184 } 185 186 rPath := archiver.RelativePath() 187 _, err = storage.RepoArchives.Stat(rPath) 188 if err == nil { 189 if archiver.Status == repo_model.ArchiverGenerating { 190 archiver.Status = repo_model.ArchiverReady 191 if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil { 192 return nil, err 193 } 194 } 195 return archiver, committer.Commit() 196 } 197 198 if !errors.Is(err, os.ErrNotExist) { 199 return nil, fmt.Errorf("unable to stat archive: %w", err) 200 } 201 202 rd, w := io.Pipe() 203 defer func() { 204 w.Close() 205 rd.Close() 206 }() 207 done := make(chan error, 1) // Ensure that there is some capacity which will ensure that the goroutine below can always finish 208 repo, err := repo_model.GetRepositoryByID(ctx, archiver.RepoID) 209 if err != nil { 210 return nil, fmt.Errorf("archiver.LoadRepo failed: %w", err) 211 } 212 213 gitRepo, err := gitrepo.OpenRepository(ctx, repo) 214 if err != nil { 215 return nil, err 216 } 217 defer gitRepo.Close() 218 219 go func(done chan error, w *io.PipeWriter, archiver *repo_model.RepoArchiver, gitRepo *git.Repository) { 220 defer func() { 221 if r := recover(); r != nil { 222 done <- fmt.Errorf("%v", r) 223 } 224 }() 225 226 if archiver.Type == git.BUNDLE { 227 err = gitRepo.CreateBundle( 228 ctx, 229 archiver.CommitID, 230 w, 231 ) 232 } else { 233 err = gitRepo.CreateArchive( 234 ctx, 235 archiver.Type, 236 w, 237 setting.Repository.PrefixArchiveFiles, 238 archiver.CommitID, 239 ) 240 } 241 _ = w.CloseWithError(err) 242 done <- err 243 }(done, w, archiver, gitRepo) 244 245 // TODO: add lfs data to zip 246 // TODO: add submodule data to zip 247 248 if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil { 249 return nil, fmt.Errorf("unable to write archive: %w", err) 250 } 251 252 err = <-done 253 if err != nil { 254 return nil, err 255 } 256 257 if archiver.Status == repo_model.ArchiverGenerating { 258 archiver.Status = repo_model.ArchiverReady 259 if err = repo_model.UpdateRepoArchiverStatus(ctx, archiver); err != nil { 260 return nil, err 261 } 262 } 263 264 return archiver, committer.Commit() 265 } 266 267 // ArchiveRepository satisfies the ArchiveRequest being passed in. Processing 268 // will occur in a separate goroutine, as this phase may take a while to 269 // complete. If the archive already exists, ArchiveRepository will not do 270 // anything. In all cases, the caller should be examining the *ArchiveRequest 271 // being returned for completion, as it may be different than the one they passed 272 // in. 273 func ArchiveRepository(ctx context.Context, request *ArchiveRequest) (*repo_model.RepoArchiver, error) { 274 return doArchive(ctx, request) 275 } 276 277 var archiverQueue *queue.WorkerPoolQueue[*ArchiveRequest] 278 279 // Init initializes archiver 280 func Init(ctx context.Context) error { 281 handler := func(items ...*ArchiveRequest) []*ArchiveRequest { 282 for _, archiveReq := range items { 283 log.Trace("ArchiverData Process: %#v", archiveReq) 284 if _, err := doArchive(ctx, archiveReq); err != nil { 285 log.Error("Archive %v failed: %v", archiveReq, err) 286 } 287 } 288 return nil 289 } 290 291 archiverQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo-archive", handler) 292 if archiverQueue == nil { 293 return errors.New("unable to create repo-archive queue") 294 } 295 go graceful.GetManager().RunWithCancel(archiverQueue) 296 297 return nil 298 } 299 300 // StartArchive push the archive request to the queue 301 func StartArchive(request *ArchiveRequest) error { 302 has, err := archiverQueue.Has(request) 303 if err != nil { 304 return err 305 } 306 if has { 307 return nil 308 } 309 return archiverQueue.Push(request) 310 } 311 312 func deleteOldRepoArchiver(ctx context.Context, archiver *repo_model.RepoArchiver) error { 313 if _, err := db.DeleteByID[repo_model.RepoArchiver](ctx, archiver.ID); err != nil { 314 return err 315 } 316 p := archiver.RelativePath() 317 if err := storage.RepoArchives.Delete(p); err != nil { 318 log.Error("delete repo archive file failed: %v", err) 319 } 320 return nil 321 } 322 323 // DeleteOldRepositoryArchives deletes old repository archives. 324 func DeleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration) error { 325 log.Trace("Doing: ArchiveCleanup") 326 327 for { 328 archivers, err := db.Find[repo_model.RepoArchiver](ctx, repo_model.FindRepoArchiversOption{ 329 ListOptions: db.ListOptions{ 330 PageSize: 100, 331 Page: 1, 332 }, 333 OlderThan: olderThan, 334 }) 335 if err != nil { 336 log.Trace("Error: ArchiveClean: %v", err) 337 return err 338 } 339 340 for _, archiver := range archivers { 341 if err := deleteOldRepoArchiver(ctx, archiver); err != nil { 342 return err 343 } 344 } 345 if len(archivers) < 100 { 346 break 347 } 348 } 349 350 log.Trace("Finished: ArchiveCleanup") 351 return nil 352 } 353 354 // DeleteRepositoryArchives deletes all repositories' archives. 355 func DeleteRepositoryArchives(ctx context.Context) error { 356 if err := repo_model.DeleteAllRepoArchives(ctx); err != nil { 357 return err 358 } 359 return storage.Clean(storage.RepoArchives) 360 }