code.gitea.io/gitea@v1.22.3/routers/api/actions/artifacts_chunks.go

// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package actions

import (
	"crypto/md5"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"fmt"
	"hash"
	"io"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"code.gitea.io/gitea/models/actions"
	"code.gitea.io/gitea/models/db"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/storage"
)

// saveUploadChunkBase stores one chunk of an artifact upload and optionally
// verifies its MD5 checksum against the request header.
func saveUploadChunkBase(st storage.ObjectStorage, ctx *ArtifactContext,
	artifact *actions.ActionArtifact,
	contentSize, runID, start, end, length int64, checkMd5 bool,
) (int64, error) {
	// build the chunk storage path
	storagePath := fmt.Sprintf("tmp%d/%d-%d-%d-%d.chunk", runID, runID, artifact.ID, start, end)
	var r io.Reader = ctx.Req.Body
	var hasher hash.Hash
	if checkMd5 {
		// use io.TeeReader to avoid buffering the whole body for the MD5 sum:
		// data is written to the hasher while it is being read.
		// if the hash does not match, the stored result is deleted below.
		hasher = md5.New()
		r = io.TeeReader(r, hasher)
	}
	// save the chunk to storage
	writtenSize, err := st.Save(storagePath, r, contentSize)
	if err != nil {
		return -1, fmt.Errorf("save chunk to storage error: %v", err)
	}
	var checkErr error
	if checkMd5 {
		// check the MD5 sum against the request header
		reqMd5String := ctx.Req.Header.Get(artifactXActionsResultsMD5Header)
		chunkMd5String := base64.StdEncoding.EncodeToString(hasher.Sum(nil))
		log.Info("[artifact] check chunk md5, sum: %s, header: %s", chunkMd5String, reqMd5String)
		// if the MD5 does not match, delete the chunk
		if reqMd5String != chunkMd5String {
			checkErr = fmt.Errorf("md5 not match")
		}
	}
	if writtenSize != contentSize {
		checkErr = errors.Join(checkErr, fmt.Errorf("contentSize not match body size"))
	}
	if checkErr != nil {
		if err := st.Delete(storagePath); err != nil {
			log.Error("Error deleting chunk: %s, %v", storagePath, err)
		}
		return -1, checkErr
	}
	log.Info("[artifact] save chunk %s, size: %d, artifact id: %d, start: %d, end: %d",
		storagePath, contentSize, artifact.ID, start, end)
	// return the chunk's total size
	return length, nil
}

// saveUploadChunk parses the Content-Range header of the request and stores
// the chunk with MD5 verification enabled.
func saveUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext,
	artifact *actions.ActionArtifact,
	contentSize, runID int64,
) (int64, error) {
	// parse the Content-Range header, format: bytes 0-1023/146515
	contentRange := ctx.Req.Header.Get("Content-Range")
	start, end, length := int64(0), int64(0), int64(0)
	if _, err := fmt.Sscanf(contentRange, "bytes %d-%d/%d", &start, &end, &length); err != nil {
		log.Warn("parse content range error: %v, content-range: %s", err, contentRange)
		return -1, fmt.Errorf("parse content range error: %v", err)
	}
	return saveUploadChunkBase(st, ctx, artifact, contentSize, runID, start, end, length, true)
}

// appendUploadChunk stores a chunk starting at the given offset, without MD5
// verification.
func appendUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext,
	artifact *actions.ActionArtifact,
	start, contentSize, runID int64,
) (int64, error) {
	end := start + contentSize - 1
	return saveUploadChunkBase(st, ctx, artifact, contentSize, runID, start, end, contentSize, false)
}

// chunkFileItem describes one uploaded chunk as recovered from its file name
// in storage.
type chunkFileItem struct {
	RunID      int64
	ArtifactID int64
	Start      int64
	End        int64
	Path       string
}
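// For reference, a chunk for run 123 and artifact 456 covering bytes 0-1023
// is stored by saveUploadChunkBase above as "tmp123/123-456-0-1023.chunk",
// and listChunksByRunID below recovers a chunkFileItem from that name.
// A minimal sketch of the parsing half of that round-trip (parseChunkName is
// hypothetical, not part of this package):
//
//	func parseChunkName(baseName string) (item chunkFileItem, err error) {
//		_, err = fmt.Sscanf(baseName, "%d-%d-%d-%d.chunk", &item.RunID, &item.ArtifactID, &item.Start, &item.End)
//		return item, err
//	}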
// listChunksByRunID reads all uploaded chunk files of a run from storage and
// groups them by artifact ID.
func listChunksByRunID(st storage.ObjectStorage, runID int64) (map[int64][]*chunkFileItem, error) {
	storageDir := fmt.Sprintf("tmp%d", runID)
	var chunks []*chunkFileItem
	if err := st.IterateObjects(storageDir, func(fpath string, obj storage.Object) error {
		baseName := filepath.Base(fpath)
		// when chunks are read back from storage, the path contains only the
		// storage dir and the basename, regardless of any subdirectory setting
		// in the storage config
		item := chunkFileItem{Path: storageDir + "/" + baseName}
		if _, err := fmt.Sscanf(baseName, "%d-%d-%d-%d.chunk", &item.RunID, &item.ArtifactID, &item.Start, &item.End); err != nil {
			return fmt.Errorf("parse chunk file name error: %v", err)
		}
		chunks = append(chunks, &item)
		return nil
	}); err != nil {
		return nil, err
	}
	// group chunks by artifact ID
	chunksMap := make(map[int64][]*chunkFileItem)
	for _, c := range chunks {
		chunksMap[c.ArtifactID] = append(chunksMap[c.ArtifactID], c)
	}
	return chunksMap, nil
}

// mergeChunksForRun merges the uploaded chunks of every artifact with the
// given name in a run.
func mergeChunksForRun(ctx *ArtifactContext, st storage.ObjectStorage, runID int64, artifactName string) error {
	// read all matching artifacts from the database
	artifacts, err := db.Find[actions.ActionArtifact](ctx, actions.FindArtifactsOptions{
		RunID:        runID,
		ArtifactName: artifactName,
	})
	if err != nil {
		return err
	}
	// read all uploaded chunks from storage
	chunksMap, err := listChunksByRunID(st, runID)
	if err != nil {
		return err
	}
	// merge the chunks of each artifact found in the database
	for _, art := range artifacts {
		chunks, ok := chunksMap[art.ID]
		if !ok {
			log.Debug("artifact %d chunks not found", art.ID)
			continue
		}
		if err := mergeChunksForArtifact(ctx, chunks, st, art, ""); err != nil {
			return err
		}
	}
	return nil
}

// mergeChunksForArtifact concatenates an artifact's chunks into its final
// storage file, optionally verifying a sha256 checksum, and deletes the
// chunks afterwards.
func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st storage.ObjectStorage, artifact *actions.ActionArtifact, checksum string) error {
	sort.Slice(chunks, func(i, j int) bool {
		return chunks[i].Start < chunks[j].Start
	})
	allChunks := make([]*chunkFileItem, 0)
	startAt := int64(-1)
	// check that all chunks were uploaded in order, dropping duplicates:
	// startAt == -1 means this is the first chunk;
	// c.Start == startAt+1 means this chunk directly follows the previous one;
	// any other c.Start means the chunk is repeated or one is missing
	for _, c := range chunks {
		if c.Start == (startAt + 1) {
			allChunks = append(allChunks, c)
			startAt = c.End
		}
	}
	// if the last chunk's End+1 is not equal to the compressed file size, the
	// chunks have not been uploaded completely yet
	if startAt+1 != artifact.FileCompressedSize {
		log.Debug("[artifact] chunks are not uploaded completely, artifact_id: %d", artifact.ID)
		return nil
	}
	// concatenate all chunks with an io.MultiReader
	readers := make([]io.Reader, 0, len(allChunks))
	closeReaders := func() {
		for _, r := range readers {
			_ = r.(io.Closer).Close() // guaranteed to be an io.Closer by the Open call in the loop below
		}
		readers = nil
	}
	defer closeReaders()
	for _, c := range allChunks {
		var readCloser io.ReadCloser
		var err error
		if readCloser, err = st.Open(c.Path); err != nil {
			return fmt.Errorf("open chunk error: %v, %s", err, c.Path)
		}
		readers = append(readers, readCloser)
	}
	mergedReader := io.MultiReader(readers...)
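	// An empty checksum (as passed by mergeChunksForRun) skips verification;
	// a checksum of the form "sha256:<hex digest>" makes the merged stream get
	// teed through a sha256 hasher below, so the digest can be checked after
	// the save completes without buffering the whole file in memory.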
	shaPrefix := "sha256:"
	var hash hash.Hash
	if strings.HasPrefix(checksum, shaPrefix) {
		hash = sha256.New()
	}
	if hash != nil {
		mergedReader = io.TeeReader(mergedReader, hash)
	}

	// if the chunks are gzip-compressed, use "chunk.gz" as the extension;
	// the download-artifact action uses the Content-Encoding header to decide
	// whether it should decompress the file
	extension := "chunk"
	if artifact.ContentEncoding == "gzip" {
		extension = "chunk.gz"
	}

	// save the merged file
	storagePath := fmt.Sprintf("%d/%d/%d.%s", artifact.RunID%255, artifact.ID%255, time.Now().UnixNano(), extension)
	written, err := st.Save(storagePath, mergedReader, artifact.FileCompressedSize)
	if err != nil {
		return fmt.Errorf("save merged file error: %v", err)
	}
	if written != artifact.FileCompressedSize {
		return fmt.Errorf("merged file size is not equal to chunk length")
	}

	defer func() {
		closeReaders() // close before deleting
		// drop the now-merged chunks
		for _, c := range chunks {
			if err := st.Delete(c.Path); err != nil {
				log.Warn("Error deleting chunk: %s, %v", c.Path, err)
			}
		}
	}()

	if hash != nil {
		rawChecksum := hash.Sum(nil)
		actualChecksum := hex.EncodeToString(rawChecksum)
		if !strings.HasSuffix(checksum, actualChecksum) {
			return fmt.Errorf("update artifact error checksum is invalid")
		}
	}

	// save the storage path to the artifact
	log.Debug("[artifact] merge chunks to artifact: %d, %s, old: %s", artifact.ID, storagePath, artifact.StoragePath)
	// if the artifact was already uploaded, delete the old file
	if artifact.StoragePath != "" {
		if err := st.Delete(artifact.StoragePath); err != nil {
			log.Warn("Error deleting old artifact: %s, %v", artifact.StoragePath, err)
		}
	}

	artifact.StoragePath = storagePath
	artifact.Status = int64(actions.ArtifactStatusUploadConfirmed)
	if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil {
		return fmt.Errorf("update artifact error: %v", err)
	}

	return nil
}
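// A minimal client-side sketch (hypothetical, not part of this package) of
// producing the headers that saveUploadChunk and saveUploadChunkBase verify
// above: the Content-Range value matches the "bytes %d-%d/%d" format parsed
// by fmt.Sscanf, and the MD5 value is base64 std-encoded, as compared in
// saveUploadChunkBase. buildChunkHeaders is an illustrative name.
//
//	func buildChunkHeaders(start int64, body []byte, totalSize int64) (contentRange, md5Base64 string) {
//		sum := md5.Sum(body)
//		contentRange = fmt.Sprintf("bytes %d-%d/%d", start, start+int64(len(body))-1, totalSize)
//		md5Base64 = base64.StdEncoding.EncodeToString(sum[:])
//		return contentRange, md5Base64
//	}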