code.gitea.io/gitea@v1.21.7/routers/api/actions/artifacts_chunks.go

// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package actions

import (
	"crypto/md5"
	"encoding/base64"
	"fmt"
	"io"
	"path/filepath"
	"sort"
	"time"

	"code.gitea.io/gitea/models/actions"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/storage"
)

func saveUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext,
	artifact *actions.ActionArtifact,
	contentSize, runID int64,
) (int64, error) {
	// parse the Content-Range header, format: bytes 0-1023/146515
	contentRange := ctx.Req.Header.Get("Content-Range")
	start, end, length := int64(0), int64(0), int64(0)
	if _, err := fmt.Sscanf(contentRange, "bytes %d-%d/%d", &start, &end, &length); err != nil {
		log.Warn("parse content range error: %v, content-range: %s", err, contentRange)
		return -1, fmt.Errorf("parse content range error: %v", err)
	}
	// build the chunk storage path
	storagePath := fmt.Sprintf("tmp%d/%d-%d-%d-%d.chunk", runID, runID, artifact.ID, start, end)
	// use io.TeeReader to hash the body while it streams to storage,
	// instead of buffering the whole body just to compute the md5 sum.
	// if the hash does not match afterwards, the stored chunk is deleted.
	hasher := md5.New()
	r := io.TeeReader(ctx.Req.Body, hasher)
	// save the chunk to storage
	writtenSize, err := st.Save(storagePath, r, -1)
	if err != nil {
		return -1, fmt.Errorf("save chunk to storage error: %v", err)
	}
	// check the md5 sum
	reqMd5String := ctx.Req.Header.Get(artifactXActionsResultsMD5Header)
	chunkMd5String := base64.StdEncoding.EncodeToString(hasher.Sum(nil))
	log.Info("[artifact] check chunk md5, sum: %s, header: %s", chunkMd5String, reqMd5String)
	// if the md5 or the written size does not match, delete the chunk
	if reqMd5String != chunkMd5String || writtenSize != contentSize {
		if err := st.Delete(storagePath); err != nil {
			log.Error("Error deleting chunk: %s, %v", storagePath, err)
		}
		return -1, fmt.Errorf("md5 does not match")
	}
	log.Info("[artifact] save chunk %s, size: %d, artifact id: %d, start: %d, end: %d",
		storagePath, contentSize, artifact.ID, start, end)
	// return the total artifact size reported by the Content-Range header
	return length, nil
}

type chunkFileItem struct {
	RunID      int64
	ArtifactID int64
	Start      int64
	End        int64
	Path       string
}

func listChunksByRunID(st storage.ObjectStorage, runID int64) (map[int64][]*chunkFileItem, error) {
	storageDir := fmt.Sprintf("tmp%d", runID)
	var chunks []*chunkFileItem
	if err := st.IterateObjects(storageDir, func(fpath string, obj storage.Object) error {
		baseName := filepath.Base(fpath)
		// paths returned when reading chunks from storage only contain the
		// storage dir and the base name, regardless of any subdirectory
		// setting in the storage config
		item := chunkFileItem{Path: storageDir + "/" + baseName}
		if _, err := fmt.Sscanf(baseName, "%d-%d-%d-%d.chunk", &item.RunID, &item.ArtifactID, &item.Start, &item.End); err != nil {
			return fmt.Errorf("parse chunk file name error: %v", err)
		}
		chunks = append(chunks, &item)
		return nil
	}); err != nil {
		return nil, err
	}
	// group chunks by artifact id
	chunksMap := make(map[int64][]*chunkFileItem)
	for _, c := range chunks {
		chunksMap[c.ArtifactID] = append(chunksMap[c.ArtifactID], c)
	}
	return chunksMap, nil
}
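// A minimal sketch of the chunk file name round trip (illustrative values,
// not part of the original file): the name written by saveUploadChunk is
// parsed back into the same fields by listChunksByRunID.
//
//	name := fmt.Sprintf("%d-%d-%d-%d.chunk", 7, 3, 0, 1023)
//	var item chunkFileItem
//	_, _ = fmt.Sscanf(name, "%d-%d-%d-%d.chunk", &item.RunID, &item.ArtifactID, &item.Start, &item.End)
//	// item == chunkFileItem{RunID: 7, ArtifactID: 3, Start: 0, End: 1023}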
func mergeChunksForRun(ctx *ArtifactContext, st storage.ObjectStorage, runID int64, artifactName string) error {
	// read all db artifacts with this name for the run
	artifacts, err := actions.ListArtifactsByRunIDAndName(ctx, runID, artifactName)
	if err != nil {
		return err
	}
	// read all uploading chunks from storage
	chunksMap, err := listChunksByRunID(st, runID)
	if err != nil {
		return err
	}
	// merge chunks for each db artifact
	for _, art := range artifacts {
		chunks, ok := chunksMap[art.ID]
		if !ok {
			log.Debug("artifact %d chunks not found", art.ID)
			continue
		}
		if err := mergeChunksForArtifact(ctx, chunks, st, art); err != nil {
			return err
		}
	}
	return nil
}

func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st storage.ObjectStorage, artifact *actions.ActionArtifact) error {
	sort.Slice(chunks, func(i, j int) bool {
		return chunks[i].Start < chunks[j].Start
	})
	allChunks := make([]*chunkFileItem, 0)
	startAt := int64(-1)
	// check that all chunks were uploaded in order and drop duplicates:
	// startAt == -1 means this is the first chunk, so it must start at 0;
	// otherwise c.Start must equal the previous chunk's End + 1. a chunk
	// whose Start is <= startAt repeats an already-accepted range and is
	// skipped; a Start beyond startAt+1 means a chunk is missing.
	for _, c := range chunks {
		if c.Start == (startAt + 1) {
			allChunks = append(allChunks, c)
			startAt = c.End
		}
	}
	// if the last chunk's End+1 does not equal the artifact's compressed
	// size, the chunks have not been uploaded completely yet
	if startAt+1 != artifact.FileCompressedSize {
		log.Debug("[artifact] chunks are not uploaded completely, artifact_id: %d", artifact.ID)
		return nil
	}
	// concatenate the ordered chunks with io.MultiReader
	readers := make([]io.Reader, 0, len(allChunks))
	closeReaders := func() {
		for _, r := range readers {
			_ = r.(io.Closer).Close() // safe: each reader is an io.ReadCloser returned by st.Open below
		}
		readers = nil
	}
	defer closeReaders()
	for _, c := range allChunks {
		var readCloser io.ReadCloser
		var err error
		if readCloser, err = st.Open(c.Path); err != nil {
			return fmt.Errorf("open chunk error: %v, %s", err, c.Path)
		}
		readers = append(readers, readCloser)
	}
	mergedReader := io.MultiReader(readers...)
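	// io.MultiReader yields the chunks back-to-back as one stream, pulling
	// from each chunk reader lazily as the merged file is written; e.g.
	// (illustrative values) readers over "he" and "llo" read as "hello":
	//
	//	r := io.MultiReader(strings.NewReader("he"), strings.NewReader("llo"))
	//	b, _ := io.ReadAll(r) // b == []byte("hello")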

	// if the chunks are gzip-compressed, use .gz as the extension; the
	// download-artifact action uses the Content-Encoding header to decide
	// whether it should decompress the file
	extension := "chunk"
	if artifact.ContentEncoding == "gzip" {
		extension = "chunk.gz"
	}

	// save the merged file
	storagePath := fmt.Sprintf("%d/%d/%d.%s", artifact.RunID%255, artifact.ID%255, time.Now().UnixNano(), extension)
	written, err := st.Save(storagePath, mergedReader, -1)
	if err != nil {
		return fmt.Errorf("save merged file error: %v", err)
	}
	if written != artifact.FileCompressedSize {
		return fmt.Errorf("merged file size %d does not match the expected compressed size %d", written, artifact.FileCompressedSize)
	}

	defer func() {
		closeReaders() // close the chunk readers before deleting the chunk files
		// drop the chunks
		for _, c := range chunks {
			if err := st.Delete(c.Path); err != nil {
				log.Warn("Error deleting chunk: %s, %v", c.Path, err)
			}
		}
	}()

	// save the storage path to the artifact
	log.Debug("[artifact] merge chunks to artifact: %d, %s, old:%s", artifact.ID, storagePath, artifact.StoragePath)
	// if the artifact was already uploaded, delete the old file
	if artifact.StoragePath != "" {
		if err := st.Delete(artifact.StoragePath); err != nil {
			log.Warn("Error deleting old artifact: %s, %v", artifact.StoragePath, err)
		}
	}

	artifact.StoragePath = storagePath
	artifact.Status = int64(actions.ArtifactStatusUploadConfirmed)
	if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil {
		return fmt.Errorf("update artifact error: %v", err)
	}

	return nil
}
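// A minimal sketch (hypothetical helper, not part of this file) of producing
// a chunk md5 value that satisfies the check in saveUploadChunk: the server
// base64-encodes the raw md5 digest of the chunk body and compares it with
// the value of the artifactXActionsResultsMD5Header request header.
//
//	func chunkMD5(body []byte) string { // hypothetical client-side helper
//		sum := md5.Sum(body)
//		return base64.StdEncoding.EncodeToString(sum[:])
//	}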