code.gitea.io/gitea@v1.22.3/routers/api/actions/artifacts_chunks.go

// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package actions

import (
	"crypto/md5"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"fmt"
	"hash"
	"io"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"code.gitea.io/gitea/models/actions"
	"code.gitea.io/gitea/models/db"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/storage"
)
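
// saveUploadChunkBase streams the request body into object storage as a single
// chunk file named tmp{runID}/{runID}-{artifactID}-{start}-{end}.chunk.
// When checkMd5 is true, the body is hashed while it streams and compared
// against the client-supplied MD5 header; on a hash or size mismatch the
// stored chunk is deleted and an error is returned. On success it returns the
// caller-provided total length.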
func saveUploadChunkBase(st storage.ObjectStorage, ctx *ArtifactContext,
	artifact *actions.ActionArtifact,
	contentSize, runID, start, end, length int64, checkMd5 bool,
) (int64, error) {
	// build the chunk storage path
	storagePath := fmt.Sprintf("tmp%d/%d-%d-%d-%d.chunk", runID, runID, artifact.ID, start, end)
	var r io.Reader = ctx.Req.Body
	var hasher hash.Hash
	if checkMd5 {
		// use io.TeeReader so the body is hashed while it streams into storage,
		// instead of buffering the whole body just to compute its MD5 sum;
		// if the hash does not match, the stored chunk is deleted below
		hasher = md5.New()
		r = io.TeeReader(r, hasher)
	}
	// save the chunk to storage
	writtenSize, err := st.Save(storagePath, r, contentSize)
	if err != nil {
		return -1, fmt.Errorf("save chunk to storage error: %v", err)
	}
	var checkErr error
	if checkMd5 {
		// compare the streamed MD5 sum with the client-supplied header
		reqMd5String := ctx.Req.Header.Get(artifactXActionsResultsMD5Header)
		chunkMd5String := base64.StdEncoding.EncodeToString(hasher.Sum(nil))
		log.Info("[artifact] check chunk md5, sum: %s, header: %s", chunkMd5String, reqMd5String)
		// if the md5 does not match, the chunk is deleted
		if reqMd5String != chunkMd5String {
			checkErr = fmt.Errorf("md5 not match")
		}
	}
	if writtenSize != contentSize {
		checkErr = errors.Join(checkErr, fmt.Errorf("contentSize not match body size"))
	}
	if checkErr != nil {
		if err := st.Delete(storagePath); err != nil {
			log.Error("Error deleting chunk: %s, %v", storagePath, err)
		}
		return -1, checkErr
	}
	log.Info("[artifact] save chunk %s, size: %d, artifact id: %d, start: %d, end: %d",
		storagePath, contentSize, artifact.ID, start, end)
	// return the chunk's total size (the length from Content-Range, or the body size for appends)
	return length, nil
}
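
// saveUploadChunk stores one chunk of a chunked artifact upload, taking the
// chunk's start/end offsets and the total length from the Content-Range
// header and verifying the chunk's MD5.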
func saveUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext,
	artifact *actions.ActionArtifact,
	contentSize, runID int64,
) (int64, error) {
	// parse the Content-Range header, format: bytes 0-1023/146515
	contentRange := ctx.Req.Header.Get("Content-Range")
	start, end, length := int64(0), int64(0), int64(0)
	if _, err := fmt.Sscanf(contentRange, "bytes %d-%d/%d", &start, &end, &length); err != nil {
		log.Warn("parse content range error: %v, content-range: %s", err, contentRange)
		return -1, fmt.Errorf("parse content range error: %v", err)
	}
	return saveUploadChunkBase(st, ctx, artifact, contentSize, runID, start, end, length, true)
}
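
// appendUploadChunk stores the next chunk of a sequentially uploaded artifact:
// no Content-Range header is parsed, so the end offset and total length are
// derived from the given start offset and the body size, and MD5 checking is
// skipped.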
func appendUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext,
	artifact *actions.ActionArtifact,
	start, contentSize, runID int64,
) (int64, error) {
	end := start + contentSize - 1
	return saveUploadChunkBase(st, ctx, artifact, contentSize, runID, start, end, contentSize, false)
}
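
// chunkFileItem is one uploaded chunk file, with its coordinates parsed from
// the file name {runID}-{artifactID}-{start}-{end}.chunk.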
type chunkFileItem struct {
	RunID      int64
	ArtifactID int64
	Start      int64
	End        int64
	Path       string
}
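
// listChunksByRunID lists every chunk file under the run's temporary storage
// directory (tmp{runID}) and groups the parsed items by artifact ID.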
func listChunksByRunID(st storage.ObjectStorage, runID int64) (map[int64][]*chunkFileItem, error) {
	storageDir := fmt.Sprintf("tmp%d", runID)
	var chunks []*chunkFileItem
	if err := st.IterateObjects(storageDir, func(fpath string, obj storage.Object) error {
		baseName := filepath.Base(fpath)
		// when chunks are read back from storage, the path only contains the
		// storage dir and basename, regardless of the subdirectory setting in
		// the storage config
		item := chunkFileItem{Path: storageDir + "/" + baseName}
		if _, err := fmt.Sscanf(baseName, "%d-%d-%d-%d.chunk", &item.RunID, &item.ArtifactID, &item.Start, &item.End); err != nil {
			return fmt.Errorf("parse chunk file name error: %v", err)
		}
		chunks = append(chunks, &item)
		return nil
	}); err != nil {
		return nil, err
	}
	// group chunks by artifact id
	chunksMap := make(map[int64][]*chunkFileItem)
	for _, c := range chunks {
		chunksMap[c.ArtifactID] = append(chunksMap[c.ArtifactID], c)
	}
	return chunksMap, nil
}
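
// mergeChunksForRun loads the run's artifacts with the given name from the
// database and merges the uploaded chunks of each artifact that has any
// chunks in storage.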
func mergeChunksForRun(ctx *ArtifactContext, st storage.ObjectStorage, runID int64, artifactName string) error {
	// read all db artifacts by name
	artifacts, err := db.Find[actions.ActionArtifact](ctx, actions.FindArtifactsOptions{
		RunID:        runID,
		ArtifactName: artifactName,
	})
	if err != nil {
		return err
	}
	// read all uploaded chunks from storage
	chunksMap, err := listChunksByRunID(st, runID)
	if err != nil {
		return err
	}
	// iterate over the db artifacts and merge their chunks
	for _, art := range artifacts {
		chunks, ok := chunksMap[art.ID]
		if !ok {
			log.Debug("artifact %d chunks not found", art.ID)
			continue
		}
		if err := mergeChunksForArtifact(ctx, chunks, st, art, ""); err != nil {
			return err
		}
	}
	return nil
}
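
// mergeChunksForArtifact concatenates an artifact's chunks in offset order
// into a single file in storage. If checksum has a "sha256:" prefix, the
// merged content is verified against it. On success the chunk files are
// deleted, any previously stored file is replaced, and the artifact row is
// marked upload-confirmed. If the chunks do not yet cover the whole
// compressed file, it returns nil without merging.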
func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st storage.ObjectStorage, artifact *actions.ActionArtifact, checksum string) error {
	sort.Slice(chunks, func(i, j int) bool {
		return chunks[i].Start < chunks[j].Start
	})
	allChunks := make([]*chunkFileItem, 0)
	startAt := int64(-1)
	// check that all chunks are uploaded and in order, skipping duplicates
	for _, c := range chunks {
		// startAt == -1 means this is the first chunk, which must start at 0;
		// c.Start == startAt+1 means this chunk directly follows the previous one;
		// duplicate (re-uploaded) chunks fail this check and are skipped
		if c.Start == (startAt + 1) {
			allChunks = append(allChunks, c)
			startAt = c.End
		}
	}
	// if the last chunk's End+1 does not equal the compressed file size,
	// the chunks have not been uploaded completely yet
	if startAt+1 != artifact.FileCompressedSize {
		log.Debug("[artifact] chunks are not uploaded completely, artifact_id: %d", artifact.ID)
		return nil
	}
	// concatenate the chunks with an io.MultiReader
	readers := make([]io.Reader, 0, len(allChunks))
	closeReaders := func() {
		for _, r := range readers {
			_ = r.(io.Closer).Close() // guaranteed to be an io.Closer: every reader comes from st.Open in the loop below
		}
		readers = nil
	}
	defer closeReaders()
	for _, c := range allChunks {
		var readCloser io.ReadCloser
		var err error
		if readCloser, err = st.Open(c.Path); err != nil {
			return fmt.Errorf("open chunk error: %v, %s", err, c.Path)
		}
		readers = append(readers, readCloser)
	}
	mergedReader := io.MultiReader(readers...)
	shaPrefix := "sha256:"
	var hash hash.Hash
	if strings.HasPrefix(checksum, shaPrefix) {
		hash = sha256.New()
	}
	if hash != nil {
		mergedReader = io.TeeReader(mergedReader, hash)
	}

	// if the chunks are gzip-compressed, use .gz as the extension;
	// the download-artifact action uses the Content-Encoding header to decide
	// whether it should decompress the file
	extension := "chunk"
	if artifact.ContentEncoding == "gzip" {
		extension = "chunk.gz"
	}

	// save the merged file
	storagePath := fmt.Sprintf("%d/%d/%d.%s", artifact.RunID%255, artifact.ID%255, time.Now().UnixNano(), extension)
	written, err := st.Save(storagePath, mergedReader, artifact.FileCompressedSize)
	if err != nil {
		return fmt.Errorf("save merged file error: %v", err)
	}
	if written != artifact.FileCompressedSize {
		return fmt.Errorf("merged file size is not equal to chunk length")
	}

	defer func() {
		closeReaders() // close the chunk readers before deleting them
		// drop the merged chunks
		for _, c := range chunks {
			if err := st.Delete(c.Path); err != nil {
				log.Warn("Error deleting chunk: %s, %v", c.Path, err)
			}
		}
	}()

	if hash != nil {
		rawChecksum := hash.Sum(nil)
		actualChecksum := hex.EncodeToString(rawChecksum)
		if !strings.HasSuffix(checksum, actualChecksum) {
			return fmt.Errorf("update artifact error checksum is invalid")
		}
	}

	// save the storage path to the artifact
	log.Debug("[artifact] merge chunks to artifact: %d, %s, old:%s", artifact.ID, storagePath, artifact.StoragePath)
	// if the artifact was already uploaded, delete the old file
	if artifact.StoragePath != "" {
		if err := st.Delete(artifact.StoragePath); err != nil {
			log.Warn("Error deleting old artifact: %s, %v", artifact.StoragePath, err)
		}
	}

	artifact.StoragePath = storagePath
	artifact.Status = int64(actions.ArtifactStatusUploadConfirmed)
	if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil {
		return fmt.Errorf("update artifact error: %v", err)
	}

	return nil
}