github.com/Microsoft/azure-vhd-utils@v0.0.0-20230613175315-7c30a3748a1b/upload/upload.go (about)

     1  package upload
     2  
import (
	"errors"
	"fmt"
	"io"
	"sync/atomic"
	"time"

	"github.com/Azure/azure-sdk-for-go/storage"
	"github.com/Microsoft/azure-vhd-utils/upload/concurrent"
	"github.com/Microsoft/azure-vhd-utils/upload/progress"
	"github.com/Microsoft/azure-vhd-utils/vhdcore/common"
	"github.com/Microsoft/azure-vhd-utils/vhdcore/diskstream"
)
    15  
// DiskUploadContext describes a VHD upload: the disk stream to read from, the ranges of the stream to read, the
// destination blob and its container, the client used to communicate with Azure storage and the number of
// parallel go-routines to use for the upload.
//
// Upload reads VhdStream, UploadableRanges, BlobServiceClient, ContainerName, BlobName, Parallelism,
// AlreadyProcessedBytes and Resume. MD5Hash is not read by Upload itself.
type DiskUploadContext struct {
	VhdStream             *diskstream.DiskStream    // The stream whose ranges need to be uploaded
	AlreadyProcessedBytes int64                     // The size in bytes already uploaded; seeds the progress tracker
	UploadableRanges      []*common.IndexRange      // The subset of stream ranges to be uploaded
	BlobServiceClient     storage.BlobStorageClient // The client to make Azure blob service API calls
	ContainerName         string                    // The container in which the page blob resides
	BlobName              string                    // The destination page blob name
	Parallelism           int                       // The number of concurrent goroutines to be used for upload
	Resume                bool                      // Indicates whether this is a resuming (true) or new (false) upload
	MD5Hash               []byte                    // MD5 hash to be set in the page blob properties once upload finishes
}
    31  
    32  // oneMB is one MegaByte
    33  //
    34  const oneMB = float64(1048576)
    35  
    36  // Upload uploads the disk ranges described by the parameter cxt, this parameter describes the disk stream to
    37  // read from, the ranges of the stream to read, the destination blob and it's container, the client to communicate
    38  // with Azure storage and the number of parallel go-routines to use for upload.
    39  //
    40  func Upload(cxt *DiskUploadContext) error {
    41  	// Get the channel that contains stream of disk data to upload
    42  	dataWithRangeChan, streamReadErrChan := GetDataWithRanges(cxt.VhdStream, cxt.UploadableRanges)
    43  
    44  	// The channel to send upload request to load-balancer
    45  	requtestChan := make(chan *concurrent.Request, 0)
    46  
    47  	// Prepare and start the load-balancer that load request across 'cxt.Parallelism' workers
    48  	loadBalancer := concurrent.NewBalancer(cxt.Parallelism)
    49  	loadBalancer.Init()
    50  	workerErrorChan, allWorkersFinishedChan := loadBalancer.Run(requtestChan)
    51  
    52  	// Calculate the actual size of the data to upload
    53  	uploadSizeInBytes := int64(0)
    54  	for _, r := range cxt.UploadableRanges {
    55  		uploadSizeInBytes += r.Length()
    56  	}
    57  	fmt.Printf("\nEffective upload size: %.2f MB (from %.2f MB originally)", float64(uploadSizeInBytes)/oneMB, float64(cxt.VhdStream.GetSize())/oneMB)
    58  
    59  	// Prepare and start the upload progress tracker
    60  	uploadProgress := progress.NewStatus(cxt.Parallelism, cxt.AlreadyProcessedBytes, uploadSizeInBytes, progress.NewComputestateDefaultSize())
    61  	progressChan := uploadProgress.Run()
    62  
    63  	// read progress status from progress tracker and print it
    64  	go readAndPrintProgress(progressChan, cxt.Resume)
    65  
    66  	// listen for errors reported by workers and print it
    67  	var allWorkSucceeded = true
    68  	go func() {
    69  		for {
    70  			fmt.Println(<-workerErrorChan)
    71  			allWorkSucceeded = false
    72  		}
    73  	}()
    74  
    75  	var err error
    76  L:
    77  	for {
    78  		select {
    79  		case dataWithRange, ok := <-dataWithRangeChan:
    80  			if !ok {
    81  				close(requtestChan)
    82  				break L
    83  			}
    84  
    85  			// Create work request
    86  			//
    87  			req := &concurrent.Request{
    88  				Work: func() error {
    89  					err := cxt.BlobServiceClient.PutPage(cxt.ContainerName,
    90  						cxt.BlobName,
    91  						dataWithRange.Range.Start,
    92  						dataWithRange.Range.End,
    93  						storage.PageWriteTypeUpdate,
    94  						dataWithRange.Data,
    95  						nil)
    96  					if err == nil {
    97  						uploadProgress.ReportBytesProcessedCount(dataWithRange.Range.Length())
    98  					}
    99  					return err
   100  				},
   101  				ShouldRetry: func(e error) bool {
   102  					return true
   103  				},
   104  				ID: dataWithRange.Range.String(),
   105  			}
   106  
   107  			// Send work request to load balancer for processing
   108  			//
   109  			requtestChan <- req
   110  		case err = <-streamReadErrChan:
   111  			close(requtestChan)
   112  			loadBalancer.TearDownWorkers()
   113  			break L
   114  		}
   115  	}
   116  
   117  	<-allWorkersFinishedChan
   118  	uploadProgress.Close()
   119  
   120  	if !allWorkSucceeded {
   121  		err = errors.New("\nUpload Incomplete: Some blocks of the VHD failed to upload, rerun the command to upload those blocks")
   122  	}
   123  
   124  	if err == nil {
   125  		fmt.Printf("\r Completed: %3d%% [%10.2f MB] RemainingTime: %02dh:%02dm:%02ds Throughput: %d Mb/sec  %2c ",
   126  			100,
   127  			float64(uploadSizeInBytes)/oneMB,
   128  			0, 0, 0,
   129  			0, ' ')
   130  
   131  	}
   132  	return err
   133  }
   134  
   135  // GetDataWithRanges with start reading and streaming the ranges from the disk identified by the parameter ranges.
   136  // It returns two channels, a data channel to stream the disk ranges and a channel to send any error while reading
   137  // the disk. On successful completion the data channel will be closed. the caller must not expect any more value in
   138  // the data channel if the error channel is signaled.
   139  //
   140  func GetDataWithRanges(stream *diskstream.DiskStream, ranges []*common.IndexRange) (<-chan *DataWithRange, <-chan error) {
   141  	dataWithRangeChan := make(chan *DataWithRange, 0)
   142  	errorChan := make(chan error, 0)
   143  	go func() {
   144  		for _, r := range ranges {
   145  			dataWithRange := &DataWithRange{
   146  				Range: r,
   147  				Data:  make([]byte, r.Length()),
   148  			}
   149  			_, err := stream.Seek(r.Start, 0)
   150  			if err != nil {
   151  				errorChan <- err
   152  				return
   153  			}
   154  			_, err = io.ReadFull(stream, dataWithRange.Data)
   155  			if err != nil {
   156  				errorChan <- err
   157  				return
   158  			}
   159  			dataWithRangeChan <- dataWithRange
   160  		}
   161  		close(dataWithRangeChan)
   162  	}()
   163  	return dataWithRangeChan, errorChan
   164  }
   165  
   166  // readAndPrintProgress reads the progress records from the given progress channel and output it. It reads the
   167  // progress record until the channel is closed.
   168  //
   169  func readAndPrintProgress(progressChan <-chan *progress.Record, resume bool) {
   170  	var spinChars = [4]rune{'\\', '|', '/', '-'}
   171  	s := time.Time{}
   172  	if resume {
   173  		fmt.Println("\nResuming VHD upload..")
   174  	} else {
   175  		fmt.Println("\nUploading the VHD..")
   176  	}
   177  
   178  	i := 0
   179  	for progressRecord := range progressChan {
   180  		if i == 4 {
   181  			i = 0
   182  		}
   183  		t := s.Add(progressRecord.RemainingDuration)
   184  		fmt.Printf("\r Completed: %3d%% [%10.2f MB] RemainingTime: %02dh:%02dm:%02ds Throughput: %d Mb/sec  %2c ",
   185  			int(progressRecord.PercentComplete),
   186  			float64(progressRecord.BytesProcessed)/oneMB,
   187  			t.Hour(), t.Minute(), t.Second(),
   188  			int(progressRecord.AverageThroughputMbPerSecond),
   189  			spinChars[i],
   190  		)
   191  		i++
   192  	}
   193  }