yunion.io/x/cloudmux@v0.3.10-0-alpha.1/pkg/multicloud/azure/upload.go (about)

     1  // Copyright 2019 Yunion
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package azure
    16  
import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math"
	"sync/atomic"
	"time"

	"github.com/Azure/azure-sdk-for-go/storage"
	"github.com/Microsoft/azure-vhd-utils/vhdcore/block/bitmap"
	"github.com/Microsoft/azure-vhd-utils/vhdcore/common"
	"github.com/Microsoft/azure-vhd-utils/vhdcore/diskstream"
	"github.com/Microsoft/azure-vhd-utils/vhdcore/footer"
	"github.com/Microsoft/azure-vhd-utils/vhdcore/validator"

	"yunion.io/x/cloudmux/pkg/multicloud/azure/concurrent"
	"yunion.io/x/cloudmux/pkg/multicloud/azure/progress"
)
    35  
// DiskUploadContext type describes VHD upload context, this includes the disk stream to read from, the ranges of
// the stream to read, the destination blob and its container, the client to communicate with Azure storage and
// the number of parallel go-routines to use for upload.
//
    40  
// DataWithRange couples one index range of the disk stream with the bytes
// read from that range; it is the unit of work sent to the upload workers.
type DataWithRange struct {
	Range *common.IndexRange // the stream extent these bytes were read from
	Data  []byte             // the bytes for Range; length equals Range.Length()
}
    45  
// DiskUploadContext describes a VHD upload: the disk stream to read from, the
// subset of stream ranges to upload, the destination page blob and its
// container, the blob-service client, and the degree of upload parallelism.
type DiskUploadContext struct {
	VhdStream             *diskstream.DiskStream    // The stream whose ranges needs to be uploaded
	AlreadyProcessedBytes int64                     // The size in bytes already uploaded
	UploadableRanges      []*common.IndexRange      // The subset of stream ranges to be uploaded
	BlobServiceClient     storage.BlobStorageClient // The client to make Azure blob service API calls
	ContainerName         string                    // The container in which page blob resides
	BlobName              string                    // The destination page blob name
	Parallelism           int                       // The number of concurrent goroutines to be used for upload
	Resume                bool                      // Indicate whether this is a new or resuming upload
	MD5Hash               []byte                    // MD5Hash to be set in the page blob properties once upload finishes
}
    57  
// oneMB is one megabyte (2^20 bytes), used to scale byte counts for progress output.
//
const oneMB = float64(1048576)
    61  
    62  // Upload uploads the disk ranges described by the parameter cxt, this parameter describes the disk stream to
    63  // read from, the ranges of the stream to read, the destination blob and it's container, the client to communicate
    64  // with Azure storage and the number of parallel go-routines to use for upload.
    65  //
    66  func Upload(cxt *DiskUploadContext, callback func(float32)) error {
    67  	// Get the channel that contains stream of disk data to upload
    68  	dataWithRangeChan, streamReadErrChan := GetDataWithRanges(cxt.VhdStream, cxt.UploadableRanges)
    69  
    70  	// The channel to send upload request to load-balancer
    71  	requtestChan := make(chan *concurrent.Request, 0)
    72  
    73  	// Prepare and start the load-balancer that load request across 'cxt.Parallelism' workers
    74  	loadBalancer := concurrent.NewBalancer(cxt.Parallelism)
    75  	loadBalancer.Init()
    76  	workerErrorChan, allWorkersFinishedChan := loadBalancer.Run(requtestChan)
    77  
    78  	// Calculate the actual size of the data to upload
    79  	uploadSizeInBytes := int64(0)
    80  	for _, r := range cxt.UploadableRanges {
    81  		uploadSizeInBytes += r.Length()
    82  	}
    83  	fmt.Printf("\nEffective upload size: %.2f MB (from %.2f MB originally)", float64(uploadSizeInBytes)/oneMB, float64(cxt.VhdStream.GetSize())/oneMB)
    84  
    85  	// Prepare and start the upload progress tracker
    86  	uploadProgress := progress.NewStatus(cxt.Parallelism, cxt.AlreadyProcessedBytes, uploadSizeInBytes, progress.NewComputestateDefaultSize())
    87  	progressChan := uploadProgress.Run()
    88  
    89  	// read progress status from progress tracker and print it
    90  	go readAndPrintProgress(progressChan, cxt.Resume, callback)
    91  
    92  	// listen for errors reported by workers and print it
    93  	var allWorkSucceeded = true
    94  	go func() {
    95  		for {
    96  			fmt.Println(<-workerErrorChan)
    97  			allWorkSucceeded = false
    98  		}
    99  	}()
   100  
   101  	var err error
   102  L:
   103  	for {
   104  		select {
   105  		case dataWithRange, ok := <-dataWithRangeChan:
   106  			if !ok {
   107  				close(requtestChan)
   108  				break L
   109  			}
   110  
   111  			// Create work request
   112  			//
   113  			containerClinet := cxt.BlobServiceClient.GetContainerReference(cxt.ContainerName)
   114  			blobClient := containerClinet.GetBlobReference(cxt.BlobName)
   115  			req := &concurrent.Request{
   116  				Work: func() error {
   117  					err := blobClient.WriteRange(
   118  						storage.BlobRange{Start: uint64(dataWithRange.Range.Start), End: uint64(dataWithRange.Range.End)},
   119  						bytes.NewReader(dataWithRange.Data),
   120  						&storage.PutPageOptions{},
   121  					)
   122  					if err == nil {
   123  						uploadProgress.ReportBytesProcessedCount(dataWithRange.Range.Length())
   124  					}
   125  					return err
   126  				},
   127  				ShouldRetry: func(e error) bool {
   128  					return true
   129  				},
   130  				ID: dataWithRange.Range.String(),
   131  			}
   132  
   133  			// Send work request to load balancer for processing
   134  			//
   135  			requtestChan <- req
   136  		case err = <-streamReadErrChan:
   137  			close(requtestChan)
   138  			loadBalancer.TearDownWorkers()
   139  			break L
   140  		}
   141  	}
   142  
   143  	<-allWorkersFinishedChan
   144  	uploadProgress.Close()
   145  
   146  	if !allWorkSucceeded {
   147  		err = errors.New("\nUpload Incomplete: Some blocks of the VHD failed to upload, rerun the command to upload those blocks")
   148  	}
   149  
   150  	if err == nil {
   151  		fmt.Printf("\r Completed: %3d%% [%10.2f MB] RemainingTime: %02dh:%02dm:%02ds Throughput: %d Mb/sec  %2c ",
   152  			100,
   153  			float64(uploadSizeInBytes)/oneMB,
   154  			0, 0, 0,
   155  			0, ' ')
   156  
   157  	}
   158  	return err
   159  }
   160  
   161  // GetDataWithRanges with start reading and streaming the ranges from the disk identified by the parameter ranges.
   162  // It returns two channels, a data channel to stream the disk ranges and a channel to send any error while reading
   163  // the disk. On successful completion the data channel will be closed. the caller must not expect any more value in
   164  // the data channel if the error channel is signaled.
   165  //
   166  func GetDataWithRanges(stream *diskstream.DiskStream, ranges []*common.IndexRange) (<-chan *DataWithRange, <-chan error) {
   167  	dataWithRangeChan := make(chan *DataWithRange, 0)
   168  	errorChan := make(chan error, 0)
   169  	go func() {
   170  		for _, r := range ranges {
   171  			dataWithRange := &DataWithRange{
   172  				Range: r,
   173  				Data:  make([]byte, r.Length()),
   174  			}
   175  			_, err := stream.Seek(r.Start, 0)
   176  			if err != nil {
   177  				errorChan <- err
   178  				return
   179  			}
   180  			_, err = io.ReadFull(stream, dataWithRange.Data)
   181  			if err != nil {
   182  				errorChan <- err
   183  				return
   184  			}
   185  			dataWithRangeChan <- dataWithRange
   186  		}
   187  		close(dataWithRangeChan)
   188  	}()
   189  	return dataWithRangeChan, errorChan
   190  }
   191  
   192  // readAndPrintProgress reads the progress records from the given progress channel and output it. It reads the
   193  // progress record until the channel is closed.
   194  //
   195  func readAndPrintProgress(progressChan <-chan *progress.Record, resume bool, callback func(float32)) {
   196  	var spinChars = [4]rune{'\\', '|', '/', '-'}
   197  	s := time.Time{}
   198  	if resume {
   199  		fmt.Println("\nResuming VHD upload..")
   200  	} else {
   201  		fmt.Println("\nUploading the VHD..")
   202  	}
   203  
   204  	i := 0
   205  	for progressRecord := range progressChan {
   206  		if i == 4 {
   207  			i = 0
   208  		}
   209  		t := s.Add(progressRecord.RemainingDuration)
   210  		fmt.Printf("\r Completed: %3d%% [%10.2f MB] RemainingTime: %02dh:%02dm:%02ds Throughput: %d Mb/sec  %2c ",
   211  			int(progressRecord.PercentComplete),
   212  			float64(progressRecord.BytesProcessed)/oneMB,
   213  			t.Hour(), t.Minute(), t.Second(),
   214  			int(progressRecord.AverageThroughputMbPerSecond),
   215  			spinChars[i],
   216  		)
   217  		if callback != nil {
   218  			callback(33.0 + float32(progressRecord.PercentComplete*0.33))
   219  		}
   220  		i++
   221  	}
   222  }
   223  
   224  func ensureVHDSanity(localVHDPath string) error {
   225  	if err := validator.ValidateVhd(localVHDPath); err != nil {
   226  		return err
   227  	}
   228  
   229  	if err := validator.ValidateVhdSize(localVHDPath); err != nil {
   230  		return err
   231  	}
   232  	return nil
   233  }
   234  
   235  func LocateUploadableRanges(stream *diskstream.DiskStream, rangesToSkip []*common.IndexRange, pageSizeInBytes int64) ([]*common.IndexRange, error) {
   236  	var err error
   237  	var diskRanges = make([]*common.IndexRange, 0)
   238  	stream.EnumerateExtents(func(ext *diskstream.StreamExtent, extErr error) bool {
   239  		if extErr != nil {
   240  			err = extErr
   241  			return false
   242  		}
   243  
   244  		diskRanges = append(diskRanges, ext.Range)
   245  		return true
   246  	})
   247  
   248  	if err != nil {
   249  		return nil, err
   250  	}
   251  
   252  	diskRanges = common.SubtractRanges(diskRanges, rangesToSkip)
   253  	diskRanges = common.ChunkRangesBySize(diskRanges, pageSizeInBytes)
   254  	return diskRanges, nil
   255  }
   256  
   257  func DetectEmptyRanges(diskStream *diskstream.DiskStream, uploadableRanges []*common.IndexRange) ([]*common.IndexRange, error) {
   258  	if diskStream.GetDiskType() != footer.DiskTypeFixed {
   259  		return uploadableRanges, nil
   260  	}
   261  
   262  	fmt.Println("\nDetecting empty ranges..")
   263  	totalRangesCount := len(uploadableRanges)
   264  	lastIndex := int32(-1)
   265  	emptyRangesCount := int32(0)
   266  	bits := make([]byte, int32(math.Ceil(float64(totalRangesCount)/float64(8))))
   267  	bmap := bitmap.NewBitMapFromByteSliceCopy(bits)
   268  	indexChan, errChan := LocateNonEmptyRangeIndices(diskStream, uploadableRanges)
   269  L:
   270  	for {
   271  		select {
   272  		case index, ok := <-indexChan:
   273  			if !ok {
   274  				break L
   275  			}
   276  			bmap.Set(index, true)
   277  			emptyRangesCount += index - lastIndex - 1
   278  			lastIndex = index
   279  			fmt.Printf("\r Empty ranges : %d/%d", emptyRangesCount, totalRangesCount)
   280  		case err := <-errChan:
   281  			return nil, err
   282  		}
   283  	}
   284  
   285  	// Remove empty ranges from the uploadable ranges slice.
   286  	i := int32(0)
   287  	for j := 0; j < totalRangesCount; j++ {
   288  		if set, _ := bmap.Get(int32(j)); set {
   289  			uploadableRanges[i] = uploadableRanges[j]
   290  			i++
   291  		}
   292  	}
   293  	uploadableRanges = uploadableRanges[:i]
   294  	return uploadableRanges, nil
   295  }
   296  
   297  func LocateNonEmptyRangeIndices(stream *diskstream.DiskStream, ranges []*common.IndexRange) (<-chan int32, <-chan error) {
   298  	indexChan := make(chan int32, 0)
   299  	errorChan := make(chan error, 0)
   300  	go func() {
   301  		count := int64(-1)
   302  		var buf []byte
   303  		for index, r := range ranges {
   304  			if count != r.Length() {
   305  				count = r.Length()
   306  				buf = make([]byte, count)
   307  			}
   308  
   309  			_, err := stream.Seek(r.Start, 0)
   310  			if err != nil {
   311  				errorChan <- err
   312  				return
   313  			}
   314  			_, err = io.ReadFull(stream, buf)
   315  			if err != nil {
   316  				errorChan <- err
   317  				return
   318  			}
   319  			if !isAllZero(buf) {
   320  				indexChan <- int32(index)
   321  			}
   322  		}
   323  		close(indexChan)
   324  	}()
   325  	return indexChan, errorChan
   326  }
   327  
   328  // isAllZero returns true if the given byte slice contain all zeros
   329  //
   330  func isAllZero(buf []byte) bool {
   331  	l := len(buf)
   332  	j := 0
   333  	for ; j < l; j++ {
   334  		if buf[j] != byte(0) {
   335  			break
   336  		}
   337  	}
   338  	return j == l
   339  }