github.com/GoogleCloudPlatform/compute-image-tools/cli_tools@v0.0.0-20240516224744-de2dabc4ed1b/common/imagefile/qemu_img.go (about)

     1  //  Copyright 2020 Google Inc. All Rights Reserved.
     2  //
     3  //  Licensed under the Apache License, Version 2.0 (the "License");
     4  //  you may not use this file except in compliance with the License.
     5  //  You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  //  Unless required by applicable law or agreed to in writing, software
    10  //  distributed under the License is distributed on an "AS IS" BASIS,
    11  //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  //  See the License for the specific language governing permissions and
    13  //  limitations under the License.
    14  
    15  package imagefile
    16  
    17  import (
    18  	"context"
    19  	"crypto/md5"
    20  	"encoding/json"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"os/exec"
    26  	"strings"
    27  
    28  	daisy "github.com/GoogleCloudPlatform/compute-daisy"
    29  
    30  	"github.com/GoogleCloudPlatform/compute-image-tools/cli_tools/common/utils/files"
    31  	pathutils "github.com/GoogleCloudPlatform/compute-image-tools/cli_tools/common/utils/path"
    32  	"github.com/GoogleCloudPlatform/compute-image-tools/cli_tools/common/utils/shell"
    33  )
    34  
    35  // FormatUnknown means that qemu-img could not determine the file's format.
    36  const FormatUnknown string = "unknown"
    37  
    38  // The output of `qemu-img --help` contains this list.
    39  var qemuImgFormats = strings.Split("blkdebug blklogwrites blkreplay blkverify bochs cloop "+
    40  	"copy-on-read dmg file ftp ftps gluster host_cdrom host_device http "+
    41  	"https iscsi iser luks nbd nfs null-aio null-co nvme parallels qcow "+
    42  	"qcow2 qed quorum raw rbd replication sheepdog ssh throttle vdi vhdx vmdk vpc vvfat", " ")
    43  
    44  // ImageInfo includes metadata returned by `qemu-img info`.
    45  type ImageInfo struct {
    46  	Format           string
    47  	ActualSizeBytes  int64
    48  	VirtualSizeBytes int64
    49  	// This checksum is calculated from the partial disk content extracted by QEMU.
    50  	Checksum string
    51  }
    52  
    53  // InfoClient runs `qemu-img info` and returns the results.
    54  type InfoClient interface {
    55  	GetInfo(ctx context.Context, filename string) (ImageInfo, error)
    56  }
    57  
    58  // NewInfoClient returns a new instance of InfoClient.
    59  func NewInfoClient() InfoClient {
    60  	return defaultInfoClient{shell.NewShellExecutor(), "out" + pathutils.RandString(5)}
    61  }
    62  
    63  type defaultInfoClient struct {
    64  	shellExecutor    shell.Executor
    65  	tmpOutFilePrefix string
    66  }
    67  
    68  type fileInfoJSONTemplate struct {
    69  	Filename         string `json:"filename"`
    70  	Format           string `json:"format"`
    71  	ActualSizeBytes  int64  `json:"actual-size"`
    72  	VirtualSizeBytes int64  `json:"virtual-size"`
    73  }
    74  
    75  func (client defaultInfoClient) GetInfo(ctx context.Context, filename string) (info ImageInfo, err error) {
    76  	if !files.Exists(filename) {
    77  		err = fmt.Errorf("file %q not found", filename)
    78  		return
    79  	}
    80  
    81  	jsonTemplate, err := client.getFileInfo(ctx, filename)
    82  	if err != nil {
    83  		err = daisy.Errf("Failed to inspect file %v: %v", filename, err)
    84  		return
    85  	}
    86  	info.Format = lookupFileFormat(jsonTemplate.Format)
    87  	info.ActualSizeBytes = jsonTemplate.ActualSizeBytes
    88  	info.VirtualSizeBytes = jsonTemplate.VirtualSizeBytes
    89  
    90  	checksum, err := client.getFileChecksum(ctx, filename, info.VirtualSizeBytes)
    91  	if err != nil {
    92  		err = daisy.Errf("Failed to calculate file '%v' checksum by qemu: %v", filename, err)
    93  		return
    94  	}
    95  
    96  	info.Checksum = checksum
    97  	return
    98  }
    99  
   100  func (client defaultInfoClient) getFileInfo(ctx context.Context, filename string) (*fileInfoJSONTemplate, error) {
   101  	cmd := exec.CommandContext(ctx, "qemu-img", "info", "--output=json", filename)
   102  	out, err := cmd.Output()
   103  	err = constructCmdErr(string(out), err, "inspection failure")
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  
   108  	jsonTemplate := fileInfoJSONTemplate{}
   109  	if err = json.Unmarshal(out, &jsonTemplate); err != nil {
   110  		return nil, daisy.Errf("failed to inspect %q: %w", filename, err)
   111  	}
   112  	return &jsonTemplate, err
   113  }
   114  
   115  func (client defaultInfoClient) getFileChecksum(ctx context.Context, filename string, virtualSizeBytes int64) (checksum string, err error) {
   116  	// We calculate 4 chunks' checksum. Each of them is 100MB: 0~100MB, 0.9GB~1GB, 9.9GB~10GB, the last 100MB.
   117  	// It is align with what we did for "daisy_workflows/image_import/import_image.sh" so that we can compare them.
   118  	// Each block size is 512 Bytes. So, we need to check 20000 blocks: 200000 * 512 Bytes = 100MB
   119  	// "skips" is also the start point of each chunks.
   120  	checkBlockCount := int64(200000)
   121  	blockSize := int64(512)
   122  	totalBlockCount := virtualSizeBytes / blockSize
   123  	skips := []int64{0, int64(2000000) - checkBlockCount, int64(20000000) - checkBlockCount, totalBlockCount - checkBlockCount}
   124  	for i, skip := range skips {
   125  		tmpOutFileName := fmt.Sprintf("%v%v", client.tmpOutFilePrefix, i)
   126  		defer os.Remove(tmpOutFileName)
   127  
   128  		if skip < 0 {
   129  			skip = 0
   130  		}
   131  
   132  		// Write 100MB data to a file.
   133  		var out string
   134  		out, err = client.shellExecutor.Exec("qemu-img", "dd", fmt.Sprintf("if=%v", filename),
   135  			fmt.Sprintf("of=%v", tmpOutFileName), fmt.Sprintf("bs=%v", blockSize),
   136  			fmt.Sprintf("count=%v", skip+checkBlockCount), fmt.Sprintf("skip=%v", skip))
   137  		err = constructCmdErr(out, err, "inspection for checksum failure")
   138  		if err != nil {
   139  			return
   140  		}
   141  
   142  		// Calculate checksum for the 100MB file.
   143  		f, fileErr := os.Open(tmpOutFileName)
   144  		if fileErr != nil {
   145  			err = daisy.Errf("Failed to open file '%v' for QEMU md5 checksum calculation: %v", tmpOutFileName, fileErr)
   146  			return
   147  		}
   148  		defer f.Close()
   149  		h := md5.New()
   150  		if _, md5Err := io.Copy(h, f); md5Err != nil {
   151  			err = daisy.Errf("Failed to copy data from file '%v' for QEMU md5 checksum calculation: %v", tmpOutFileName, md5Err)
   152  			return
   153  		}
   154  		newChecksum := fmt.Sprintf("%x", h.Sum(nil))
   155  
   156  		if checksum != "" {
   157  			checksum += "-"
   158  		}
   159  		checksum += newChecksum
   160  	}
   161  	return
   162  }
   163  
   164  func constructCmdErr(out string, err error, errorFormat string) error {
   165  	if err == nil {
   166  		return nil
   167  	}
   168  
   169  	var exitError *exec.ExitError
   170  	if errors.As(err, &exitError) {
   171  		return daisy.Errf("%v: '%w', stderr: '%s', out: '%s'", errorFormat, err, exitError.Stderr, out)
   172  	}
   173  	return daisy.Errf("%v: '%w', out: '%s'", errorFormat, err, out)
   174  }
   175  
   176  func lookupFileFormat(s string) string {
   177  	lower := strings.ToLower(s)
   178  	for _, format := range qemuImgFormats {
   179  		if format == lower {
   180  			return format
   181  		}
   182  	}
   183  	return FormatUnknown
   184  }