github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/embeddedfs/vdi/vdi.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package vdi provides an extractor for extracting software inventories from VirtualBox's VDI disk images
    16  package vdi
    17  
    18  import (
    19  	"context"
    20  	"encoding/binary"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"strings"
    26  	"sync"
    27  
    28  	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
    29  	"github.com/google/osv-scalibr/extractor/filesystem"
    30  	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common"
    31  	"github.com/google/osv-scalibr/inventory"
    32  	"github.com/google/osv-scalibr/plugin"
    33  )
    34  
    35  const (
    36  	// Name is the unique identifier for the vdi extractor.
    37  	Name = "embeddedfs/vdi"
    38  	// Signature is always 0xBEDA107F.
    39  	// Reference : https://github.com/qemu/qemu/blob/master/block/vdi.c#L107
    40  	// Reference : https://forums.virtualbox.org/viewtopic.php?t=8046
    41  	Signature = 0xBEDA107F
    42  )
    43  
    44  // header describes the on-disk VDI header structure.
    45  type header struct {
    46  	Text            [0x40]byte
    47  	Signature       uint32
    48  	Version         uint32
    49  	HeaderSize      uint32
    50  	ImageType       uint32
    51  	ImageFlags      uint32
    52  	Description     [256]byte
    53  	OffsetBmap      uint32
    54  	OffsetData      uint32
    55  	Cylinders       uint32
    56  	Heads           uint32
    57  	Sectors         uint32
    58  	SectorSize      uint32
    59  	Unused1         uint32
    60  	DiskSize        uint64
    61  	BlockSize       uint32
    62  	BlockExtra      uint32
    63  	BlocksInImage   uint32
    64  	BlocksAllocated uint32
    65  	UUIDImage       [16]byte
    66  	UUIDLastSnap    [16]byte
    67  	UUIDLink        [16]byte
    68  	UUIDParent      [16]byte
    69  	Unused2         [7]uint64
    70  }
    71  
    72  // Extractor implements the filesystem.Extractor interface for vdi.
    73  type Extractor struct {
    74  	// maxFileSizeBytes is the maximum size of an archive file that can be traversed.
    75  	// If this limit is greater than zero and a file is encountered that is larger
    76  	// than this limit, the file is ignored.
    77  	maxFileSizeBytes int64
    78  }
    79  
    80  // New returns a new VDI extractor.
    81  // New returns a new archive extractor.
    82  func New(cfg *cpb.PluginConfig) filesystem.Extractor {
    83  	maxSize := cfg.MaxFileSizeBytes
    84  	specific := plugin.FindConfig(cfg, func(c *cpb.PluginSpecificConfig) *cpb.VDIConfig { return c.GetVdi() })
    85  	if specific.GetMaxFileSizeBytes() > 0 {
    86  		maxSize = specific.GetMaxFileSizeBytes()
    87  	}
    88  	return &Extractor{maxFileSizeBytes: maxSize}
    89  }
    90  
    91  // Name returns the name of the extractor.
    92  func (e *Extractor) Name() string {
    93  	return Name
    94  }
    95  
    96  // Version returns the version of the extractor.
    97  func (e *Extractor) Version() int {
    98  	return 0
    99  }
   100  
   101  // Requirements returns the requirements for the extractor.
   102  func (e *Extractor) Requirements() *plugin.Capabilities {
   103  	return &plugin.Capabilities{}
   104  }
   105  
   106  // FileRequired checks if the file is a .vdi file based on its extension.
   107  func (e *Extractor) FileRequired(api filesystem.FileAPI) bool {
   108  	path := api.Path()
   109  	if !strings.HasSuffix(strings.ToLower(path), ".vdi") {
   110  		return false
   111  	}
   112  
   113  	fileinfo, err := api.Stat()
   114  	if err != nil {
   115  		return false
   116  	}
   117  
   118  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
   119  		return false
   120  	}
   121  
   122  	return true
   123  }
   124  
   125  // Extract returns an Inventory with embedded filesystems which contains mount functions for each filesystem in the .vdi file.
   126  func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   127  	// Check wether input.Reader is nil or not.
   128  	if input.Reader == nil {
   129  		return inventory.Inventory{}, errors.New("input.Reader is nil")
   130  	}
   131  
   132  	// Create a temporary file for the raw disk image
   133  	tmpRaw, err := os.CreateTemp("", "scalibr-vdi-raw-*.raw")
   134  	if err != nil {
   135  		return inventory.Inventory{}, fmt.Errorf("failed to create temporary raw file: %w", err)
   136  	}
   137  	tmpRawPath := tmpRaw.Name()
   138  
   139  	// Convert VDI to raw
   140  	if err := convertVDIToRaw(input.Reader, tmpRaw); err != nil {
   141  		os.Remove(tmpRawPath)
   142  		return inventory.Inventory{}, fmt.Errorf("failed to convert %s to raw image: %w", input.Path, err)
   143  	}
   144  
   145  	// Retrieve all partitions and the associated disk handle from the raw disk image.
   146  	partitionList, disk, err := common.GetDiskPartitions(tmpRawPath)
   147  	if err != nil {
   148  		disk.Close()
   149  		os.Remove(tmpRawPath)
   150  		return inventory.Inventory{}, err
   151  	}
   152  
   153  	// Create a reference counter for the temporary file
   154  	var refCount int32
   155  	var refMu sync.Mutex
   156  
   157  	// Create an Embedded filesystem for each valid partition
   158  	var embeddedFSs []*inventory.EmbeddedFS
   159  	for i, p := range partitionList {
   160  		partitionIndex := i + 1 // go-diskfs uses 1-based indexing
   161  		getEmbeddedFS := common.NewPartitionEmbeddedFSGetter("vdi", partitionIndex, p, disk, tmpRawPath, &refMu, &refCount)
   162  		embeddedFSs = append(embeddedFSs, &inventory.EmbeddedFS{
   163  			Path:          fmt.Sprintf("%s:%d", input.Path, partitionIndex),
   164  			GetEmbeddedFS: getEmbeddedFS,
   165  		})
   166  	}
   167  	return inventory.Inventory{EmbeddedFSs: embeddedFSs}, nil
   168  }
   169  
   170  // VDI conversion functions
   171  
   172  // convertVDIToRaw converts a VDI image to a raw image using streaming I/O only (no Seek).
   173  func convertVDIToRaw(in io.Reader, out io.Writer) error {
   174  	var hdr header
   175  	if err := binary.Read(in, binary.LittleEndian, &hdr); err != nil {
   176  		return fmt.Errorf("failed to read VDI header: %w", err)
   177  	}
   178  
   179  	// Sanity check: VDI signature should be 0xBEDA107F
   180  	if hdr.Signature != Signature {
   181  		return errors.New("not a valid VDI file (bad signature)")
   182  	}
   183  
   184  	curPos := int64(binary.Size(hdr))
   185  
   186  	switch hdr.ImageType {
   187  	// dynamic / sparse
   188  	// Reference : https://github.com/qemu/qemu/blob/master/block/vdi.c#L114
   189  	case 1:
   190  		// Skip to block map
   191  		if int64(hdr.OffsetBmap) > curPos {
   192  			if err := skipBytes(in, int64(hdr.OffsetBmap)-curPos); err != nil {
   193  				return fmt.Errorf("failed to skip to block map: %w", err)
   194  			}
   195  			curPos = int64(hdr.OffsetBmap)
   196  		}
   197  
   198  		indices := make([]uint32, hdr.BlocksInImage)
   199  		if err := binary.Read(in, binary.LittleEndian, &indices); err != nil {
   200  			return fmt.Errorf("failed to read block map: %w", err)
   201  		}
   202  		curPos += int64(4 * len(indices))
   203  
   204  		stride := uint64(hdr.BlockSize) + uint64(hdr.BlockExtra)
   205  		for i := range indices {
   206  			virtOffset := uint64(i) * uint64(hdr.BlockSize)
   207  			writeSize := uint64(hdr.BlockSize)
   208  			if virtOffset+writeSize > hdr.DiskSize {
   209  				writeSize = hdr.DiskSize - virtOffset
   210  			}
   211  
   212  			idx := indices[i]
   213  			// Reference : https://github.com/qemu/qemu/blob/master/block/vdi.c#L125-L131
   214  			if idx == 0xFFFFFFFF || idx == 0xFFFFFFFE {
   215  				// unallocated/discarded: write zeros
   216  				if err := writeZeros(out, int64(writeSize)); err != nil {
   217  					return err
   218  				}
   219  				continue
   220  			}
   221  
   222  			// Physical location of block
   223  			phys := int64(hdr.OffsetData) + int64(uint64(idx)*stride) + int64(hdr.BlockExtra)
   224  			if phys > curPos {
   225  				if err := skipBytes(in, phys-curPos); err != nil {
   226  					return fmt.Errorf("failed to skip to data block: %w", err)
   227  				}
   228  				curPos = phys
   229  			}
   230  
   231  			n, err := io.CopyN(out, in, int64(writeSize))
   232  			curPos += n
   233  			if err != nil {
   234  				return fmt.Errorf("failed to read data block: %w", err)
   235  			}
   236  		}
   237  		return nil
   238  
   239  	// static / fixed
   240  	// Reference : https://github.com/qemu/qemu/blob/master/block/vdi.c#L115
   241  	case 2:
   242  		if int64(hdr.OffsetData) > curPos {
   243  			if err := skipBytes(in, int64(hdr.OffsetData)-curPos); err != nil {
   244  				return err
   245  			}
   246  		}
   247  		_, err := io.CopyN(out, in, int64(hdr.DiskSize))
   248  		if err != nil && !errors.Is(err, io.EOF) {
   249  			return err
   250  		}
   251  		return nil
   252  
   253  	default:
   254  		return fmt.Errorf("unsupported VDI type %d", hdr.ImageType)
   255  	}
   256  }
   257  
   258  func writeZeros(w io.Writer, n int64) error {
   259  	buf := make([]byte, 64*1024)
   260  	for n > 0 {
   261  		chunk := min(int64(len(buf)), n)
   262  		if _, err := w.Write(buf[:chunk]); err != nil {
   263  			return err
   264  		}
   265  		n -= chunk
   266  	}
   267  	return nil
   268  }
   269  
   270  func skipBytes(r io.Reader, n int64) error {
   271  	buf := make([]byte, 64*1024)
   272  	for n > 0 {
   273  		chunk := min(int64(len(buf)), n)
   274  		_, err := io.CopyN(io.Discard, r, chunk)
   275  		if err != nil {
   276  			return err
   277  		}
   278  		n -= chunk
   279  	}
   280  	return nil
   281  }