// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package vdi provides an extractor for extracting software inventories from
// VirtualBox's VDI disk images.
package vdi

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"strings"
	"sync"

	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
	"github.com/google/osv-scalibr/extractor/filesystem"
	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/plugin"
)

const (
	// Name is the unique identifier for the vdi extractor; it is the value
	// returned by Extractor.Name.
	Name = "embeddedfs/vdi"
	// Signature is the magic number expected in the Signature field of a VDI
	// header. It is always 0xBEDA107F; convertVDIToRaw rejects any image whose
	// header carries a different value.
	// Reference : https://github.com/qemu/qemu/blob/master/block/vdi.c#L107
	// Reference : https://forums.virtualbox.org/viewtopic.php?t=8046
	Signature = 0xBEDA107F
)

// header describes the on-disk VDI header structure.
45 type header struct { 46 Text [0x40]byte 47 Signature uint32 48 Version uint32 49 HeaderSize uint32 50 ImageType uint32 51 ImageFlags uint32 52 Description [256]byte 53 OffsetBmap uint32 54 OffsetData uint32 55 Cylinders uint32 56 Heads uint32 57 Sectors uint32 58 SectorSize uint32 59 Unused1 uint32 60 DiskSize uint64 61 BlockSize uint32 62 BlockExtra uint32 63 BlocksInImage uint32 64 BlocksAllocated uint32 65 UUIDImage [16]byte 66 UUIDLastSnap [16]byte 67 UUIDLink [16]byte 68 UUIDParent [16]byte 69 Unused2 [7]uint64 70 } 71 72 // Extractor implements the filesystem.Extractor interface for vdi. 73 type Extractor struct { 74 // maxFileSizeBytes is the maximum size of an archive file that can be traversed. 75 // If this limit is greater than zero and a file is encountered that is larger 76 // than this limit, the file is ignored. 77 maxFileSizeBytes int64 78 } 79 80 // New returns a new VDI extractor. 81 // New returns a new archive extractor. 82 func New(cfg *cpb.PluginConfig) filesystem.Extractor { 83 maxSize := cfg.MaxFileSizeBytes 84 specific := plugin.FindConfig(cfg, func(c *cpb.PluginSpecificConfig) *cpb.VDIConfig { return c.GetVdi() }) 85 if specific.GetMaxFileSizeBytes() > 0 { 86 maxSize = specific.GetMaxFileSizeBytes() 87 } 88 return &Extractor{maxFileSizeBytes: maxSize} 89 } 90 91 // Name returns the name of the extractor. 92 func (e *Extractor) Name() string { 93 return Name 94 } 95 96 // Version returns the version of the extractor. 97 func (e *Extractor) Version() int { 98 return 0 99 } 100 101 // Requirements returns the requirements for the extractor. 102 func (e *Extractor) Requirements() *plugin.Capabilities { 103 return &plugin.Capabilities{} 104 } 105 106 // FileRequired checks if the file is a .vdi file based on its extension. 
107 func (e *Extractor) FileRequired(api filesystem.FileAPI) bool { 108 path := api.Path() 109 if !strings.HasSuffix(strings.ToLower(path), ".vdi") { 110 return false 111 } 112 113 fileinfo, err := api.Stat() 114 if err != nil { 115 return false 116 } 117 118 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 119 return false 120 } 121 122 return true 123 } 124 125 // Extract returns an Inventory with embedded filesystems which contains mount functions for each filesystem in the .vdi file. 126 func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 127 // Check wether input.Reader is nil or not. 128 if input.Reader == nil { 129 return inventory.Inventory{}, errors.New("input.Reader is nil") 130 } 131 132 // Create a temporary file for the raw disk image 133 tmpRaw, err := os.CreateTemp("", "scalibr-vdi-raw-*.raw") 134 if err != nil { 135 return inventory.Inventory{}, fmt.Errorf("failed to create temporary raw file: %w", err) 136 } 137 tmpRawPath := tmpRaw.Name() 138 139 // Convert VDI to raw 140 if err := convertVDIToRaw(input.Reader, tmpRaw); err != nil { 141 os.Remove(tmpRawPath) 142 return inventory.Inventory{}, fmt.Errorf("failed to convert %s to raw image: %w", input.Path, err) 143 } 144 145 // Retrieve all partitions and the associated disk handle from the raw disk image. 
146 partitionList, disk, err := common.GetDiskPartitions(tmpRawPath) 147 if err != nil { 148 disk.Close() 149 os.Remove(tmpRawPath) 150 return inventory.Inventory{}, err 151 } 152 153 // Create a reference counter for the temporary file 154 var refCount int32 155 var refMu sync.Mutex 156 157 // Create an Embedded filesystem for each valid partition 158 var embeddedFSs []*inventory.EmbeddedFS 159 for i, p := range partitionList { 160 partitionIndex := i + 1 // go-diskfs uses 1-based indexing 161 getEmbeddedFS := common.NewPartitionEmbeddedFSGetter("vdi", partitionIndex, p, disk, tmpRawPath, &refMu, &refCount) 162 embeddedFSs = append(embeddedFSs, &inventory.EmbeddedFS{ 163 Path: fmt.Sprintf("%s:%d", input.Path, partitionIndex), 164 GetEmbeddedFS: getEmbeddedFS, 165 }) 166 } 167 return inventory.Inventory{EmbeddedFSs: embeddedFSs}, nil 168 } 169 170 // VDI conversion functions 171 172 // convertVDIToRaw converts a VDI image to a raw image using streaming I/O only (no Seek). 173 func convertVDIToRaw(in io.Reader, out io.Writer) error { 174 var hdr header 175 if err := binary.Read(in, binary.LittleEndian, &hdr); err != nil { 176 return fmt.Errorf("failed to read VDI header: %w", err) 177 } 178 179 // Sanity check: VDI signature should be 0xBEDA107F 180 if hdr.Signature != Signature { 181 return errors.New("not a valid VDI file (bad signature)") 182 } 183 184 curPos := int64(binary.Size(hdr)) 185 186 switch hdr.ImageType { 187 // dynamic / sparse 188 // Reference : https://github.com/qemu/qemu/blob/master/block/vdi.c#L114 189 case 1: 190 // Skip to block map 191 if int64(hdr.OffsetBmap) > curPos { 192 if err := skipBytes(in, int64(hdr.OffsetBmap)-curPos); err != nil { 193 return fmt.Errorf("failed to skip to block map: %w", err) 194 } 195 curPos = int64(hdr.OffsetBmap) 196 } 197 198 indices := make([]uint32, hdr.BlocksInImage) 199 if err := binary.Read(in, binary.LittleEndian, &indices); err != nil { 200 return fmt.Errorf("failed to read block map: %w", err) 201 } 202 
curPos += int64(4 * len(indices)) 203 204 stride := uint64(hdr.BlockSize) + uint64(hdr.BlockExtra) 205 for i := range indices { 206 virtOffset := uint64(i) * uint64(hdr.BlockSize) 207 writeSize := uint64(hdr.BlockSize) 208 if virtOffset+writeSize > hdr.DiskSize { 209 writeSize = hdr.DiskSize - virtOffset 210 } 211 212 idx := indices[i] 213 // Reference : https://github.com/qemu/qemu/blob/master/block/vdi.c#L125-L131 214 if idx == 0xFFFFFFFF || idx == 0xFFFFFFFE { 215 // unallocated/discarded: write zeros 216 if err := writeZeros(out, int64(writeSize)); err != nil { 217 return err 218 } 219 continue 220 } 221 222 // Physical location of block 223 phys := int64(hdr.OffsetData) + int64(uint64(idx)*stride) + int64(hdr.BlockExtra) 224 if phys > curPos { 225 if err := skipBytes(in, phys-curPos); err != nil { 226 return fmt.Errorf("failed to skip to data block: %w", err) 227 } 228 curPos = phys 229 } 230 231 n, err := io.CopyN(out, in, int64(writeSize)) 232 curPos += n 233 if err != nil { 234 return fmt.Errorf("failed to read data block: %w", err) 235 } 236 } 237 return nil 238 239 // static / fixed 240 // Reference : https://github.com/qemu/qemu/blob/master/block/vdi.c#L115 241 case 2: 242 if int64(hdr.OffsetData) > curPos { 243 if err := skipBytes(in, int64(hdr.OffsetData)-curPos); err != nil { 244 return err 245 } 246 } 247 _, err := io.CopyN(out, in, int64(hdr.DiskSize)) 248 if err != nil && !errors.Is(err, io.EOF) { 249 return err 250 } 251 return nil 252 253 default: 254 return fmt.Errorf("unsupported VDI type %d", hdr.ImageType) 255 } 256 } 257 258 func writeZeros(w io.Writer, n int64) error { 259 buf := make([]byte, 64*1024) 260 for n > 0 { 261 chunk := min(int64(len(buf)), n) 262 if _, err := w.Write(buf[:chunk]); err != nil { 263 return err 264 } 265 n -= chunk 266 } 267 return nil 268 } 269 270 func skipBytes(r io.Reader, n int64) error { 271 buf := make([]byte, 64*1024) 272 for n > 0 { 273 chunk := min(int64(len(buf)), n) 274 _, err := io.CopyN(io.Discard, r, 
chunk) 275 if err != nil { 276 return err 277 } 278 n -= chunk 279 } 280 return nil 281 }