// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package vmdk provides an extractor for extracting software inventories from VMDK disk images
package vmdk

import (
	"bytes"
	"compress/zlib"
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"strings"
	"sync"

	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
	"github.com/google/osv-scalibr/extractor/filesystem"
	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/plugin"
)

const (
	// Name is the unique identifier for the vmdk extractor.
	Name = "embeddedfs/vmdk"
	// SectorSize is the default sector size (512 bytes).
	SectorSize = 512
	// SparseMagic is always 'KDMV'.
	SparseMagic = 0x564d444b
	// GDAtEnd indicates that the Grain Directory is stored in the footer at the end of the VMDK file.
	GDAtEnd = 0xFFFFFFFFFFFFFFFF
	// DefaultGrainSec is default sectors if header invalid (64KiB).
	DefaultGrainSec = 128
)

// sparseExtentHeader defines the VMDK sparse extent header structure.
52 type sparseExtentHeader struct { 53 MagicNumber uint32 54 Version uint32 55 Flags uint32 56 Capacity uint64 57 GrainSize uint64 58 DescriptorOffset uint64 59 DescriptorSize uint64 60 NumGTEsPerGT uint32 61 RGDOffset uint64 62 GDOffset uint64 63 OverHead uint64 64 UncleanShutdown byte 65 SingleEndLineChar byte 66 NonEndLineChar byte 67 DoubleEndLineChar1 byte 68 DoubleEndLineChar2 byte 69 CompressAlgorithm uint16 70 Pad [433]byte 71 } 72 73 // gdgtInfo holds GD/GT allocation information. 74 type gdgtInfo struct { 75 GTEs uint64 76 GTs uint32 77 GDsectors uint32 78 GTsectors uint32 79 gd []uint32 80 } 81 82 // Extractor implements the filesystem.Extractor interface for vmdk. 83 type Extractor struct { 84 // maxFileSizeBytes is the maximum size of an archive file that can be traversed. 85 // If this limit is greater than zero and a file is encountered that is larger 86 // than this limit, the file is ignored. 87 maxFileSizeBytes int64 88 } 89 90 // New returns a new VMDK extractor. 91 func New(cfg *cpb.PluginConfig) filesystem.Extractor { 92 maxSize := cfg.MaxFileSizeBytes 93 specific := plugin.FindConfig(cfg, func(c *cpb.PluginSpecificConfig) *cpb.VMDKConfig { return c.GetVmdk() }) 94 if specific.GetMaxFileSizeBytes() > 0 { 95 maxSize = specific.GetMaxFileSizeBytes() 96 } 97 return &Extractor{maxFileSizeBytes: maxSize} 98 } 99 100 // Name returns the name of the extractor. 101 func (e *Extractor) Name() string { 102 return Name 103 } 104 105 // Version returns the version of the extractor. 106 func (e *Extractor) Version() int { 107 return 0 108 } 109 110 // Requirements returns the requirements for the extractor. 111 func (e *Extractor) Requirements() *plugin.Capabilities { 112 return &plugin.Capabilities{} 113 } 114 115 // FileRequired checks if the file is a .vmdk file based on its extension. 
116 func (e *Extractor) FileRequired(api filesystem.FileAPI) bool { 117 path := api.Path() 118 if !strings.HasSuffix(strings.ToLower(path), ".vmdk") { 119 return false 120 } 121 122 fileinfo, err := api.Stat() 123 if err != nil { 124 return false 125 } 126 127 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 128 return false 129 } 130 131 return true 132 } 133 134 // Extract returns an Inventory with embedded filesystems which contains mount functions for each filesystem in the .vmdk file. 135 func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 136 vmdkPath, err := input.GetRealPath() 137 if err != nil { 138 return inventory.Inventory{}, fmt.Errorf("failed to get real path for %s: %w", input.Path, err) 139 } 140 // If called on a virtual FS, clean up the temporary directory 141 if input.Root == "" { 142 defer func() { 143 dir := filepath.Dir(vmdkPath) 144 if err := os.RemoveAll(dir); err != nil { 145 fmt.Printf("os.RemoveAll(%q): %v\n", dir, err) 146 } 147 }() 148 } 149 150 // Create a temporary file for the raw disk image 151 tmpRaw, err := os.CreateTemp("", "scalibr-vmdk-raw-*.raw") 152 if err != nil { 153 return inventory.Inventory{}, fmt.Errorf("failed to create temporary raw file: %w", err) 154 } 155 tmpRawPath := tmpRaw.Name() 156 157 // Convert VMDK to raw 158 if err := convertVMDKToRaw(vmdkPath, tmpRawPath); err != nil { 159 os.Remove(tmpRawPath) 160 return inventory.Inventory{}, fmt.Errorf("failed to convert %s to raw image: %w", vmdkPath, err) 161 } 162 163 // Retrieve all partitions and the associated disk handle from the raw disk image. 
164 partitionList, disk, err := common.GetDiskPartitions(tmpRawPath) 165 if err != nil { 166 disk.Close() 167 os.Remove(tmpRawPath) 168 return inventory.Inventory{}, err 169 } 170 171 // Create a reference counter for the temporary file 172 var refCount int32 173 var refMu sync.Mutex 174 175 // Create an Embedded filesystem for each valid partition 176 var embeddedFSs []*inventory.EmbeddedFS 177 for i, p := range partitionList { 178 partitionIndex := i + 1 // go-diskfs uses 1-based indexing 179 getEmbeddedFS := common.NewPartitionEmbeddedFSGetter("vmdk", partitionIndex, p, disk, tmpRawPath, &refMu, &refCount) 180 embeddedFSs = append(embeddedFSs, &inventory.EmbeddedFS{ 181 Path: fmt.Sprintf("%s:%d", vmdkPath, partitionIndex), 182 GetEmbeddedFS: getEmbeddedFS, 183 }) 184 } 185 return inventory.Inventory{EmbeddedFSs: embeddedFSs}, nil 186 } 187 188 // VMDK conversion functions 189 190 // readHeaderAt reads the 512-byte header at the given offset. 191 func readHeaderAt(r io.ReaderAt, offset int64) (sparseExtentHeader, error) { 192 var hdr sparseExtentHeader 193 buf := make([]byte, SectorSize) 194 n, err := r.ReadAt(buf, offset) 195 if err != nil && !errors.Is(err, io.EOF) { 196 return hdr, fmt.Errorf("read header at %d: %w", offset, err) 197 } 198 if n < SectorSize { 199 return hdr, fmt.Errorf("short header read: %d bytes", n) 200 } 201 br := bytes.NewReader(buf) 202 if err := binary.Read(br, binary.LittleEndian, &hdr); err != nil { 203 return hdr, fmt.Errorf("parse header: %w", err) 204 } 205 if hdr.MagicNumber != SparseMagic { 206 return hdr, fmt.Errorf("invalid magic: 0x%x", hdr.MagicNumber) 207 } 208 return hdr, nil 209 } 210 211 // readFooterIfGDAtEnd reads the footer header near EOF if GDOffset is GDAtEnd. 
212 func readFooterIfGDAtEnd(f *os.File, hdr *sparseExtentHeader) error { 213 if hdr.GDOffset != GDAtEnd { 214 return nil 215 } 216 fi, err := f.Stat() 217 if err != nil { 218 return err 219 } 220 if fi.Size() < 1536 { 221 return errors.New("file too small to contain footer/EOS") 222 } 223 base := fi.Size() - 1536 224 footerHeaderBlock := make([]byte, 512) 225 if _, err := f.ReadAt(footerHeaderBlock, base+512); err != nil { 226 return fmt.Errorf("read footer header block: %w", err) 227 } 228 if binary.LittleEndian.Uint32(footerHeaderBlock[0:4]) != SparseMagic { 229 return fmt.Errorf("footer magic mismatch: 0x%x", binary.LittleEndian.Uint32(footerHeaderBlock[0:4])) 230 } 231 var foot sparseExtentHeader 232 r := bytes.NewReader(footerHeaderBlock[4:]) 233 if err := binary.Read(r, binary.LittleEndian, &foot); err != nil { 234 return fmt.Errorf("parse footer header: %w", err) 235 } 236 *hdr = foot 237 return nil 238 } 239 240 // readStreamMarker reads a VMDK stream marker. 241 func readStreamMarker(f *os.File) (val uint64, size uint32, typ uint32, data []byte, err error) { 242 head := make([]byte, 12) 243 if _, err = io.ReadFull(f, head); err != nil { 244 return 0, 0, 0, nil, err 245 } 246 val = binary.LittleEndian.Uint64(head[0:8]) 247 size = binary.LittleEndian.Uint32(head[8:12]) 248 if size == 0 { 249 tb := make([]byte, 4) 250 if _, err = io.ReadFull(f, tb); err != nil { 251 return val, size, 0, nil, err 252 } 253 typ = binary.LittleEndian.Uint32(tb) 254 consumed := int64(16) 255 pad := (SectorSize - (consumed % SectorSize)) % SectorSize 256 if pad > 0 { 257 if _, err := f.Seek(pad, io.SeekCurrent); err != nil { 258 return val, size, typ, nil, err 259 } 260 } 261 return val, size, typ, nil, nil 262 } 263 if size > 0 { 264 data = make([]byte, size) 265 if _, err = io.ReadFull(f, data); err != nil { 266 return val, size, 0, nil, err 267 } 268 consumed := int64(12 + size) 269 pad := (SectorSize - (consumed % SectorSize)) % SectorSize 270 if pad > 0 { 271 if _, err := 
f.Seek(pad, io.SeekCurrent); err != nil { 272 return val, size, 0, nil, err 273 } 274 } 275 return val, size, 0, data, nil 276 } 277 return val, size, 0, nil, nil 278 } 279 280 // convertStreamOptimizedExtent converts a stream-optimized VMDK extent. 281 func convertStreamOptimizedExtent(f *os.File, out *os.File, hdr sparseExtentHeader) error { 282 if hdr.GDOffset == GDAtEnd { 283 if err := readFooterIfGDAtEnd(f, &hdr); err != nil { 284 return fmt.Errorf("read footer: %w", err) 285 } 286 } 287 grainSec := hdr.GrainSize 288 if grainSec == 0 || (grainSec&(grainSec-1)) != 0 { 289 grainSec = DefaultGrainSec 290 } 291 grainBytes := int64(grainSec) * SectorSize 292 start := int64(hdr.OverHead) * SectorSize 293 if _, err := f.Seek(start, io.SeekStart); err != nil { 294 return fmt.Errorf("seek to stream start: %w", err) 295 } 296 capacityBytes := int64(hdr.Capacity) * SectorSize 297 if err := out.Truncate(capacityBytes); err != nil { 298 return fmt.Errorf("truncate out: %w", err) 299 } 300 301 for { 302 val, size, typ, payload, err := readStreamMarker(f) 303 if err != nil { 304 if errors.Is(err, io.EOF) { 305 break 306 } 307 return fmt.Errorf("read marker: %w", err) 308 } 309 if size != 0 { 310 lba := int64(val) 311 woff := lba * SectorSize 312 if int64(size) == grainBytes { 313 if _, werr := out.WriteAt(payload, woff); werr != nil { 314 return fmt.Errorf("write raw grain at lba %d: %w", lba, werr) 315 } 316 } else if size < uint32(grainBytes) || size > uint32(grainBytes) { 317 zr, zerr := zlib.NewReader(bytes.NewReader(payload)) 318 if zerr != nil { 319 return fmt.Errorf("zlib reader at lba %d: %w", lba, zerr) 320 } 321 dec, derr := io.ReadAll(zr) 322 zr.Close() 323 if derr != nil && !errors.Is(derr, io.EOF) { 324 return fmt.Errorf("zlib read at lba %d: %w", lba, derr) 325 } 326 if int64(len(dec)) < grainBytes { 327 tmp := make([]byte, grainBytes) 328 copy(tmp, dec) 329 dec = tmp 330 } else if int64(len(dec)) > grainBytes { 331 tmp := make([]byte, 
int64(len(dec))+(-int64(len(dec))%grainBytes)) 332 copy(tmp, dec) 333 dec = tmp 334 } 335 if _, werr := out.WriteAt(dec, woff); werr != nil { 336 return fmt.Errorf("write decompressed grain at lba %d: %w", lba, werr) 337 } 338 } else { 339 return fmt.Errorf("invalid grain payload size %d > grainBytes %d", size, grainBytes) 340 } 341 continue 342 } 343 switch typ { 344 case 0: // EOS 345 return nil 346 case 1: // GT 347 if val > 0 { 348 if _, err := f.Seek(int64(val*SectorSize), io.SeekCurrent); err != nil { 349 return fmt.Errorf("skip GT metadata: %w", err) 350 } 351 } 352 case 2: // GD 353 if val > 0 { 354 if _, err := f.Seek(int64(val*SectorSize), io.SeekCurrent); err != nil { 355 return fmt.Errorf("skip GD metadata: %w", err) 356 } 357 } 358 case 3: // FOOTER 359 if val > 0 { 360 meta := make([]byte, int64(val*SectorSize)) 361 if _, err := io.ReadFull(f, meta); err != nil { 362 return fmt.Errorf("read footer meta: %w", err) 363 } 364 if len(meta) >= 4 && binary.LittleEndian.Uint32(meta[0:4]) == SparseMagic { 365 var foot sparseExtentHeader 366 br := bytes.NewReader(meta[4:]) 367 if err := binary.Read(br, binary.LittleEndian, &foot); err == nil { 368 hdr = foot 369 grainSec = hdr.GrainSize 370 if grainSec == 0 || (grainSec&(grainSec-1)) != 0 { 371 grainSec = DefaultGrainSec 372 } 373 grainBytes = int64(grainSec) * SectorSize 374 capacityBytes = int64(hdr.Capacity) * SectorSize 375 _ = out.Truncate(capacityBytes) 376 } 377 } 378 } 379 case 4: // PROGRESS 380 if val > 0 { 381 if _, err := f.Seek(int64(val*SectorSize), io.SeekCurrent); err != nil { 382 return fmt.Errorf("skip progress metadata: %w", err) 383 } 384 } 385 default: 386 if val > 0 { 387 if _, err := f.Seek(int64(val*SectorSize), io.SeekCurrent); err != nil { 388 return fmt.Errorf("skip unknown metadata type %d: %w", typ, err) 389 } 390 } 391 } 392 } 393 return nil 394 } 395 396 // getGDGT computes GD/GT sizes and allocates structures. 
397 func getGDGT(hdr sparseExtentHeader) (*gdgtInfo, error) { 398 if hdr.GrainSize < 1 || hdr.GrainSize > 128 || (hdr.GrainSize&(hdr.GrainSize-1)) != 0 { 399 return nil, fmt.Errorf("invalid grainSize %d", hdr.GrainSize) 400 } 401 if hdr.NumGTEsPerGT < uint32(SectorSize/4) || (hdr.NumGTEsPerGT&(hdr.NumGTEsPerGT-1)) != 0 { 402 return nil, fmt.Errorf("invalid numGTEsPerGT %d", hdr.NumGTEsPerGT) 403 } 404 lastGrainNr := hdr.Capacity / hdr.GrainSize 405 var lastGrainSize uint64 406 if hdr.Capacity&(hdr.GrainSize-1) != 0 { 407 lastGrainSize = (hdr.Capacity & (hdr.GrainSize - 1)) * SectorSize 408 } else { 409 lastGrainSize = 0 410 } 411 GTEs := lastGrainNr 412 if lastGrainSize != 0 { 413 GTEs = lastGrainNr + 1 414 } 415 GTs := uint32((GTEs + uint64(hdr.NumGTEsPerGT) - 1) / uint64(hdr.NumGTEsPerGT)) 416 GDsectors := uint32((uint64(GTs)*4 + SectorSize - 1) / SectorSize) 417 GTsectors := uint32((uint64(hdr.NumGTEsPerGT)*4 + SectorSize - 1) / SectorSize) 418 totalSectors := int64(GDsectors + GTsectors*GTs) 419 totalBytes := totalSectors * SectorSize 420 if totalBytes > 1<<31 { 421 return nil, fmt.Errorf("gd/gt allocation too large: %d bytes", totalBytes) 422 } 423 gdarr := make([]uint32, (GDsectors*SectorSize)/4+(GTsectors*GTs*SectorSize)/4) 424 info := &gdgtInfo{ 425 GTEs: GTEs, 426 GTs: GTs, 427 GDsectors: GDsectors, 428 GTsectors: GTsectors, 429 gd: gdarr, 430 } 431 return info, nil 432 } 433 434 // readGD reads GD sectors from file. 
435 func readGD(f *os.File, hdr sparseExtentHeader, info *gdgtInfo) error { 436 if hdr.GDOffset == 0 { 437 return errors.New("no GD offset") 438 } 439 start := int64(hdr.GDOffset) * SectorSize 440 totalBytes := int64(info.GDsectors) * SectorSize 441 buf := make([]byte, totalBytes) 442 if _, err := f.ReadAt(buf, start); err != nil { 443 return fmt.Errorf("read GD at %d: %w", start, err) 444 } 445 for i := range int(info.GDsectors * SectorSize / 4) { 446 info.gd[i] = binary.LittleEndian.Uint32(buf[i*4 : i*4+4]) 447 } 448 return nil 449 } 450 451 // convertMonolithicSparse converts a monolithic sparse VMDK. 452 func convertMonolithicSparse(f *os.File, out *os.File, hdr sparseExtentHeader) error { 453 info, err := getGDGT(hdr) 454 if err != nil { 455 return err 456 } 457 GDOffset := hdr.GDOffset 458 if hdr.RGDOffset != 0 { 459 GDOffset = hdr.RGDOffset 460 } 461 if GDOffset == 0 || GDOffset == GDAtEnd { 462 return errors.New("gd offset missing for monolithicSparse") 463 } 464 if err := readGD(f, hdr, info); err != nil { 465 return fmt.Errorf("readGD: %w", err) 466 } 467 grainBytes := int64(hdr.GrainSize) * SectorSize 468 totalGrains := int64((hdr.Capacity + hdr.GrainSize - 1) / hdr.GrainSize) 469 if err := out.Truncate(int64(hdr.Capacity) * SectorSize); err != nil { 470 return fmt.Errorf("truncate out: %w", err) 471 } 472 numGTEsPerGT := int64(hdr.NumGTEsPerGT) 473 for g := range totalGrains { 474 gdIdx := int(g / numGTEsPerGT) 475 gtIdx := int(g % numGTEsPerGT) 476 if gdIdx >= len(info.gd) { 477 zero := make([]byte, grainBytes) 478 if _, err := out.WriteAt(zero, g*grainBytes); err != nil { 479 return err 480 } 481 continue 482 } 483 gtSector := uint64(info.gd[gdIdx]) 484 if gtSector == 0 { 485 zero := make([]byte, grainBytes) 486 if _, err := out.WriteAt(zero, g*grainBytes); err != nil { 487 return err 488 } 489 continue 490 } 491 gtOffset := int64(gtSector) * SectorSize 492 gtSizeBytes := int64(info.GTsectors) * SectorSize 493 gtBuf := make([]byte, gtSizeBytes) 494 if 
_, err := f.ReadAt(gtBuf, gtOffset); err != nil { 495 return fmt.Errorf("read GT at %d: %w", gtOffset, err) 496 } 497 if gtIdx*4+4 > len(gtBuf) { 498 zero := make([]byte, grainBytes) 499 if _, err := out.WriteAt(zero, g*grainBytes); err != nil { 500 return err 501 } 502 continue 503 } 504 gte := binary.LittleEndian.Uint32(gtBuf[gtIdx*4 : gtIdx*4+4]) 505 if gte == 0 { 506 zero := make([]byte, grainBytes) 507 if _, err := out.WriteAt(zero, g*grainBytes); err != nil { 508 return err 509 } 510 continue 511 } 512 grainSector := int64(gte) 513 grainOffset := grainSector * SectorSize 514 var toRead = grainBytes 515 if g == totalGrains-1 { 516 lastSectors := int64(hdr.Capacity % hdr.GrainSize) 517 if lastSectors == 0 { 518 lastSectors = int64(hdr.GrainSize) 519 } 520 toRead = lastSectors * SectorSize 521 } 522 grainData := make([]byte, toRead) 523 if _, err := f.ReadAt(grainData, grainOffset); err != nil { 524 return fmt.Errorf("read grain at %d: %w", grainOffset, err) 525 } 526 if _, err := out.WriteAt(grainData, g*grainBytes); err != nil { 527 return fmt.Errorf("write grain at %d: %w", g*grainBytes, err) 528 } 529 } 530 return nil 531 } 532 533 // convertVMDKToRaw converts a VMDK file to a raw disk image. 
534 func convertVMDKToRaw(inPath string, outPath string) error { 535 in, err := os.Open(inPath) 536 if err != nil { 537 return err 538 } 539 defer in.Close() 540 out, err := os.Create(outPath) 541 if err != nil { 542 return err 543 } 544 defer out.Close() 545 546 hdr, err := readHeaderAt(in, 0) 547 if err != nil { 548 fi, st := in.Stat() 549 if st != nil { 550 return fmt.Errorf("stat input: %w", st) 551 } 552 if fi.Size() >= 1024 { 553 offset := fi.Size() - 1024 554 hdr2, err2 := readHeaderAt(in, offset) 555 if err2 == nil { 556 hdr = hdr2 557 } else { 558 return fmt.Errorf("read header failed: %w", err) 559 } 560 } else { 561 return fmt.Errorf("read header failed: %w", err) 562 } 563 } 564 565 const flagHasCompressed = 1 << 16 566 const flagHasMetadata = 1 << 17 567 isStream := (hdr.Flags&flagHasCompressed != 0) && (hdr.Flags&flagHasMetadata != 0) 568 if hdr.CompressAlgorithm == 1 { 569 isStream = true 570 } 571 572 if isStream { 573 if err := convertStreamOptimizedExtent(in, out, hdr); err != nil { 574 return err 575 } 576 } else { 577 if err := convertMonolithicSparse(in, out, hdr); err != nil { 578 return err 579 } 580 } 581 return nil 582 }