github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/ai/parse_gguf.go (about)

     1  package ai
     2  
     3  import (
     4  	"encoding/binary"
     5  	"fmt"
     6  	"io"
     7  
     8  	gguf_parser "github.com/gpustack/gguf-parser-go"
     9  )
    10  
    11  // GGUF file format constants
    12  const (
    13  	ggufMagicNumber = 0x46554747       // "GGUF" in little-endian
    14  	maxHeaderSize   = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
    15  )
    16  
    17  // copyHeader copies the GGUF header from the reader to the writer.
    18  // It validates the magic number first, then copies the rest of the data.
    19  // The reader should be wrapped with io.LimitedReader to prevent OOM issues.
    20  func copyHeader(w io.Writer, r io.Reader) error {
    21  	// Read initial chunk to validate magic number
    22  	// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
    23  	initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
    24  	if _, err := io.ReadFull(r, initialBuf); err != nil {
    25  		return fmt.Errorf("failed to read GGUF header prefix: %w", err)
    26  	}
    27  
    28  	// Verify magic number
    29  	magic := binary.LittleEndian.Uint32(initialBuf[0:4])
    30  	if magic != ggufMagicNumber {
    31  		return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
    32  	}
    33  
    34  	// Write the initial buffer to the writer
    35  	if _, err := w.Write(initialBuf); err != nil {
    36  		return fmt.Errorf("failed to write GGUF header prefix: %w", err)
    37  	}
    38  
    39  	// Copy the rest of the header from reader to writer
    40  	// The LimitedReader will return EOF once maxHeaderSize is reached
    41  	if _, err := io.Copy(w, r); err != nil {
    42  		return fmt.Errorf("failed to copy GGUF header: %w", err)
    43  	}
    44  
    45  	return nil
    46  }
    47  
    48  // Helper to convert gguf_parser metadata to simpler types
    49  func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
    50  	result := make(map[string]interface{})
    51  
    52  	for _, kv := range kvs {
    53  		// Skip standard fields that are extracted separately
    54  		switch kv.Key {
    55  		case "general.architecture", "general.name", "general.license",
    56  			"general.version", "general.parameter_count", "general.quantization":
    57  			continue
    58  		}
    59  		result[kv.Key] = kv.Value
    60  	}
    61  
    62  	return result
    63  }