github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/ai/parse_gguf.go (about) 1 package ai 2 3 import ( 4 "encoding/binary" 5 "fmt" 6 "io" 7 8 gguf_parser "github.com/gpustack/gguf-parser-go" 9 ) 10 11 // GGUF file format constants 12 const ( 13 ggufMagicNumber = 0x46554747 // "GGUF" in little-endian 14 maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies 15 ) 16 17 // copyHeader copies the GGUF header from the reader to the writer. 18 // It validates the magic number first, then copies the rest of the data. 19 // The reader should be wrapped with io.LimitedReader to prevent OOM issues. 20 func copyHeader(w io.Writer, r io.Reader) error { 21 // Read initial chunk to validate magic number 22 // GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info 23 initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count 24 if _, err := io.ReadFull(r, initialBuf); err != nil { 25 return fmt.Errorf("failed to read GGUF header prefix: %w", err) 26 } 27 28 // Verify magic number 29 magic := binary.LittleEndian.Uint32(initialBuf[0:4]) 30 if magic != ggufMagicNumber { 31 return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic) 32 } 33 34 // Write the initial buffer to the writer 35 if _, err := w.Write(initialBuf); err != nil { 36 return fmt.Errorf("failed to write GGUF header prefix: %w", err) 37 } 38 39 // Copy the rest of the header from reader to writer 40 // The LimitedReader will return EOF once maxHeaderSize is reached 41 if _, err := io.Copy(w, r); err != nil { 42 return fmt.Errorf("failed to copy GGUF header: %w", err) 43 } 44 45 return nil 46 } 47 48 // Helper to convert gguf_parser metadata to simpler types 49 func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} { 50 result := make(map[string]interface{}) 51 52 for _, kv := range kvs { 53 // Skip standard fields that are extracted separately 54 switch kv.Key { 55 case "general.architecture", "general.name", "general.license", 56 "general.version", "general.parameter_count", "general.quantization": 57 continue 58 } 59 result[kv.Key] = kv.Value 60 } 61 62 return result 63 }