github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/ai/parse_gguf_model.go (about) 1 package ai 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "io" 8 "os" 9 "path/filepath" 10 "sort" 11 "strings" 12 13 "github.com/cespare/xxhash/v2" 14 gguf_parser "github.com/gpustack/gguf-parser-go" 15 16 "github.com/anchore/syft/internal" 17 "github.com/anchore/syft/internal/log" 18 "github.com/anchore/syft/internal/unknown" 19 "github.com/anchore/syft/syft/artifact" 20 "github.com/anchore/syft/syft/file" 21 "github.com/anchore/syft/syft/pkg" 22 "github.com/anchore/syft/syft/pkg/cataloger/generic" 23 ) 24 25 // parseGGUFModel parses a GGUF model file and returns the discovered package. 26 // This implementation only reads the header portion of the file, not the entire model. 27 func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 28 defer internal.CloseAndLogError(reader, reader.Path()) 29 30 // Create a temporary file for the library to parse 31 // The library requires a file path, so we create a temp file 32 tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf") 33 if err != nil { 34 return nil, nil, fmt.Errorf("failed to create temp file: %w", err) 35 } 36 tempPath := tempFile.Name() 37 defer os.Remove(tempPath) 38 39 // Copy and validate the GGUF file header using LimitedReader to prevent OOM 40 // We use LimitedReader to cap reads at maxHeaderSize (50MB) 41 limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize} 42 if err := copyHeader(tempFile, limitedReader); err != nil { 43 tempFile.Close() 44 return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err) 45 } 46 tempFile.Close() 47 48 // Parse using gguf-parser-go with options to skip unnecessary data 49 ggufFile, err := gguf_parser.ParseGGUFFile(tempPath, 50 gguf_parser.SkipLargeMetadata(), 51 ) 52 if err != nil { 53 return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err) 54 } 55 56 // Extract metadata 57 metadata := ggufFile.Metadata() 58 59 // Extract version separately (will be set on Package.Version) 60 modelVersion := extractVersion(ggufFile.Header.MetadataKV) 61 62 // Convert to syft metadata structure 63 syftMetadata := &pkg.GGUFFileHeader{ 64 Architecture: metadata.Architecture, 65 Quantization: metadata.FileTypeDescriptor, 66 Parameters: uint64(metadata.Parameters), 67 GGUFVersion: uint32(ggufFile.Header.Version), 68 TensorCount: ggufFile.Header.TensorCount, 69 RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV), 70 MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV), 71 } 72 73 // If model name is not in metadata, use filename 74 if metadata.Name == "" { 75 metadata.Name = extractModelNameFromPath(reader.Path()) 76 } 77 78 // Create package from metadata 79 p := newGGUFPackage( 80 syftMetadata, 81 metadata.Name, 82 modelVersion, 83 metadata.License, 84 reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 85 ) 86 87 return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file") 88 } 89 90 // computeKVMetadataHash computes a stable hash of the KV metadata for use as a global identifier 91 func computeKVMetadataHash(metadata gguf_parser.GGUFMetadataKVs) string { 92 // Sort the KV pairs by key for stable hashing 93 sortedKVs := make([]gguf_parser.GGUFMetadataKV, len(metadata)) 94 copy(sortedKVs, metadata) 95 sort.Slice(sortedKVs, func(i, j int) bool { 96 return sortedKVs[i].Key < sortedKVs[j].Key 97 }) 98 99 // Marshal sorted KVs to JSON for stable hashing 100 jsonBytes, err := json.Marshal(sortedKVs) 101 if err != nil { 102 log.Debugf("failed to marshal metadata for hashing: %v", err) 103 return "" 104 } 105 106 // Compute xxhash 107 hash := xxhash.Sum64(jsonBytes) 108 return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits) 109 } 110 111 // extractVersion attempts to extract version from metadata KV pairs 112 func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string { 113 for _, kv := range kvs { 114 if kv.Key == "general.version" { 115 if v, ok := kv.Value.(string); ok && v != "" { 116 return v 117 } 118 } 119 } 120 return "" 121 } 122 123 // extractModelNameFromPath extracts the model name from the file path 124 func extractModelNameFromPath(path string) string { 125 // Get the base filename 126 base := filepath.Base(path) 127 128 // Remove .gguf extension 129 name := strings.TrimSuffix(base, ".gguf") 130 131 return name 132 } 133 134 // integrity check 135 var _ generic.Parser = parseGGUFModel