github.com/anchore/syft@v1.38.2/internal/file/archive_aliases.go (about) 1 package file 2 3 import ( 4 "context" 5 "io" 6 "path/filepath" 7 "strings" 8 9 "github.com/mholt/archives" 10 ) 11 12 // compoundExtensionAliases maps shorthand archive extensions to their full forms. 13 // The mholt/archives library doesn't recognize these aliases natively. 14 // 15 // See: https://github.com/anchore/syft/issues/4416 16 // Reference: https://github.com/mholt/archives?tab=readme-ov-file#supported-compression-formats 17 var compoundExtensionAliases = map[string]string{ 18 ".tgz": ".tar.gz", 19 ".tbz2": ".tar.bz2", 20 ".txz": ".tar.xz", 21 ".tlz": ".tar.lz", 22 ".tzst": ".tar.zst", 23 } 24 25 // IdentifyArchive is a wrapper around archives.Identify that handles compound extension 26 // aliases (like .tgz -> .tar.gz) transparently. It first attempts filename-based detection 27 // using the alias map, and falls back to content-based detection if needed. 28 // 29 // This function is a drop-in replacement for archives.Identify that centralizes 30 // the compound alias handling logic in one place. 31 func IdentifyArchive(ctx context.Context, path string, r io.Reader) (archives.Format, io.Reader, error) { 32 // First, try to identify using the alias-mapped path (filename-based detection) 33 normalizedPath := handleCompoundArchiveAliases(path) 34 return archives.Identify(ctx, normalizedPath, r) 35 } 36 37 // handleCompoundArchiveAliases normalizes archive file paths that use compound extension 38 // aliases (like .tgz) to their full forms (like .tar.gz) for correct identification 39 // by the mholt/archives library. 40 func handleCompoundArchiveAliases(path string) string { 41 ext := filepath.Ext(path) 42 if newExt, ok := compoundExtensionAliases[ext]; ok { 43 return strings.TrimSuffix(path, ext) + newExt 44 } 45 return path 46 }