github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/archive/mime.go (about) 1 // Package archive: write, read, copy, append, list primitives 2 // across all supported formats 3 /* 4 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package archive 7 8 import ( 9 "bytes" 10 "fmt" 11 "io" 12 "os" 13 "strings" 14 15 "github.com/NVIDIA/aistore/cmn/cos" 16 "github.com/NVIDIA/aistore/cmn/debug" 17 "github.com/NVIDIA/aistore/cmn/nlog" 18 "github.com/NVIDIA/aistore/memsys" 19 ) 20 21 // supported archive types (file extensions); see also archExts in cmd/cli/cli/const.go 22 // NOTE: when adding/removing formats - update: 23 // - FileExtensions 24 // - allMagics 25 // - ext/dsort/shard/rw.go 26 const ( 27 ExtTar = ".tar" 28 ExtTgz = ".tgz" 29 ExtTarGz = ".tar.gz" 30 ExtZip = ".zip" 31 ExtTarLz4 = ".tar.lz4" 32 ) 33 34 const ( 35 sizeDetectMime = 512 36 ) 37 38 // - here and elsewhere, mime (string) is a "." + IANA mime 39 // - for standard MIME types, see: cmn/cos/http_headers.go 40 // - references: 41 // * https://en.wikipedia.org/wiki/List_of_file_signatures 42 // * https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types 43 44 type detect struct { 45 mime string // '.' + IANA mime 46 sig []byte 47 offset int 48 } 49 50 var FileExtensions = []string{ExtTar, ExtTgz, ExtTarGz, ExtZip, ExtTarLz4} 51 52 // standard file signatures 53 var ( 54 magicTar = detect{offset: 257, sig: []byte("ustar"), mime: ExtTar} 55 magicGzip = detect{sig: []byte{0x1f, 0x8b}, mime: ExtTarGz} 56 magicZip = detect{sig: []byte{0x50, 0x4b}, mime: ExtZip} 57 magicLz4 = detect{sig: []byte{0x04, 0x22, 0x4d, 0x18}, mime: ExtTarLz4} 58 59 allMagics = []detect{magicTar, magicGzip, magicZip, magicLz4} // NOTE: must contain all 60 ) 61 62 // motivation: prevent from creating archives with non-standard extensions 63 func Strict(mime, filename string) (m string, err error) { 64 if mime != "" { 65 if m, err = normalize(mime); err != nil { 66 return 67 } 68 } 69 m, err = byExt(filename) 70 if err != nil || mime == "" { 71 return 72 } 73 if mime != m { 74 // user-defined (non-empty) MIME must correspond 75 err = fmt.Errorf("mime mismatch %q vs %q", mime, m) 76 } 77 return 78 } 79 80 func Mime(mime, filename string) (string, error) { 81 if mime != "" { 82 return normalize(mime) 83 } 84 return byExt(filename) 85 } 86 87 // e.g. MIME: "application/zip" 88 func normalize(mime string) (string, error) { 89 switch { 90 case strings.Contains(mime, ExtTarGz[1:]): // ExtTarGz contains ExtTar 91 return ExtTarGz, nil 92 case strings.Contains(mime, ExtTarLz4[1:]): // ditto 93 return ExtTarLz4, nil 94 default: 95 for _, ext := range FileExtensions { 96 if strings.Contains(mime, ext[1:]) { 97 return ext, nil 98 } 99 } 100 } 101 return "", NewErrUnknownMime(mime) 102 } 103 104 // by filename extension 105 func byExt(filename string) (string, error) { 106 for _, ext := range FileExtensions { 107 if strings.HasSuffix(filename, ext) { 108 return ext, nil 109 } 110 } 111 return "", NewErrUnknownFileExt(filename, "") 112 } 113 114 // NOTE convention: caller may pass nil `smm` _not_ to spend time (usage: listing and reading) 115 func MimeFile(file *os.File, smm *memsys.MMSA, mime, archname string) (m string, err error) { 116 m, err = Mime(mime, archname) 117 if err == nil || IsErrUnknownMime(err) { 118 return 119 } 120 if smm == nil { 121 err = NewErrUnknownFileExt(archname, "not reading file magic") 122 return 123 } 124 // by magic 125 var ( 126 n int 127 buf, slab = smm.AllocSize(sizeDetectMime) 128 ) 129 m, n, err = _detect(file, archname, buf) 130 if n > 0 { 131 _, errV := file.Seek(0, io.SeekStart) 132 debug.AssertNoErr(errV) 133 if err == nil { 134 err = errV 135 } 136 if err == nil { 137 nlog.Infoln("archname", archname, "is in fact", m, "(via magic sign)") 138 } 139 } 140 slab.Free(buf) 141 return 142 } 143 144 // NOTE: 145 // - on purpose redundant vs the above - not to open file if can be avoided 146 // - convention: caller may pass nil `smm` _not_ to spend time (usage: listing and reading) 147 func MimeFQN(smm *memsys.MMSA, mime, archname string) (m string, err error) { 148 m, err = Mime(mime, archname) 149 if err == nil || IsErrUnknownMime(err) { 150 return 151 } 152 if smm == nil { 153 err = NewErrUnknownFileExt(archname, "not reading file magic") 154 return 155 } 156 fh, err := os.Open(archname) 157 if err != nil { 158 return "", err 159 } 160 buf, slab := smm.AllocSize(sizeDetectMime) 161 m, _, err = _detect(fh, archname, buf) 162 slab.Free(buf) 163 cos.Close(fh) 164 return 165 } 166 167 func _detect(file *os.File, archname string, buf []byte) (m string, n int, err error) { 168 n, err = file.Read(buf) 169 if err != nil { 170 return 171 } 172 if n < sizeDetectMime { 173 err = NewErrUnknownFileExt(archname, "file is too short") 174 return 175 } 176 for _, magic := range allMagics { 177 if n > magic.offset && bytes.HasPrefix(buf[magic.offset:], magic.sig) { 178 return magic.mime, n, nil 179 } 180 } 181 err = fmt.Errorf("failed to detect supported file signatures in %q", archname) 182 return 183 } 184 185 func EqExt(ext1, ext2 string) bool { 186 switch { 187 case ext1 == ext2: 188 return true 189 case ext1 == ExtTarGz && ext2 == ExtTgz: 190 return true 191 case ext2 == ExtTarGz && ext1 == ExtTgz: 192 return true 193 } 194 return false 195 }