gopkg.in/alecthomas/gometalinter.v3@v3.0.0/_linters/src/github.com/client9/misspell/mime.go (about) 1 package misspell 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "net/http" 9 "os" 10 "path/filepath" 11 "strings" 12 ) 13 14 // The number of possible binary formats is very large 15 // items that might be checked into a repo or be an 16 // artifact of a build. Additions welcome. 17 // 18 // Golang's internal table is very small and can't be 19 // relied on. Even then things like ".js" have a mime 20 // type of "application/javascipt" which isn't very helpful. 21 // "[x]" means we have sniff test and suffix test should be eliminated 22 var binary = map[string]bool{ 23 ".a": true, // [ ] archive 24 ".bin": true, // [ ] binary 25 ".bz2": true, // [ ] compression 26 ".class": true, // [x] Java class file 27 ".dll": true, // [ ] shared library 28 ".exe": true, // [ ] binary 29 ".gif": true, // [ ] image 30 ".gpg": true, // [x] text, but really all base64 31 ".gz": true, // [ ] compression 32 ".ico": true, // [ ] image 33 ".jar": true, // [x] archive 34 ".jpeg": true, // [ ] image 35 ".jpg": true, // [ ] image 36 ".mp3": true, // [ ] audio 37 ".mp4": true, // [ ] video 38 ".mpeg": true, // [ ] video 39 ".o": true, // [ ] object file 40 ".pdf": true, // [x] pdf 41 ".png": true, // [x] image 42 ".pyc": true, // [ ] Python bytecode 43 ".pyo": true, // [ ] Python bytecode 44 ".so": true, // [x] shared library 45 ".swp": true, // [ ] vim swap file 46 ".tar": true, // [ ] archive 47 ".tiff": true, // [ ] image 48 ".woff": true, // [ ] font 49 ".woff2": true, // [ ] font 50 ".xz": true, // [ ] compression 51 ".z": true, // [ ] compression 52 ".zip": true, // [x] archive 53 } 54 55 // isBinaryFilename returns true if the file is likely to be binary 56 // 57 // Better heuristics could be done here, in particular a binary 58 // file is unlikely to be UTF-8 encoded. However this is cheap 59 // and will solve the immediate need of making sure common 60 // binary formats are not corrupted by mistake. 61 func isBinaryFilename(s string) bool { 62 return binary[strings.ToLower(filepath.Ext(s))] 63 } 64 65 var scm = map[string]bool{ 66 ".bzr": true, 67 ".git": true, 68 ".hg": true, 69 ".svn": true, 70 "CVS": true, 71 } 72 73 // isSCMPath returns true if the path is likely part of a (private) SCM 74 // directory. E.g. ./git/something = true 75 func isSCMPath(s string) bool { 76 // hack for .git/COMMIT_EDITMSG and .git/TAG_EDITMSG 77 // normally we don't look at anything in .git 78 // but COMMIT_EDITMSG and TAG_EDITMSG are used as 79 // temp files for git commits. Allowing misspell to inspect 80 // these files allows for commit-msg hooks 81 // https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks 82 if strings.Contains(filepath.Base(s), "EDITMSG") { 83 return false 84 } 85 parts := strings.Split(filepath.Clean(s), string(filepath.Separator)) 86 for _, dir := range parts { 87 if scm[dir] { 88 return true 89 } 90 } 91 return false 92 } 93 94 var magicHeaders = [][]byte{ 95 // Issue #68 96 // PGP messages and signatures are "text" but really just 97 // blobs of base64-text and should not be misspell-checked 98 []byte("-----BEGIN PGP MESSAGE-----"), 99 []byte("-----BEGIN PGP SIGNATURE-----"), 100 101 // ELF 102 {0x7f, 0x45, 0x4c, 0x46}, 103 104 // Postscript 105 {0x25, 0x21, 0x50, 0x53}, 106 107 // PDF 108 {0x25, 0x50, 0x44, 0x46}, 109 110 // Java class file 111 // https://en.wikipedia.org/wiki/Java_class_file 112 {0xCA, 0xFE, 0xBA, 0xBE}, 113 114 // PNG 115 // https://en.wikipedia.org/wiki/Portable_Network_Graphics 116 {0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a}, 117 118 // ZIP, JAR, ODF, OOXML 119 {0x50, 0x4B, 0x03, 0x04}, 120 {0x50, 0x4B, 0x05, 0x06}, 121 {0x50, 0x4B, 0x07, 0x08}, 122 } 123 124 func isTextFile(raw []byte) bool { 125 for _, magic := range magicHeaders { 126 if bytes.HasPrefix(raw, magic) { 127 return false 128 } 129 } 130 131 // allow any text/ type with utf-8 encoding 132 // DetectContentType sometimes returns charset=utf-16 for XML stuff 133 // in which case ignore. 134 mime := http.DetectContentType(raw) 135 return strings.HasPrefix(mime, "text/") && strings.HasSuffix(mime, "charset=utf-8") 136 } 137 138 // ReadTextFile returns the contents of a file, first testing if it is a text file 139 // returns ("", nil) if not a text file 140 // returns ("", error) if error 141 // returns (string, nil) if text 142 // 143 // unfortunately, in worse case, this does 144 // 1 stat 145 // 1 open,read,close of 512 bytes 146 // 1 more stat,open, read everything, close (via ioutil.ReadAll) 147 // This could be kinder to the filesystem. 148 // 149 // This uses some heuristics of the file's extension (e.g. .zip, .txt) and 150 // uses a sniffer to determine if the file is text or not. 151 // Using file extensions isn't great, but probably 152 // good enough for real-world use. 153 // Golang's built in sniffer is problematic for differnet reasons. It's 154 // optimized for HTML, and is very limited in detection. It would be good 155 // to explicitly add some tests for ELF/DWARF formats to make sure we never 156 // corrupt binary files. 157 func ReadTextFile(filename string) (string, error) { 158 if isBinaryFilename(filename) { 159 return "", nil 160 } 161 162 if isSCMPath(filename) { 163 return "", nil 164 } 165 166 fstat, err := os.Stat(filename) 167 168 if err != nil { 169 return "", fmt.Errorf("Unable to stat %q: %s", filename, err) 170 } 171 172 // directory: nothing to do. 173 if fstat.IsDir() { 174 return "", nil 175 } 176 177 // avoid reading in multi-gig files 178 // if input is large, read the first 512 bytes to sniff type 179 // if not-text, then exit 180 isText := false 181 if fstat.Size() > 50000 { 182 fin, err := os.Open(filename) 183 if err != nil { 184 return "", fmt.Errorf("Unable to open large file %q: %s", filename, err) 185 } 186 defer fin.Close() 187 buf := make([]byte, 512) 188 _, err = io.ReadFull(fin, buf) 189 if err != nil { 190 return "", fmt.Errorf("Unable to read 512 bytes from %q: %s", filename, err) 191 } 192 if !isTextFile(buf) { 193 return "", nil 194 } 195 196 // set so we don't double check this file 197 isText = true 198 } 199 200 // read in whole file 201 raw, err := ioutil.ReadFile(filename) 202 if err != nil { 203 return "", fmt.Errorf("Unable to read all %q: %s", filename, err) 204 } 205 206 if !isText && !isTextFile(raw) { 207 return "", nil 208 } 209 return string(raw), nil 210 }