github.com/rohankumardubey/draft-classic@v0.16.0/pkg/linguist/linguist.go (about) 1 package linguist 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "os" 8 "path/filepath" 9 "runtime" 10 "sort" 11 "strings" 12 13 "github.com/Azure/draft/pkg/osutil" 14 log "github.com/sirupsen/logrus" 15 ) 16 17 var ( 18 isIgnored func(string) bool 19 isDetectedInGitAttributes func(filename string) string 20 ) 21 22 // used for displaying results 23 type ( 24 // Language is the programming langage and the percentage on how sure linguist feels about its 25 // decision. 26 Language struct { 27 Language string `json:"language"` 28 Percent float64 `json:"percent"` 29 // Color represents the color associated with the language in HTML hex notation. 30 Color string `json:"color"` 31 } 32 ) 33 34 // sortableResult is a list or programming languages, sorted based on the likelihood of the 35 // primary programming language the application was written in. 36 type sortableResult []*Language 37 38 func (s sortableResult) Len() int { 39 return len(s) 40 } 41 42 func (s sortableResult) Less(i, j int) bool { 43 return s[i].Percent < s[j].Percent 44 } 45 46 func (s sortableResult) Swap(i, j int) { 47 s[i], s[j] = s[j], s[i] 48 } 49 50 func initLinguistAttributes(dir string) error { 51 ignore := []string{} 52 except := []string{} 53 detected := make(map[string]string) 54 55 gitignoreExists, err := osutil.Exists(filepath.Join(dir, ".gitignore")) 56 if err != nil { 57 return err 58 } 59 if gitignoreExists { 60 log.Debugln("found .gitignore") 61 62 f, err := os.Open(filepath.Join(dir, ".gitignore")) 63 if err != nil { 64 return err 65 } 66 defer f.Close() 67 68 ignoreScanner := bufio.NewScanner(f) 69 for ignoreScanner.Scan() { 70 var isExcept bool 71 path := strings.TrimSpace(ignoreScanner.Text()) 72 // if it's whitespace or a comment 73 if len(path) == 0 || string(path[0]) == "#" { 74 continue 75 } 76 if string(path[0]) == "!" { 77 isExcept = true 78 path = path[1:] 79 } 80 p := strings.Trim(path, string(filepath.Separator)) 81 if isExcept { 82 except = append(except, p) 83 } else { 84 ignore = append(ignore, p) 85 } 86 } 87 if err := ignoreScanner.Err(); err != nil { 88 return fmt.Errorf("error reading .gitignore: %v", err) 89 } 90 } 91 92 gitAttributesExists, err := osutil.Exists(filepath.Join(dir, ".gitattributes")) 93 if err != nil { 94 return err 95 } 96 if gitAttributesExists { 97 log.Debugln("found .gitattributes") 98 99 f, err := os.Open(filepath.Join(dir, ".gitattributes")) 100 if err != nil { 101 return err 102 } 103 defer f.Close() 104 105 attributeScanner := bufio.NewScanner(f) 106 var lineNumber int 107 for attributeScanner.Scan() { 108 lineNumber++ 109 line := strings.TrimSpace(attributeScanner.Text()) 110 words := strings.Fields(line) 111 if len(words) != 2 { 112 log.Printf("invalid line in .gitattributes at L%d: '%s'\n", lineNumber, line) 113 continue 114 } 115 path := strings.Trim(words[0], string(filepath.Separator)) 116 if runtime.GOOS == "windows" { 117 // on Windows, we also accept / as a path separator, so let's strip those as well 118 path = strings.Trim(words[0], "/") 119 } 120 attribute := words[1] 121 if strings.HasPrefix(attribute, "linguist-documentation") || strings.HasPrefix(attribute, "linguist-vendored") || strings.HasPrefix(attribute, "linguist-generated") { 122 if !strings.HasSuffix(strings.ToLower(attribute), "false") { 123 ignore = append(ignore, path) 124 } 125 } else if strings.HasPrefix(attribute, "linguist-language") { 126 attr := strings.Split(attribute, "=") 127 if len(attr) != 2 { 128 log.Printf("invalid line in .gitattributes at L%d: '%s'\n", lineNumber, line) 129 continue 130 } 131 language := attr[1] 132 detected[path] = language 133 } 134 } 135 if err := attributeScanner.Err(); err != nil { 136 return fmt.Errorf("error reading .gitattributes: %v", err) 137 } 138 } 139 140 isIgnored = func(filename string) bool { 141 for _, p := range ignore { 142 cleanPath, err := filepath.Rel(dir, filename) 143 if err != nil { 144 log.Debugf("could not get relative path: %v", err) 145 return false 146 } 147 if m, _ := filepath.Match(p, cleanPath); m { 148 for _, e := range except { 149 if m, _ := filepath.Match(e, cleanPath); m { 150 return false 151 } 152 } 153 return true 154 } 155 } 156 return false 157 } 158 isDetectedInGitAttributes = func(filename string) string { 159 for p, lang := range detected { 160 cleanPath, err := filepath.Rel(dir, filename) 161 if err != nil { 162 log.Debugf("could not get relative path: %v", err) 163 return "" 164 } 165 if m, _ := filepath.Match(p, cleanPath); m { 166 return lang 167 } 168 } 169 return "" 170 } 171 return nil 172 } 173 174 // shoutouts to php 175 func fileGetContents(filename string) ([]byte, error) { 176 log.Debugln("reading contents of", filename) 177 178 // read only first 512 bytes of files 179 contents := make([]byte, 512) 180 f, err := os.Open(filename) 181 if err != nil { 182 return nil, err 183 } 184 _, err = f.Read(contents) 185 f.Close() 186 if err != io.EOF { 187 if err != nil { 188 return nil, err 189 } 190 } 191 return contents, nil 192 } 193 194 // ProcessDir walks through a directory and returns a list of sorted languages within that directory. 195 func ProcessDir(dirname string) ([]*Language, error) { 196 var ( 197 langs = make(map[string]int) 198 totalSize int 199 ) 200 if err := initLinguistAttributes(dirname); err != nil { 201 return nil, err 202 } 203 exists, err := osutil.Exists(dirname) 204 if err != nil { 205 return nil, err 206 } 207 if !exists { 208 return nil, os.ErrNotExist 209 } 210 filepath.Walk(dirname, func(path string, file os.FileInfo, err error) error { 211 size := int(file.Size()) 212 log.Debugf("with file: %s", path) 213 log.Debugln(path, "is", size, "bytes") 214 if isIgnored(path) { 215 log.Debugln(path, "is ignored, skipping") 216 if file.IsDir() { 217 return filepath.SkipDir 218 } 219 return nil 220 } 221 if size == 0 { 222 log.Debugln(path, "is empty file, skipping") 223 return nil 224 } 225 if file.IsDir() { 226 if file.Name() == ".git" { 227 log.Debugln(".git directory, skipping") 228 return filepath.SkipDir 229 } 230 } else if (file.Mode() & os.ModeSymlink) == 0 { 231 if ShouldIgnoreFilename(path) { 232 log.Debugf("%s: filename should be ignored, skipping", path) 233 return nil 234 } 235 236 byGitAttr := isDetectedInGitAttributes(path) 237 if byGitAttr != "" { 238 log.Debugln(path, "got result by .gitattributes: ", byGitAttr) 239 langs[byGitAttr] += size 240 totalSize += size 241 return nil 242 } 243 244 if byName := LanguageByFilename(path); byName != "" { 245 log.Debugln(path, "got result by name: ", byName) 246 langs[byName] += size 247 totalSize += size 248 return nil 249 } 250 251 contents, err := fileGetContents(path) 252 if err != nil { 253 return err 254 } 255 256 if ShouldIgnoreContents(contents) { 257 log.Debugln(path, ": contents should be ignored, skipping") 258 return nil 259 } 260 261 hints := LanguageHints(path) 262 log.Debugf("%s got language hints: %#v\n", path, hints) 263 byData := LanguageByContents(contents, hints) 264 265 if byData != "" { 266 log.Debugln(path, "got result by data: ", byData) 267 langs[byData] += size 268 totalSize += size 269 return nil 270 } 271 272 log.Debugln(path, "got no result!!") 273 langs["(unknown)"] += size 274 totalSize += size 275 } 276 return nil 277 }) 278 279 results := []*Language{} 280 for lang, size := range langs { 281 l := &Language{ 282 Language: lang, 283 Percent: (float64(size) / float64(totalSize)) * 100.0, 284 Color: LanguageColor(lang), 285 } 286 results = append(results, l) 287 log.Debugf("language: %s percent: %f color: %s", l.Language, l.Percent, l.Color) 288 } 289 sort.Sort(sort.Reverse(sortableResult(results))) 290 return results, nil 291 } 292 293 // Alias returns the language name for a given known alias. 294 // 295 // Occasionally linguist comes up with odd language names, or determines a Java app as a "Maven POM" 296 // app, which in essence is the same thing for Draft's intent. 297 func Alias(lang *Language) *Language { 298 packAliases := map[string]string{ 299 "maven pom": "Java", 300 "c#": "csharp", 301 } 302 303 if alias, ok := packAliases[strings.ToLower(lang.Language)]; ok { 304 lang.Language = alias 305 } 306 return lang 307 }