// Source: gitlab.com/thomasboni/go-enry/v2@v2.8.3-0.20220418031202-30b0d7a3de98/benchmarks/parser/main.go

// Command parser converts the raw benchmark outputs of enry and linguist
// (*.bench files in the working directory) into CSV files. With the
// -distribution flag it instead reads the previously generated samples CSV
// files and produces a per-tool histogram of GetLanguage() timings.
package main

import (
	"bufio"
	"bytes"
	"encoding/csv"
	"flag"
	"fmt"
	"io/ioutil"
	"log"
	"math"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strconv"
	"strings"
)

const (
	// functions benchmarked
	getLanguageFunc = "GetLanguage()"
	classifyFunc    = "Classify()"
	modelineFunc    = "GetLanguagesByModeline()"
	filenameFunc    = "GetLanguagesByFilename()"
	shebangFunc     = "GetLanguagesByShebang()"
	extensionFunc   = "GetLanguagesByExtension()"
	contentFunc     = "GetLanguagesByContent()"

	// benchmark's outputs
	enryTotalBench       = "enry_total.bench"
	enrySamplesBench     = "enry_samples.bench"
	linguistTotalBench   = "linguist_total.bench"
	linguistSamplesBench = "linguist_samples.bench"

	// files to generate
	enryTotalCSV       = "enry-total.csv"
	enrySamplesCSV     = "enry-samples.csv"
	linguistTotalCSV   = "linguist-total.csv"
	linguistSamplesCSV = "linguist-samples.csv"

	// files to generate with flag distribution
	enryDistributionCSV     = "enry-distribution.csv"
	linguistDistributionCSV = "linguist-distribution.csv"
)

var (
	// flags
	distribution bool
	outDir       string

	// enryFunctions fixes the order in which functions appear in the CSVs.
	enryFunctions = []string{getLanguageFunc, classifyFunc, modelineFunc, filenameFunc, shebangFunc, extensionFunc, contentFunc}
	// distributionIntervals are the histogram buckets, in ascending order.
	distributionIntervals = []string{"1us-10us", "10us-100us", "100us-1ms", "1ms-10ms", "10ms-100ms"}
)

func main() {
	flag.BoolVar(&distribution, "distribution", false, "generate enry-distribution.csv and linguist-distribution.csv")
	flag.StringVar(&outDir, "outdir", "", "path to leave csv files")
	flag.Parse()

	if distribution {
		generateDistributionCSV()
		return
	}

	generateCSV()
}

// generateDistributionCSV builds one distribution CSV per tool from the
// samples CSV files found in the working directory. A failure on one tool is
// logged and does not prevent the other tool from being processed.
func generateDistributionCSV() {
	CSVFiles := []struct {
		in   string
		out  string
		tool string
	}{
		{in: enrySamplesCSV, out: enryDistributionCSV, tool: "enry"},
		{in: linguistSamplesCSV, out: linguistDistributionCSV, tool: "linguist"},
	}

	for _, CSVFile := range CSVFiles {
		outPath := filepath.Join(outDir, CSVFile.out)
		if err := distributionFromSamples(CSVFile.in, outPath, CSVFile.tool); err != nil {
			log.Println(err)
		}
	}
}

// distributionFromSamples reads the samples CSV at inPath, computes the
// timing distribution for tool and writes it to outPath. Keeping this in its
// own function makes the deferred Close run once per file instead of piling
// up until the caller's loop ends.
func distributionFromSamples(inPath, outPath, tool string) error {
	f, err := os.Open(inPath)
	if err != nil {
		return err
	}
	defer f.Close()

	CSVSamples, err := csv.NewReader(f).ReadAll()
	if err != nil {
		return err
	}

	// The first record is the header; an empty file has none to skip.
	if len(CSVSamples) == 0 {
		return fmt.Errorf("%s: no header row found", inPath)
	}

	CSVDistribution, err := buildDistribution(CSVSamples[1:], tool)
	if err != nil {
		return err
	}

	return writeCSV(CSVDistribution, outPath)
}

// buildDistribution counts, per time interval, the GetLanguage() rows of
// CSVSamples (header already stripped) and returns the CSV records for the
// distribution file, header included. The last column of each row is parsed
// as the ns/op value. Rows for other functions, and rows too short to carry
// a function column, are skipped. It also prints a summary to stdout.
func buildDistribution(CSVSamples [][]string, tool string) ([][]string, error) {
	count := make(map[string]int, len(distributionIntervals))
	for _, row := range CSVSamples {
		if len(row) < 2 || row[1] != getLanguageFunc {
			continue
		}

		num, err := strconv.ParseFloat(row[len(row)-1], 64)
		if err != nil {
			return nil, err
		}

		arrangeByTime(count, num)
	}

	CSVDistribution := make([][]string, 0, len(distributionIntervals)+1)
	CSVDistribution = append(CSVDistribution, []string{"timeInterval", tool, "numberOfFiles"})
	for _, interval := range distributionIntervals {
		number := strconv.Itoa(count[interval])
		CSVDistribution = append(CSVDistribution, []string{interval, tool, number})
	}

	printDistributionInfo(count, tool)
	return CSVDistribution, nil
}

// printDistributionInfo prints the absolute and relative number of files per
// interval for tool. When nothing was counted the percentages are reported
// as 0 rather than NaN.
func printDistributionInfo(count map[string]int, tool string) {
	total := 0
	for _, v := range count {
		total += v
	}

	fmt.Println(tool, "files", total)
	fmt.Println("Distribution")
	for _, interval := range distributionIntervals {
		fmt.Println("\t", interval, count[interval])
	}

	fmt.Println("Percentage")
	for _, interval := range distributionIntervals {
		p := 0.0
		if total > 0 {
			p = (float64(count[interval]) / float64(total)) * 100.00
		}
		fmt.Printf("\t %s %f%%\n", interval, p)
	}

	fmt.Printf("\n\n")
}

// arrangeByTime increments the bucket of count that num (in nanoseconds)
// falls into. Each bucket is open on the left and closed on the right;
// values of 1000ns or less, or above 100ms, are not counted at all.
func arrangeByTime(count map[string]int, num float64) {
	switch {
	case num > 1000.00 && num <= 10000.00:
		count[distributionIntervals[0]]++
	case num > 10000.00 && num <= 100000.00:
		count[distributionIntervals[1]]++
	case num > 100000.00 && num <= 1000000.00:
		count[distributionIntervals[2]]++
	case num > 1000000.00 && num <= 10000000.00:
		count[distributionIntervals[3]]++
	case num > 10000000.00 && num <= 100000000.00:
		count[distributionIntervals[4]]++
	}
}

// writeCSV creates (or truncates) outPath and writes CSVData to it. The file
// is always closed; a close failure is reported when the write itself
// succeeded.
func writeCSV(CSVData [][]string, outPath string) (err error) {
	out, err := os.Create(outPath)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	// WriteAll flushes the writer and returns any write or flush error.
	return csv.NewWriter(out).WriteAll(CSVData)
}

// parse turns the raw content of a benchmark output file into CSV records.
type parse func(data []byte, tool string) ([][]string, error)

// generateCSV parses every known benchmark output file in the working
// directory and writes the matching CSV into outDir. A failure on one file
// is logged and the remaining files are still processed.
func generateCSV() {
	bmFiles := []struct {
		in    string
		out   string
		tool  string
		parse parse
	}{
		{in: enryTotalBench, out: enryTotalCSV, tool: "enry", parse: parseTotal},
		{in: linguistTotalBench, out: linguistTotalCSV, tool: "linguist", parse: parseTotal},
		{in: enrySamplesBench, out: enrySamplesCSV, tool: "enry", parse: parseSamples},
		{in: linguistSamplesBench, out: linguistSamplesCSV, tool: "linguist", parse: parseSamples},
	}

	for _, bmFile := range bmFiles {
		buf, err := ioutil.ReadFile(bmFile.in)
		if err != nil {
			log.Println(err)
			continue
		}

		info, err := bmFile.parse(buf, bmFile.tool)
		if err != nil {
			log.Println(err)
			continue
		}

		if err := writeCSV(info, filepath.Join(outDir, bmFile.out)); err != nil {
			log.Println(err)
			continue
		}
	}
}

// parseTotal extracts the lines containing "_TOTAL" from data and returns
// one CSV record per entry of enryFunctions, preceded by a header. When a
// function appears more than once the last occurrence wins.
func parseTotal(data []byte, tool string) ([][]string, error) {
	const totalLine = "_TOTAL"
	parsedInfo := map[string][]string{}
	buf := bufio.NewScanner(bytes.NewReader(data))
	for buf.Scan() {
		line := buf.Text()
		if !strings.Contains(line, totalLine) {
			continue
		}

		row, err := getRow(strings.Fields(line), tool)
		if err != nil {
			return nil, err
		}

		parsedInfo[row[0]] = row
	}

	if err := buf.Err(); err != nil {
		return nil, err
	}

	firstLine := []string{"function", "tool", "iterations", "ns/op"}
	return prepareInfoForCSV(parsedInfo, firstLine), nil
}

// getRow builds the record {function, tool, iterations, ns/op} from the
// whitespace-separated fields of a benchmark line. The function name is the
// first entry of enryFunctions contained in line[0]; when none matches, the
// function column is omitted from the record. A line too short to hold the
// benchmark name and iteration count yields an error.
func getRow(line []string, tool string) ([]string, error) {
	if len(line) < 2 {
		return nil, fmt.Errorf("malformed benchmark line %q: want at least 2 fields, got %d", strings.Join(line, " "), len(line))
	}

	row := make([]string, 0, 4)
	for _, function := range enryFunctions {
		if strings.Contains(line[0], function) {
			row = append(row, function)
			break
		}
	}

	average, err := getAverage(line)
	if err != nil {
		return nil, err
	}

	return append(row, tool, line[1], average), nil
}

// getAverage returns the ns/op value of a benchmark line. When the last
// field does not end in ")", the third field is returned verbatim. Otherwise
// the last field is treated as a parenthesised total time in seconds and is
// divided by the iteration count in line[1].
func getAverage(line []string) (string, error) {
	if len(line) < 2 {
		return "", fmt.Errorf("malformed benchmark line %q: want at least 2 fields, got %d", strings.Join(line, " "), len(line))
	}

	average := line[len(line)-1]
	if !strings.HasSuffix(average, ")") {
		if len(line) < 3 {
			return "", fmt.Errorf("malformed benchmark line %q: missing ns/op field", strings.Join(line, " "))
		}
		return line[2], nil
	}

	seconds, err := strconv.ParseFloat(strings.Trim(average, "() "), 64)
	if err != nil {
		return "", err
	}

	iterations, err := strconv.ParseFloat(line[1], 64)
	if err != nil {
		return "", err
	}

	// Dividing by zero would yield +Inf, whose int conversion is undefined.
	if iterations == 0 {
		return "", fmt.Errorf("malformed benchmark line %q: zero iterations", strings.Join(line, " "))
	}

	avg := (seconds * math.Pow10(9)) / iterations
	return strconv.Itoa(int(avg)), nil
}

// prepareInfoForCSV returns firstLine (when non-empty) followed by the
// record of every function in enryFunctions order. A function missing from
// parsedInfo contributes a nil record, and keys that are not in
// enryFunctions are ignored.
func prepareInfoForCSV(parsedInfo map[string][]string, firstLine []string) [][]string {
	info := createInfoWithFirstLine(firstLine, len(parsedInfo))
	for _, function := range enryFunctions {
		info = append(info, parsedInfo[function])
	}

	return info
}

// createInfoWithFirstLine returns an empty slice with room for sliceLength
// records, holding firstLine as its only element when firstLine is
// non-empty.
func createInfoWithFirstLine(firstLine []string, sliceLength int) [][]string {
	if len(firstLine) == 0 {
		return make([][]string, 0, sliceLength)
	}

	info := make([][]string, 0, sliceLength+1)
	return append(info, firstLine)
}

// enryFuncs maps a benchmarked function name to its CSV record.
type enryFuncs map[string][]string

// newEnryFuncs returns an enryFuncs holding every benchmarked function with
// a nil record.
func newEnryFuncs() enryFuncs {
	return enryFuncs{
		getLanguageFunc: nil,
		classifyFunc:    nil,
		modelineFunc:    nil,
		filenameFunc:    nil,
		shebangFunc:     nil,
		extensionFunc:   nil,
		contentFunc:     nil,
	}
}

// parseSamples extracts the lines containing "SAMPLE_" from data and returns
// a header followed by, for each sample file in sorted order, one record per
// entry of enryFunctions.
func parseSamples(data []byte, tool string) ([][]string, error) {
	const sampleLine = "SAMPLE_"
	parsedInfo := map[string]enryFuncs{}
	buf := bufio.NewScanner(bytes.NewReader(data))
	for buf.Scan() {
		line := buf.Text()
		if !strings.Contains(line, sampleLine) {
			continue
		}

		split := strings.Fields(line)
		name := getSampleName(split[0])
		if _, ok := parsedInfo[name]; !ok {
			parsedInfo[name] = newEnryFuncs()
		}

		r, err := getRow(split, tool)
		if err != nil {
			return nil, err
		}

		row := make([]string, 0, len(r)+1)
		row = append(row, name)
		row = append(row, r...)
		function := row[1]
		parsedInfo[name][function] = row
	}

	if err := buf.Err(); err != nil {
		return nil, err
	}

	firstLine := []string{"file", "function", "tool", "iterations", "ns/op"}
	return prepareSamplesInfoForCSV(parsedInfo, firstLine), nil
}

// getSampleName returns the part of s after the first "SAMPLE_" marker, with
// the "-<GOMAXPROCS>" suffix removed if present. When s holds no marker the
// whole string is used, instead of slicing at an arbitrary offset.
func getSampleName(s string) string {
	const marker = "SAMPLE_"
	start := 0
	if i := strings.Index(s, marker); i >= 0 {
		start = i + len(marker)
	}

	// GOMAXPROCS(-1) only queries the current setting.
	suffix := fmt.Sprintf("-%d", runtime.GOMAXPROCS(-1))
	return strings.TrimSuffix(s[start:], suffix)
}

// prepareSamplesInfoForCSV returns firstLine (when non-empty) followed by
// the records of every sample, samples being ordered by name.
func prepareSamplesInfoForCSV(parsedInfo map[string]enryFuncs, firstLine []string) [][]string {
	info := createInfoWithFirstLine(firstLine, len(parsedInfo)*len(enryFunctions))
	for _, path := range sortKeys(parsedInfo) {
		sampleInfo := prepareInfoForCSV(parsedInfo[path], nil)
		info = append(info, sampleInfo...)
	}

	return info
}

// sortKeys returns the keys of parsedInfo in ascending order.
func sortKeys(parsedInfo map[string]enryFuncs) []string {
	keys := make([]string, 0, len(parsedInfo))
	for key := range parsedInfo {
		keys = append(keys, key)
	}

	sort.Strings(keys)
	return keys
}