gitlab.com/thomasboni/go-enry/v2@v2.8.3-0.20220418031202-30b0d7a3de98/benchmarks/parser/main.go (about)

     1  package main
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"encoding/csv"
     7  	"flag"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"log"
    11  	"math"
    12  	"os"
    13  	"path/filepath"
    14  	"runtime"
    15  	"sort"
    16  	"strconv"
    17  	"strings"
    18  )
    19  
const (
	// Names of the benchmarked functions. They are searched for in the first
	// field of each benchmark line and written to the "function" CSV column.
	getLanguageFunc = "GetLanguage()"
	classifyFunc    = "Classify()"
	modelineFunc    = "GetLanguagesByModeline()"
	filenameFunc    = "GetLanguagesByFilename()"
	shebangFunc     = "GetLanguagesByShebang()"
	extensionFunc   = "GetLanguagesByExtension()"
	contentFunc     = "GetLanguagesByContent()"

	// Benchmark output files read by generateCSV (paths are relative to the
	// working directory).
	enryTotalBench       = "enry_total.bench"
	enrySamplesBench     = "enry_samples.bench"
	linguistTotalBench   = "linguist_total.bench"
	linguistSamplesBench = "linguist_samples.bench"

	// CSV files written by generateCSV. The two samples files are also the
	// input of generateDistributionCSV.
	enryTotalCSV       = "enry-total.csv"
	enrySamplesCSV     = "enry-samples.csv"
	linguistTotalCSV   = "linguist-total.csv"
	linguistSamplesCSV = "linguist-samples.csv"

	// CSV files written only when the -distribution flag is set.
	enryDistributionCSV     = "enry-distribution.csv"
	linguistDistributionCSV = "linguist-distribution.csv"
)
    46  
var (
	// Command-line flags, bound in main.
	// distribution selects generateDistributionCSV instead of generateCSV.
	distribution bool
	// outDir is the directory the generated CSV files are written to.
	outDir string

	// enryFunctions lists the benchmarked functions in the order their rows
	// are emitted in the generated CSV files.
	enryFunctions = []string{getLanguageFunc, classifyFunc, modelineFunc, filenameFunc, shebangFunc, extensionFunc, contentFunc}
	// distributionIntervals holds the labels of the time buckets, ordered from
	// the fastest to the slowest; arrangeByTime indexes into it by position.
	distributionIntervals = []string{"1us-10us", "10us-100us", "100us-1ms", "1ms-10ms", "10ms-100ms"}
)
    55  
    56  func main() {
    57  	flag.BoolVar(&distribution, "distribution", false, "generate enry-distribuition.csv and linguist-distribution.csv")
    58  	flag.StringVar(&outDir, "outdir", "", "path to leave csv files")
    59  	flag.Parse()
    60  
    61  	if distribution {
    62  		generateDistributionCSV()
    63  		return
    64  	}
    65  
    66  	generateCSV()
    67  }
    68  
    69  func generateDistributionCSV() {
    70  	CSVFiles := []struct {
    71  		in   string
    72  		out  string
    73  		tool string
    74  	}{
    75  		{in: enrySamplesCSV, out: enryDistributionCSV, tool: "enry"},
    76  		{in: linguistSamplesCSV, out: linguistDistributionCSV, tool: "linguist"},
    77  	}
    78  
    79  	for _, CSVFile := range CSVFiles {
    80  		f, err := os.Open(CSVFile.in)
    81  		if err != nil {
    82  			log.Println(err)
    83  			continue
    84  		}
    85  		defer f.Close()
    86  
    87  		r := csv.NewReader(f)
    88  		CSVSamples, err := r.ReadAll()
    89  		if err != nil {
    90  			log.Println(err)
    91  			continue
    92  		}
    93  
    94  		CSVDistribution, err := buildDistribution(CSVSamples[1:], CSVFile.tool)
    95  		if err != nil {
    96  			log.Println(err)
    97  			continue
    98  		}
    99  
   100  		if err := writeCSV(CSVDistribution, filepath.Join(outDir, CSVFile.out)); err != nil {
   101  			log.Println(err)
   102  			continue
   103  		}
   104  	}
   105  }
   106  
   107  func buildDistribution(CSVSamples [][]string, tool string) ([][]string, error) {
   108  	count := make(map[string]int, len(distributionIntervals))
   109  	for _, row := range CSVSamples {
   110  		if row[1] != getLanguageFunc {
   111  			continue
   112  		}
   113  
   114  		num, err := strconv.ParseFloat(row[len(row)-1], 64)
   115  		if err != nil {
   116  			return nil, err
   117  		}
   118  
   119  		arrangeByTime(count, num)
   120  	}
   121  
   122  	CSVDistribution := make([][]string, 0, len(count)+1)
   123  	firstLine := []string{"timeInterval", tool, "numberOfFiles"}
   124  	CSVDistribution = append(CSVDistribution, firstLine)
   125  	for _, interval := range distributionIntervals {
   126  		number := strconv.FormatInt(int64(count[interval]), 10)
   127  		row := []string{interval, tool, number}
   128  		CSVDistribution = append(CSVDistribution, row)
   129  	}
   130  
   131  	printDistributionInfo(count, tool)
   132  	return CSVDistribution, nil
   133  }
   134  
   135  func printDistributionInfo(count map[string]int, tool string) {
   136  	total := 0
   137  	for _, v := range count {
   138  		total += v
   139  	}
   140  
   141  	fmt.Println(tool, "files", total)
   142  	fmt.Println("Distribution")
   143  	for _, interval := range distributionIntervals {
   144  		fmt.Println("\t", interval, count[interval])
   145  	}
   146  
   147  	fmt.Println("Percentage")
   148  	for _, interval := range distributionIntervals {
   149  		p := (float64(count[interval]) / float64(total)) * 100.00
   150  		fmt.Printf("\t %s %f%%\n", interval, p)
   151  	}
   152  
   153  	fmt.Printf("\n\n")
   154  }
   155  
   156  func arrangeByTime(count map[string]int, num float64) {
   157  	switch {
   158  	case num > 1000.00 && num <= 10000.00:
   159  		count[distributionIntervals[0]]++
   160  	case num > 10000.00 && num <= 100000.00:
   161  		count[distributionIntervals[1]]++
   162  	case num > 100000.00 && num <= 1000000.00:
   163  		count[distributionIntervals[2]]++
   164  	case num > 1000000.00 && num <= 10000000.00:
   165  		count[distributionIntervals[3]]++
   166  	case num > 10000000.00 && num <= 100000000.00:
   167  		count[distributionIntervals[4]]++
   168  	}
   169  }
   170  
   171  func writeCSV(CSVData [][]string, outPath string) error {
   172  	out, err := os.Create(outPath)
   173  	if err != nil {
   174  		return err
   175  	}
   176  
   177  	w := csv.NewWriter(out)
   178  	w.WriteAll(CSVData)
   179  
   180  	if err := w.Error(); err != nil {
   181  		return err
   182  	}
   183  
   184  	return nil
   185  }
   186  
// parse converts the raw content of a benchmark output file into the CSV rows
// (header first) to write for the given tool.
type parse func(data []byte, tool string) ([][]string, error)
   188  
   189  func generateCSV() {
   190  	bmFiles := []struct {
   191  		in    string
   192  		out   string
   193  		tool  string
   194  		parse parse
   195  	}{
   196  		{in: enryTotalBench, out: enryTotalCSV, tool: "enry", parse: parseTotal},
   197  		{in: linguistTotalBench, out: linguistTotalCSV, tool: "linguist", parse: parseTotal},
   198  		{in: enrySamplesBench, out: enrySamplesCSV, tool: "enry", parse: parseSamples},
   199  		{in: linguistSamplesBench, out: linguistSamplesCSV, tool: "linguist", parse: parseSamples},
   200  	}
   201  
   202  	for _, bmFile := range bmFiles {
   203  		buf, err := ioutil.ReadFile(bmFile.in)
   204  		if err != nil {
   205  			log.Println(err)
   206  			continue
   207  		}
   208  
   209  		info, err := bmFile.parse(buf, bmFile.tool)
   210  		if err != nil {
   211  			log.Println(err)
   212  			continue
   213  		}
   214  
   215  		if err := writeCSV(info, filepath.Join(outDir, bmFile.out)); err != nil {
   216  			log.Println(err)
   217  			continue
   218  		}
   219  	}
   220  }
   221  
   222  func parseTotal(data []byte, tool string) ([][]string, error) {
   223  	const totalLine = "_TOTAL"
   224  	parsedInfo := map[string][]string{}
   225  	buf := bufio.NewScanner(bytes.NewReader(data))
   226  	for buf.Scan() {
   227  		line := buf.Text()
   228  		if strings.Contains(line, totalLine) {
   229  			split := strings.Fields(line)
   230  			row, err := getRow(split, tool)
   231  			if err != nil {
   232  				return nil, err
   233  			}
   234  
   235  			parsedInfo[row[0]] = row
   236  		}
   237  	}
   238  
   239  	if err := buf.Err(); err != nil {
   240  		return nil, err
   241  	}
   242  
   243  	firstLine := []string{"function", "tool", "iterations", "ns/op"}
   244  	return prepareInfoForCSV(parsedInfo, firstLine), nil
   245  }
   246  
   247  func getRow(line []string, tool string) ([]string, error) {
   248  	row := make([]string, 0, 3)
   249  	for _, function := range enryFunctions {
   250  		if strings.Contains(line[0], function) {
   251  			row = append(row, function)
   252  			break
   253  		}
   254  	}
   255  
   256  	row = append(row, tool)
   257  	iterations := line[1]
   258  	row = append(row, iterations)
   259  
   260  	average, err := getAverage(line)
   261  	if err != nil {
   262  		return nil, err
   263  
   264  	}
   265  
   266  	row = append(row, average)
   267  	return row, nil
   268  }
   269  
   270  func getAverage(line []string) (string, error) {
   271  	average := line[len(line)-1]
   272  	if !strings.HasSuffix(average, ")") {
   273  		return line[2], nil
   274  	}
   275  
   276  	totalTime := strings.Trim(average, "() ")
   277  	time, err := strconv.ParseFloat(totalTime, 64)
   278  	if err != nil {
   279  		return "", err
   280  	}
   281  
   282  	iterations := line[1]
   283  	i, err := strconv.ParseFloat(iterations, 64)
   284  	if err != nil {
   285  		return "", err
   286  	}
   287  
   288  	avg := (time * math.Pow10(9)) / i
   289  	return fmt.Sprintf("%d", int(avg)), nil
   290  }
   291  
   292  func prepareInfoForCSV(parsedInfo map[string][]string, firstLine []string) [][]string {
   293  	info := createInfoWithFirstLine(firstLine, len(parsedInfo))
   294  	for _, function := range enryFunctions {
   295  		info = append(info, parsedInfo[function])
   296  	}
   297  
   298  	return info
   299  }
   300  
   301  func createInfoWithFirstLine(firstLine []string, sliceLength int) (info [][]string) {
   302  	if len(firstLine) > 0 {
   303  		info = make([][]string, 0, sliceLength+1)
   304  		info = append(info, firstLine)
   305  	} else {
   306  		info = make([][]string, 0, sliceLength)
   307  	}
   308  
   309  	return
   310  }
   311  
   312  type enryFuncs map[string][]string
   313  
   314  func newEnryFuncs() enryFuncs {
   315  	return enryFuncs{
   316  		getLanguageFunc: nil,
   317  		classifyFunc:    nil,
   318  		modelineFunc:    nil,
   319  		filenameFunc:    nil,
   320  		shebangFunc:     nil,
   321  		extensionFunc:   nil,
   322  		contentFunc:     nil,
   323  	}
   324  }
   325  
   326  func parseSamples(data []byte, tool string) ([][]string, error) {
   327  	const sampleLine = "SAMPLE_"
   328  	parsedInfo := map[string]enryFuncs{}
   329  	buf := bufio.NewScanner(bytes.NewReader(data))
   330  	for buf.Scan() {
   331  		line := buf.Text()
   332  		if strings.Contains(line, sampleLine) {
   333  			split := strings.Fields(line)
   334  			name := getSampleName(split[0])
   335  			if _, ok := parsedInfo[name]; !ok {
   336  				parsedInfo[name] = newEnryFuncs()
   337  			}
   338  
   339  			row := make([]string, 0, 4)
   340  			row = append(row, name)
   341  			r, err := getRow(split, tool)
   342  			if err != nil {
   343  				return nil, err
   344  			}
   345  
   346  			row = append(row, r...)
   347  			function := row[1]
   348  			parsedInfo[name][function] = row
   349  		}
   350  	}
   351  
   352  	if err := buf.Err(); err != nil {
   353  		return nil, err
   354  	}
   355  
   356  	firstLine := []string{"file", "function", "tool", "iterations", "ns/op"}
   357  	return prepareSamplesInfoForCSV(parsedInfo, firstLine), nil
   358  }
   359  
   360  func getSampleName(s string) string {
   361  	start := strings.Index(s, "SAMPLE_") + len("SAMPLE_")
   362  	suffix := fmt.Sprintf("-%d", runtime.GOMAXPROCS(-1))
   363  	name := strings.TrimSuffix(s[start:], suffix)
   364  	return name
   365  }
   366  
   367  func prepareSamplesInfoForCSV(parsedInfo map[string]enryFuncs, firstLine []string) [][]string {
   368  	info := createInfoWithFirstLine(firstLine, len(parsedInfo)*len(enryFunctions))
   369  	orderedKeys := sortKeys(parsedInfo)
   370  	for _, path := range orderedKeys {
   371  		sampleInfo := prepareInfoForCSV(parsedInfo[path], nil)
   372  		info = append(info, sampleInfo...)
   373  	}
   374  
   375  	return info
   376  }
   377  
   378  func sortKeys(parsedInfo map[string]enryFuncs) []string {
   379  	keys := make([]string, 0, len(parsedInfo))
   380  	for key := range parsedInfo {
   381  		keys = append(keys, key)
   382  	}
   383  
   384  	sort.Strings(keys)
   385  	return keys
   386  }