github.com/src-d/simple-linguist@v1.7.3/benchmarks/linguist-samples.rb (about)

     1  #!/usr/bin/env ruby
     2  
     3  require 'benchmark'
     4  require 'linguist'
     5  
     6  iterations = (ARGV[0] || 1).to_i
     7  
     8  # BenchBlob wraps a FileBlob to keep data loaded and to clean attributes added by language detection.
     9  class BenchBlob < Linguist::FileBlob
    10    attr_accessor :data
    11  
    12    def initialize(path, base_path = nil)
    13      super
    14      @data = File.read(@fullpath)
    15    end
    16  
    17    def clean
    18      @_mime_type = nil
    19      @detect_encoding = nil
    20      @lines = nil
    21    end
    22  end
    23  
    24  def get_samples(root)
    25    samples = Array.new
    26    Dir.foreach(root) do |file|
    27      path = File.join(root, file)
    28      if file == "." or file == ".."
    29        next
    30      elsif File.directory?(path)
    31        get_samples(path).each do |blob|
    32          samples << blob
    33        end
    34      else
    35        samples << BenchBlob.new(path)
    36      end
    37    end
    38    return samples
    39  end
    40  
    41  samples = get_samples('.linguist/samples')
    42  languages = Linguist::Language.all
    43  
    44  samples.each do |blob|
    45    sample_name = blob.path.gsub(/\s/, '_')
    46    Benchmark.bmbm do |bm|
    47      bm.report('GetLanguage()_SAMPLE_' + sample_name + ' ' + iterations.to_s) do
    48        iterations.times do
    49          Linguist::detect(blob)
    50          blob.clean
    51        end
    52      end
    53    end
    54  end
    55  
    56  samples.each do |blob|
    57    sample_name = blob.path.gsub(/\s/, '_')
    58    Benchmark.bmbm do |bm|
    59      bm.report('Classify()_SAMPLE_' + sample_name + ' ' + iterations.to_s) do
    60        iterations.times do
    61          Linguist::Classifier.classify(Linguist::Samples.cache, blob.data)
    62          blob.clean
    63        end
    64      end
    65    end
    66  end
    67  
    68  samples.each do |blob|
    69    sample_name = blob.path.gsub(/\s/, '_')
    70    Benchmark.bmbm do |bm|
    71      bm.report('GetLanguagesByModeline()_SAMPLE_' + sample_name + ' ' + iterations.to_s) do
    72        iterations.times do
    73          Linguist::Strategy::Modeline.call(blob, languages)
    74          blob.clean
    75        end
    76      end
    77    end
    78  end
    79  
    80  samples.each do |blob|
    81    sample_name = blob.path.gsub(/\s/, '_')
    82    Benchmark.bmbm do |bm|
    83      bm.report('GetLanguagesByFilename()_SAMPLE_' + sample_name + ' ' + iterations.to_s) do
    84      iterations.times do
    85          Linguist::Strategy::Filename.call(blob, languages)
    86          blob.clean
    87        end
    88      end
    89    end
    90  end
    91  
    92  samples.each do |blob|
    93    sample_name = blob.path.gsub(/\s/, '_')
    94    Benchmark.bmbm do |bm|
    95      bm.report('GetLanguagesByShebang()_SAMPLE_' + sample_name + ' ' + iterations.to_s) do
    96        iterations.times do
    97          Linguist::Shebang.call(blob, languages)
    98          blob.clean
    99        end
   100      end
   101    end
   102  end
   103  
   104  samples.each do |blob|
   105    sample_name = blob.path.gsub(/\s/, '_')
   106    Benchmark.bmbm do |bm|
   107      bm.report('GetLanguagesByExtension()_SAMPLE_' + sample_name + ' ' + iterations.to_s) do
   108        iterations.times do
   109          Linguist::Strategy::Extension.call(blob, languages)
   110          blob.clean
   111        end
   112      end
   113    end
   114  end
   115  
   116  samples.each do |blob|
   117    sample_name = blob.path.gsub(/\s/, '_')
   118    Benchmark.bmbm do |bm|
   119      bm.report('GetLanguagesByContent()_SAMPLE_' + sample_name + ' ' + iterations.to_s) do
   120      iterations.times do
   121          Linguist::Heuristics.call(blob, languages)
   122          blob.clean
   123        end
   124      end
   125    end
   126  end