#!/usr/bin/env ruby
# Origin: github.com/src-d/simple-linguist@v1.7.3/benchmarks/linguist-samples.rb
#
# Times each Linguist detection entry point against every file found under
# `.linguist/samples`, using Benchmark.bmbm (rehearsal + real run).
#
# Usage: linguist-samples.rb [ITERATIONS]
#   ITERATIONS - how many times each call is repeated per sample (default 1).

require 'benchmark'
require 'linguist'

# Repetitions per sample; taken from the first command-line argument.
iterations = (ARGV[0] || 1).to_i

# BenchBlob wraps a FileBlob to keep data loaded and to clean attributes added by language detection.
class BenchBlob < Linguist::FileBlob
  attr_accessor :data

  # Builds the blob and eagerly loads the file contents into @data so that
  # disk reads are not part of the timed section.
  #
  # path      - path of the sample file.
  # base_path - forwarded unchanged to Linguist::FileBlob (default nil).
  #
  # NOTE(review): the file is read with Ruby's default external encoding;
  # confirm this matches how Linguist::FileBlob itself loads data.
  def initialize(path, base_path = nil)
    super
    @data = File.read(@fullpath)
  end

  # Resets the instance variables listed below to nil so that the next
  # iteration starts from a fresh blob.
  # Presumably these are the values Linguist memoizes on the blob during
  # detection - verify against the Linguist version in use.
  def clean
    @_mime_type = nil
    @detect_encoding = nil
    @lines = nil
  end
end

# Recursively collects one BenchBlob per regular entry below +root+.
#
# root - directory to walk.
#
# Returns an Array of BenchBlob in Dir.foreach order, with the contents of
# each sub-directory spliced in at the position of that sub-directory.
# Only the "." and ".." entries are skipped; other dotfiles are included.
def get_samples(root)
  Dir.foreach(root).flat_map do |entry|
    next [] if entry == '.' || entry == '..'

    path = File.join(root, entry)
    File.directory?(path) ? get_samples(path) : [BenchBlob.new(path)]
  end
end

# Runs one Benchmark.bmbm report per sample for a single detection call.
#
# samples    - Array of BenchBlob to benchmark.
# label      - report prefix, e.g. 'GetLanguage()'.
# iterations - Integer number of repetitions inside each report.
#
# Yields each blob to the block, which performs the call being measured.
# blob.clean runs after every call (inside the timed section, as before) so
# that each repetition starts with the blob state reset.
#
# The report label has the form "<label>_SAMPLE_<path> <iterations>", where
# whitespace in the path is replaced by underscores.
def run_benchmark(samples, label, iterations)
  samples.each do |blob|
    sample_name = blob.path.gsub(/\s/, '_')
    Benchmark.bmbm do |bm|
      bm.report("#{label}_SAMPLE_#{sample_name} #{iterations}") do
        iterations.times do
          yield blob
          blob.clean
        end
      end
    end
  end
end

samples = get_samples('.linguist/samples')
languages = Linguist::Language.all

# Each strategy is benchmarked over ALL samples before moving to the next
# strategy, so the output stays grouped by strategy.
run_benchmark(samples, 'GetLanguage()', iterations) do |blob|
  Linguist.detect(blob)
end

run_benchmark(samples, 'Classify()', iterations) do |blob|
  Linguist::Classifier.classify(Linguist::Samples.cache, blob.data)
end

run_benchmark(samples, 'GetLanguagesByModeline()', iterations) do |blob|
  Linguist::Strategy::Modeline.call(blob, languages)
end

run_benchmark(samples, 'GetLanguagesByFilename()', iterations) do |blob|
  Linguist::Strategy::Filename.call(blob, languages)
end

run_benchmark(samples, 'GetLanguagesByShebang()', iterations) do |blob|
  Linguist::Shebang.call(blob, languages)
end

run_benchmark(samples, 'GetLanguagesByExtension()', iterations) do |blob|
  Linguist::Strategy::Extension.call(blob, languages)
end

run_benchmark(samples, 'GetLanguagesByContent()', iterations) do |blob|
  Linguist::Heuristics.call(blob, languages)
end