github.com/noriah/catnip@v1.8.5/dsp/analyzer.go (about)

     1  // Package dsp provides audio analysis
     2  //
     3  // Some notes:
     4  //
     5  // https://dlbeer.co.nz/articles/fftvis.html
     6  // https://www.cg.tuwien.ac.at/courses/WissArbeiten/WS2010/processing.pdf
     7  // https://github.com/hvianna/audioMotion-analyzer/blob/master/src/audioMotion-analyzer.js#L1053
     8  // https://dsp.stackexchange.com/questions/6499/help-calculating-understanding-the-mfccs-mel-frequency-cepstrum-coefficients
     9  // https://stackoverflow.com/questions/3694918/how-to-extract-frequency-associated-with-fft-values-in-python
    10  //   - https://stackoverflow.com/a/27191172
    11  package dsp
    12  
    13  import "math"
    14  
    15  type BinMethod func(int, float64, float64) float64
    16  
// AnalyzerConfig holds the settings an Analyzer is created with.
//
// SampleRate and SampleSize together determine the width of one FFT bin
// (SampleRate / SampleSize) used when converting frequencies to FFT indices.
// BinMethod is called by ProcessBin for every sample that falls into a bin.
type AnalyzerConfig struct {
	SampleRate    float64   // audio sample rate
	SampleSize    int       // number of samples per slice
	SquashLow     bool      // squash the low end of the spectrum
	SquashLowOld  bool      // squash the low end using the old method (only consulted when SquashLow is set)
	DontNormalize bool      // don't run math.Log on output
	BinMethod     BinMethod // method used for calculating bin value
}
    25  
    26  type Analyzer interface {
    27  	BinCount() int
    28  	ProcessBin(int, []complex128) float64
    29  	Recalculate(int) int
    30  }
    31  
    32  // analyzer is an audio spectrum in a buffer
    33  type analyzer struct {
    34  	cfg      AnalyzerConfig // the analyzer config
    35  	bins     []bin          // bins for processing
    36  	binCount int            // number of bins we look at
    37  	fftSize  int            // number of fft bins
    38  }
    39  
    40  // Bin is a helper struct for spectrum
    41  type bin struct {
    42  	powVal   float64 // powpow
    43  	eqVal    float64 // equalizer value
    44  	floorFFT int     // floor fft index
    45  	ceilFFT  int     // ceiling fft index
    46  	// widthFFT int     // fft floor-ceiling index delta
    47  }
    48  
    49  // frequencies are the dividing frequencies
    50  var frequencies = []float64{
    51  	// sub sub bass
    52  	20.0, // 0
    53  	// sub bass
    54  	60.0, // 1
    55  	// bass
    56  	250.0, // 2
    57  	// midrange
    58  	4000.0, // 3
    59  	// treble
    60  	8000.0, // 4
    61  	// brilliance
    62  	22050.0, // 5
    63  	// everything else
    64  }
    65  
    66  // Average all the samples together.
    67  func AverageSamples() BinMethod {
    68  	return func(count int, current, new float64) float64 {
    69  		return current + (new / float64(count))
    70  	}
    71  }
    72  
    73  // Sum all the samples together.
    74  func SumSamples() BinMethod {
    75  	return func(_ int, current, new float64) float64 {
    76  		return current + new
    77  	}
    78  }
    79  
    80  // Return the maximum value of all the samples.
    81  func MaxSampleValue() BinMethod {
    82  	return func(_ int, current, new float64) float64 {
    83  		if current < new {
    84  			return new
    85  		}
    86  		return current
    87  	}
    88  }
    89  
    90  // Return the minimum value of all the samples that is not zero.
    91  func MinNonZeroSampleValue() BinMethod {
    92  	return func(_ int, current, new float64) float64 {
    93  		if current == 0.0 {
    94  			return new
    95  		}
    96  
    97  		if new == 0.0 {
    98  			return current
    99  		}
   100  
   101  		if current > new {
   102  			return new
   103  		}
   104  
   105  		return current
   106  	}
   107  }
   108  
   109  func NewAnalyzer(cfg AnalyzerConfig) Analyzer {
   110  	return &analyzer{
   111  		cfg:     cfg,
   112  		bins:    make([]bin, cfg.SampleSize),
   113  		fftSize: cfg.SampleSize/2 + 1,
   114  	}
   115  }
   116  
// BinCount returns the number of bins currently in use, i.e. the bin count
// stored by the most recent call to Recalculate.
func (az *analyzer) BinCount() int {
	return az.binCount
}
   121  
   122  func (az *analyzer) ProcessBin(idx int, src []complex128) float64 {
   123  	bin := az.bins[idx]
   124  
   125  	fftFloor, fftCeil := bin.floorFFT, bin.ceilFFT
   126  	if fftCeil > az.fftSize {
   127  		fftCeil = az.fftSize
   128  	}
   129  
   130  	// if fftFloor >= fftCeil {
   131  	// 	fftFloor = fftCeil - 1
   132  	// }
   133  
   134  	src = src[fftFloor:fftCeil]
   135  	mag := 0.0
   136  	count := len(src)
   137  	for _, cmplx := range src {
   138  		power := math.Hypot(real(cmplx), imag(cmplx))
   139  		mag = az.cfg.BinMethod(count, mag, power)
   140  	}
   141  
   142  	if az.cfg.SquashLow {
   143  		// squash the low low end a bit.
   144  		if az.cfg.SquashLowOld {
   145  			if f := az.freqToIdx(400.0, math.Floor); fftFloor < f {
   146  				mag *= 0.65 * (float64(fftFloor+1) / float64(f))
   147  			}
   148  		} else {
   149  			if f := az.freqToIdx(1000.0, math.Floor); fftFloor < f {
   150  				val := math.Min(float64(f), float64(fftFloor+2))
   151  
   152  				mag *= 0.55 * (math.Min(1.0, (val / float64(f))))
   153  			}
   154  		}
   155  	}
   156  
   157  	if mag < 0.0 {
   158  		return 0.0
   159  	}
   160  
   161  	if !az.cfg.DontNormalize {
   162  		mag = math.Log(mag)
   163  	}
   164  
   165  	if mag < 0.0 {
   166  		return 0.0
   167  	}
   168  
   169  	return mag
   170  }
   171  
   172  // Recalculate rebuilds our frequency bins
   173  func (az *analyzer) Recalculate(binCount int) int {
   174  	if az.fftSize == 0 {
   175  		az.fftSize = az.cfg.SampleSize/2 + 1
   176  	}
   177  
   178  	switch {
   179  	case binCount >= az.fftSize:
   180  		binCount = az.fftSize - 1
   181  	case binCount == az.binCount:
   182  		return binCount
   183  	}
   184  
   185  	az.binCount = binCount
   186  
   187  	// clean the binCount
   188  	for idx := range az.bins[:binCount] {
   189  		az.bins[idx].powVal = 0.65
   190  		az.bins[idx].eqVal = 1.0
   191  	}
   192  
   193  	az.distribute(binCount)
   194  
   195  	bassCut := az.freqToIdx(frequencies[2], math.Floor)
   196  	fBassCut := float64(bassCut)
   197  
   198  	// set widths
   199  	for idx, b := range az.bins[:binCount] {
   200  		if b.ceilFFT >= az.fftSize {
   201  			az.bins[idx].ceilFFT = az.fftSize - 1
   202  		}
   203  
   204  		// az.bins[idx].widthFFT = b.ceilFFT - b.floorFFT
   205  
   206  		if b.ceilFFT <= bassCut {
   207  			az.bins[idx].powVal *= math.Max(0.5, float64(b.ceilFFT)/fBassCut)
   208  		}
   209  
   210  	}
   211  
   212  	return binCount
   213  }
   214  
   215  // This is some hot garbage.
   216  // It essentially is a lot of work to just increment from 0 for each next bin.
   217  // Working on replacing this with a real distribution.
   218  func (az *analyzer) distribute(bins int) {
   219  	lo := frequencies[1]
   220  	hi := math.Min(az.cfg.SampleRate/2, frequencies[4])
   221  
   222  	loLog := math.Log10(lo)
   223  	hiLog := math.Log10(hi)
   224  
   225  	cF := (hiLog - loLog) / float64(bins)
   226  
   227  	cCoef := 100.0 / float64(bins+1)
   228  
   229  	for idx := range az.bins[:bins+1] {
   230  
   231  		frequency := ((float64(idx) * cF) + loLog)
   232  		frequency = math.Pow(10.0, frequency)
   233  		fftIdx := az.freqToIdx(frequency, math.Floor)
   234  		az.bins[idx].floorFFT = fftIdx
   235  		az.bins[idx].eqVal = math.Log2(float64(fftIdx)+14) * cCoef
   236  		// az.bins[idx].eqVal = 1.0
   237  
   238  		if idx > 0 {
   239  			if az.bins[idx-1].floorFFT >= az.bins[idx].floorFFT {
   240  				az.bins[idx].floorFFT = az.bins[idx-1].floorFFT + 1
   241  			}
   242  
   243  			az.bins[idx-1].ceilFFT = az.bins[idx].floorFFT
   244  		}
   245  	}
   246  }
   247  
   248  type mathFunc func(float64) float64
   249  
   250  func (az *analyzer) freqToIdx(freq float64, round mathFunc) int {
   251  	b := int(round(freq / (az.cfg.SampleRate / float64(az.cfg.SampleSize))))
   252  
   253  	if b < az.fftSize {
   254  		return b
   255  	}
   256  
   257  	return az.fftSize - 1
   258  }