github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/physicscompress/summarize.go (about)

     1  // +build ignore
     2  
     3  package main
     4  
     5  import (
     6  	"bytes"
     7  	"flag"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"sort"
    11  
    12  	"github.com/egonelbre/exp/bit"
    13  )
    14  
    15  func check(err error) {
    16  	if err != nil {
    17  		panic(err)
    18  	}
    19  }
    20  
    21  var byteCount = [256]int{}
    22  
    23  func main() {
    24  	flag.Parse()
    25  	filename := flag.Arg(0)
    26  
    27  	for i := range byteCount {
    28  		byteCount[i] = bit.Count(uint64(i))
    29  	}
    30  
    31  	data, err := ioutil.ReadFile(filename)
    32  	check(err)
    33  
    34  	bitcount := 0
    35  	for _, b := range data {
    36  		bitcount += byteCount[b]
    37  	}
    38  	fmt.Printf("bit bias: %.4f\n", float64(bitcount)/float64(len(data)*8))
    39  
    40  	r := bit.NewReader(bytes.NewReader(data))
    41  	circle := byte(0)
    42  
    43  	count2 := make(map[byte]int)
    44  	count3 := make(map[byte]int)
    45  	count4 := make(map[byte]int)
    46  	count5 := make(map[byte]int)
    47  	count6 := make(map[byte]int)
    48  	count7 := make(map[byte]int)
    49  	count8 := make(map[byte]int)
    50  
    51  	var v int
    52  	for r.Error() == nil {
    53  		v = r.ReadBit()
    54  		circle <<= 1
    55  		circle |= byte(v)
    56  
    57  		count2[circle&(1<<2-1)] += 1
    58  		count3[circle&(1<<3-1)] += 1
    59  		count4[circle&(1<<4-1)] += 1
    60  		count5[circle&(1<<5-1)] += 1
    61  		count6[circle&(1<<6-1)] += 1
    62  		count7[circle&(1<<7-1)] += 1
    63  		count8[circle&(1<<8-1)] += 1
    64  	}
    65  
    66  	PrintFrequency("2", count2, data)
    67  	PrintFrequency("3", count3, data)
    68  	PrintFrequency("4", count4, data)
    69  	PrintFrequency("5", count5, data)
    70  	PrintFrequency("6", count6, data)
    71  	PrintFrequency("7", count7, data)
    72  	PrintFrequency("8", count8, data)
    73  }
    74  
    75  type Freq struct {
    76  	Pattern byte
    77  	Value   float64
    78  }
    79  
    80  type Table []Freq
    81  
    82  func (s Table) Len() int           { return len(s) }
    83  func (s Table) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
    84  func (s Table) Less(i, j int) bool { return s[i].Value > s[j].Value }
    85  
    86  func PrintFrequency(name string, freq map[byte]int, data []byte) {
    87  	fmt.Println("F ", name)
    88  
    89  	table := make(Table, 0, len(freq))
    90  	for pat, count := range freq {
    91  		entry := Freq{pat, float64(count) / float64(len(data)*8)}
    92  		table = append(table, entry)
    93  	}
    94  	sort.Sort(table)
    95  
    96  	for _, freq := range table {
    97  		fmt.Printf("   %8b: %.5f\n", freq.Pattern, freq.Value)
    98  	}
    99  }