github.com/ledgerwatch/erigon-lib@v1.0.0/compress/compress_test.go (about)

     1  /*
     2     Copyright 2021 Erigon contributors
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package compress
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"hash/crc32"
    23  	"io"
    24  	"os"
    25  	"path/filepath"
    26  	"testing"
    27  
    28  	"github.com/ledgerwatch/log/v3"
    29  	"github.com/stretchr/testify/require"
    30  )
    31  
    32  func TestCompressEmptyDict(t *testing.T) {
    33  	logger := log.New()
    34  	tmpDir := t.TempDir()
    35  	file := filepath.Join(tmpDir, "compressed")
    36  	c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 100, 1, log.LvlDebug, logger)
    37  	if err != nil {
    38  		t.Fatal(err)
    39  	}
    40  	defer c.Close()
    41  
    42  	if err = c.AddWord([]byte("word")); err != nil {
    43  		t.Fatal(err)
    44  	}
    45  	if err = c.Compress(); err != nil {
    46  		t.Fatal(err)
    47  	}
    48  	var d *Decompressor
    49  	if d, err = NewDecompressor(file); err != nil {
    50  		t.Fatal(err)
    51  	}
    52  	defer d.Close()
    53  	g := d.MakeGetter()
    54  	if !g.HasNext() {
    55  		t.Fatalf("expected a word")
    56  	}
    57  	word, _ := g.Next(nil)
    58  	if string(word) != "word" {
    59  		t.Fatalf("expeced word, got (hex) %x", word)
    60  	}
    61  	if g.HasNext() {
    62  		t.Fatalf("not expecting anything else")
    63  	}
    64  }
    65  
    66  // nolint
    67  func checksum(file string) uint32 {
    68  	hasher := crc32.NewIEEE()
    69  	f, err := os.Open(file)
    70  	if err != nil {
    71  		panic(err)
    72  	}
    73  	defer f.Close()
    74  	if _, err := io.Copy(hasher, f); err != nil {
    75  		panic(err)
    76  	}
    77  	return hasher.Sum32()
    78  }
    79  
    80  func prepareDict(t *testing.T) *Decompressor {
    81  	t.Helper()
    82  	logger := log.New()
    83  	tmpDir := t.TempDir()
    84  	file := filepath.Join(tmpDir, "compressed")
    85  	t.Name()
    86  	c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug, logger)
    87  	if err != nil {
    88  		t.Fatal(err)
    89  	}
    90  	defer c.Close()
    91  	for i := 0; i < 100; i++ {
    92  		if err = c.AddWord(nil); err != nil {
    93  			panic(err)
    94  		}
    95  		if err = c.AddWord([]byte("long")); err != nil {
    96  			t.Fatal(err)
    97  		}
    98  		if err = c.AddWord([]byte("word")); err != nil {
    99  			t.Fatal(err)
   100  		}
   101  		if err = c.AddWord([]byte(fmt.Sprintf("%d longlongword %d", i, i))); err != nil {
   102  			t.Fatal(err)
   103  		}
   104  	}
   105  	if err = c.Compress(); err != nil {
   106  		t.Fatal(err)
   107  	}
   108  	var d *Decompressor
   109  	if d, err = NewDecompressor(file); err != nil {
   110  		t.Fatal(err)
   111  	}
   112  	return d
   113  }
   114  
   115  func TestCompressDict1(t *testing.T) {
   116  	d := prepareDict(t)
   117  	defer d.Close()
   118  	g := d.MakeGetter()
   119  	i := 0
   120  	g.Reset(0)
   121  	for g.HasNext() {
   122  		// next word is `nil`
   123  		require.False(t, g.MatchPrefix([]byte("long")))
   124  		require.True(t, g.MatchPrefix([]byte("")))
   125  		require.True(t, g.MatchPrefix([]byte{}))
   126  
   127  		require.Equal(t, 1, g.MatchPrefixCmp([]byte("long")))
   128  		require.Equal(t, 0, g.MatchPrefixCmp([]byte("")))
   129  		require.Equal(t, 0, g.MatchPrefixCmp([]byte{}))
   130  		word, _ := g.Next(nil)
   131  		require.NotNil(t, word)
   132  		require.Zero(t, len(word))
   133  
   134  		// next word is `long`
   135  		require.True(t, g.MatchPrefix([]byte("long")))
   136  		require.False(t, g.MatchPrefix([]byte("longlong")))
   137  		require.False(t, g.MatchPrefix([]byte("wordnotmatch")))
   138  		require.False(t, g.MatchPrefix([]byte("longnotmatch")))
   139  		require.True(t, g.MatchPrefix([]byte{}))
   140  
   141  		require.Equal(t, 0, g.MatchPrefixCmp([]byte("long")))
   142  		require.Equal(t, 1, g.MatchPrefixCmp([]byte("longlong")))
   143  		require.Equal(t, 1, g.MatchPrefixCmp([]byte("wordnotmatch")))
   144  		require.Equal(t, 1, g.MatchPrefixCmp([]byte("longnotmatch")))
   145  		require.Equal(t, 0, g.MatchPrefixCmp([]byte{}))
   146  		_, _ = g.Next(nil)
   147  
   148  		// next word is `word`
   149  		require.False(t, g.MatchPrefix([]byte("long")))
   150  		require.False(t, g.MatchPrefix([]byte("longlong")))
   151  		require.True(t, g.MatchPrefix([]byte("word")))
   152  		require.True(t, g.MatchPrefix([]byte("")))
   153  		require.True(t, g.MatchPrefix(nil))
   154  		require.False(t, g.MatchPrefix([]byte("wordnotmatch")))
   155  		require.False(t, g.MatchPrefix([]byte("longnotmatch")))
   156  
   157  		require.Equal(t, -1, g.MatchPrefixCmp([]byte("long")))
   158  		require.Equal(t, -1, g.MatchPrefixCmp([]byte("longlong")))
   159  		require.Equal(t, 0, g.MatchPrefixCmp([]byte("word")))
   160  		require.Equal(t, 0, g.MatchPrefixCmp([]byte("")))
   161  		require.Equal(t, 0, g.MatchPrefixCmp(nil))
   162  		require.Equal(t, 1, g.MatchPrefixCmp([]byte("wordnotmatch")))
   163  		require.Equal(t, -1, g.MatchPrefixCmp([]byte("longnotmatch")))
   164  		_, _ = g.Next(nil)
   165  
   166  		// next word is `longlongword %d`
   167  		expectPrefix := fmt.Sprintf("%d long", i)
   168  
   169  		require.True(t, g.MatchPrefix([]byte(fmt.Sprintf("%d", i))))
   170  		require.True(t, g.MatchPrefix([]byte(expectPrefix)))
   171  		require.True(t, g.MatchPrefix([]byte(expectPrefix+"long")))
   172  		require.True(t, g.MatchPrefix([]byte(expectPrefix+"longword ")))
   173  		require.False(t, g.MatchPrefix([]byte("wordnotmatch")))
   174  		require.False(t, g.MatchPrefix([]byte("longnotmatch")))
   175  		require.True(t, g.MatchPrefix([]byte{}))
   176  
   177  		require.Equal(t, 0, g.MatchPrefixCmp([]byte(fmt.Sprintf("%d", i))))
   178  		require.Equal(t, 0, g.MatchPrefixCmp([]byte(expectPrefix)))
   179  		require.Equal(t, 0, g.MatchPrefixCmp([]byte(expectPrefix+"long")))
   180  		require.Equal(t, 0, g.MatchPrefixCmp([]byte(expectPrefix+"longword ")))
   181  		require.Equal(t, 1, g.MatchPrefixCmp([]byte("wordnotmatch")))
   182  		require.Equal(t, 1, g.MatchPrefixCmp([]byte("longnotmatch")))
   183  		require.Equal(t, 0, g.MatchPrefixCmp([]byte{}))
   184  		savePos := g.dataP
   185  		word, nextPos := g.Next(nil)
   186  		expected := fmt.Sprintf("%d longlongword %d", i, i)
   187  		g.Reset(savePos)
   188  		require.Equal(t, 0, g.MatchCmp([]byte(expected)))
   189  		g.Reset(nextPos)
   190  		if string(word) != expected {
   191  			t.Errorf("expected %s, got (hex) [%s]", expected, word)
   192  		}
   193  		i++
   194  	}
   195  
   196  	if cs := checksum(d.filePath); cs != 3153486123 {
   197  		// it's ok if hash changed, but need re-generate all existing snapshot hashes
   198  		// in https://github.com/ledgerwatch/erigon-snapshot
   199  		t.Errorf("result file hash changed, %d", cs)
   200  	}
   201  }
   202  
   203  func TestCompressDictCmp(t *testing.T) {
   204  	d := prepareDict(t)
   205  	defer d.Close()
   206  	g := d.MakeGetter()
   207  	i := 0
   208  	g.Reset(0)
   209  	for g.HasNext() {
   210  		// next word is `nil`
   211  		savePos := g.dataP
   212  		require.Equal(t, 1, g.MatchCmp([]byte("long")))
   213  		require.Equal(t, 0, g.MatchCmp([]byte(""))) // moves offset
   214  		g.Reset(savePos)
   215  		require.Equal(t, 0, g.MatchCmp([]byte{})) // moves offset
   216  		g.Reset(savePos)
   217  
   218  		word, _ := g.Next(nil)
   219  		require.NotNil(t, word)
   220  		require.Zero(t, len(word))
   221  
   222  		// next word is `long`
   223  		savePos = g.dataP
   224  		require.Equal(t, 0, g.MatchCmp([]byte("long"))) // moves offset
   225  		g.Reset(savePos)
   226  		require.Equal(t, 1, g.MatchCmp([]byte("longlong")))
   227  		require.Equal(t, 1, g.MatchCmp([]byte("wordnotmatch")))
   228  		require.Equal(t, 1, g.MatchCmp([]byte("longnotmatch")))
   229  		require.Equal(t, -1, g.MatchCmp([]byte{}))
   230  		_, _ = g.Next(nil)
   231  
   232  		// next word is `word`
   233  		savePos = g.dataP
   234  		require.Equal(t, -1, g.MatchCmp([]byte("long")))
   235  		require.Equal(t, -1, g.MatchCmp([]byte("longlong")))
   236  		require.Equal(t, 0, g.MatchCmp([]byte("word"))) // moves offset
   237  		g.Reset(savePos)
   238  		require.Equal(t, -1, g.MatchCmp([]byte("")))
   239  		require.Equal(t, -1, g.MatchCmp(nil))
   240  		require.Equal(t, 1, g.MatchCmp([]byte("wordnotmatch")))
   241  		require.Equal(t, -1, g.MatchCmp([]byte("longnotmatch")))
   242  		_, _ = g.Next(nil)
   243  
   244  		// next word is `longlongword %d`
   245  		expectPrefix := fmt.Sprintf("%d long", i)
   246  
   247  		require.Equal(t, -1, g.MatchCmp([]byte(fmt.Sprintf("%d", i))))
   248  		require.Equal(t, -1, g.MatchCmp([]byte(expectPrefix)))
   249  		require.Equal(t, -1, g.MatchCmp([]byte(expectPrefix+"long")))
   250  		require.Equal(t, -1, g.MatchCmp([]byte(expectPrefix+"longword ")))
   251  		require.Equal(t, 1, g.MatchCmp([]byte("wordnotmatch")))
   252  		require.Equal(t, 1, g.MatchCmp([]byte("longnotmatch")))
   253  		require.Equal(t, -1, g.MatchCmp([]byte{}))
   254  		savePos = g.dataP
   255  		word, nextPos := g.Next(nil)
   256  		expected := fmt.Sprintf("%d longlongword %d", i, i)
   257  		g.Reset(savePos)
   258  		require.Equal(t, 0, g.MatchCmp([]byte(expected)))
   259  		g.Reset(nextPos)
   260  		if string(word) != expected {
   261  			t.Errorf("expected %s, got (hex) [%s]", expected, word)
   262  		}
   263  		i++
   264  	}
   265  
   266  	if cs := checksum(d.filePath); cs != 3153486123 {
   267  		// it's ok if hash changed, but need re-generate all existing snapshot hashes
   268  		// in https://github.com/ledgerwatch/erigon-snapshot
   269  		t.Errorf("result file hash changed, %d", cs)
   270  	}
   271  }