github.com/ledgerwatch/erigon-lib@v1.0.0/compress/compress_test.go (about) 1 /* 2 Copyright 2021 Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package compress 18 19 import ( 20 "context" 21 "fmt" 22 "hash/crc32" 23 "io" 24 "os" 25 "path/filepath" 26 "testing" 27 28 "github.com/ledgerwatch/log/v3" 29 "github.com/stretchr/testify/require" 30 ) 31 32 func TestCompressEmptyDict(t *testing.T) { 33 logger := log.New() 34 tmpDir := t.TempDir() 35 file := filepath.Join(tmpDir, "compressed") 36 c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 100, 1, log.LvlDebug, logger) 37 if err != nil { 38 t.Fatal(err) 39 } 40 defer c.Close() 41 42 if err = c.AddWord([]byte("word")); err != nil { 43 t.Fatal(err) 44 } 45 if err = c.Compress(); err != nil { 46 t.Fatal(err) 47 } 48 var d *Decompressor 49 if d, err = NewDecompressor(file); err != nil { 50 t.Fatal(err) 51 } 52 defer d.Close() 53 g := d.MakeGetter() 54 if !g.HasNext() { 55 t.Fatalf("expected a word") 56 } 57 word, _ := g.Next(nil) 58 if string(word) != "word" { 59 t.Fatalf("expeced word, got (hex) %x", word) 60 } 61 if g.HasNext() { 62 t.Fatalf("not expecting anything else") 63 } 64 } 65 66 // nolint 67 func checksum(file string) uint32 { 68 hasher := crc32.NewIEEE() 69 f, err := os.Open(file) 70 if err != nil { 71 panic(err) 72 } 73 defer f.Close() 74 if _, err := io.Copy(hasher, f); err != nil { 75 panic(err) 76 } 77 return hasher.Sum32() 78 } 79 80 func prepareDict(t *testing.T) *Decompressor { 81 t.Helper() 82 logger := log.New() 83 tmpDir := t.TempDir() 84 file := filepath.Join(tmpDir, "compressed") 85 t.Name() 86 c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug, logger) 87 if err != nil { 88 t.Fatal(err) 89 } 90 defer c.Close() 91 for i := 0; i < 100; i++ { 92 if err = c.AddWord(nil); err != nil { 93 panic(err) 94 } 95 if err = c.AddWord([]byte("long")); err != nil { 96 t.Fatal(err) 97 } 98 if err = c.AddWord([]byte("word")); err != nil { 99 t.Fatal(err) 100 } 101 if err = c.AddWord([]byte(fmt.Sprintf("%d longlongword %d", i, i))); err != nil { 102 t.Fatal(err) 103 } 104 } 105 if err = c.Compress(); err != nil { 106 t.Fatal(err) 107 } 108 var d *Decompressor 109 if d, err = NewDecompressor(file); err != nil { 110 t.Fatal(err) 111 } 112 return d 113 } 114 115 func TestCompressDict1(t *testing.T) { 116 d := prepareDict(t) 117 defer d.Close() 118 g := d.MakeGetter() 119 i := 0 120 g.Reset(0) 121 for g.HasNext() { 122 // next word is `nil` 123 require.False(t, g.MatchPrefix([]byte("long"))) 124 require.True(t, g.MatchPrefix([]byte(""))) 125 require.True(t, g.MatchPrefix([]byte{})) 126 127 require.Equal(t, 1, g.MatchPrefixCmp([]byte("long"))) 128 require.Equal(t, 0, g.MatchPrefixCmp([]byte(""))) 129 require.Equal(t, 0, g.MatchPrefixCmp([]byte{})) 130 word, _ := g.Next(nil) 131 require.NotNil(t, word) 132 require.Zero(t, len(word)) 133 134 // next word is `long` 135 require.True(t, g.MatchPrefix([]byte("long"))) 136 require.False(t, g.MatchPrefix([]byte("longlong"))) 137 require.False(t, g.MatchPrefix([]byte("wordnotmatch"))) 138 require.False(t, g.MatchPrefix([]byte("longnotmatch"))) 139 require.True(t, g.MatchPrefix([]byte{})) 140 141 require.Equal(t, 0, g.MatchPrefixCmp([]byte("long"))) 142 require.Equal(t, 1, g.MatchPrefixCmp([]byte("longlong"))) 143 require.Equal(t, 1, g.MatchPrefixCmp([]byte("wordnotmatch"))) 144 require.Equal(t, 1, g.MatchPrefixCmp([]byte("longnotmatch"))) 145 require.Equal(t, 0, g.MatchPrefixCmp([]byte{})) 146 _, _ = g.Next(nil) 147 148 // next word is `word` 149 require.False(t, g.MatchPrefix([]byte("long"))) 150 require.False(t, g.MatchPrefix([]byte("longlong"))) 151 require.True(t, g.MatchPrefix([]byte("word"))) 152 require.True(t, g.MatchPrefix([]byte(""))) 153 require.True(t, g.MatchPrefix(nil)) 154 require.False(t, g.MatchPrefix([]byte("wordnotmatch"))) 155 require.False(t, g.MatchPrefix([]byte("longnotmatch"))) 156 157 require.Equal(t, -1, g.MatchPrefixCmp([]byte("long"))) 158 require.Equal(t, -1, g.MatchPrefixCmp([]byte("longlong"))) 159 require.Equal(t, 0, g.MatchPrefixCmp([]byte("word"))) 160 require.Equal(t, 0, g.MatchPrefixCmp([]byte(""))) 161 require.Equal(t, 0, g.MatchPrefixCmp(nil)) 162 require.Equal(t, 1, g.MatchPrefixCmp([]byte("wordnotmatch"))) 163 require.Equal(t, -1, g.MatchPrefixCmp([]byte("longnotmatch"))) 164 _, _ = g.Next(nil) 165 166 // next word is `longlongword %d` 167 expectPrefix := fmt.Sprintf("%d long", i) 168 169 require.True(t, g.MatchPrefix([]byte(fmt.Sprintf("%d", i)))) 170 require.True(t, g.MatchPrefix([]byte(expectPrefix))) 171 require.True(t, g.MatchPrefix([]byte(expectPrefix+"long"))) 172 require.True(t, g.MatchPrefix([]byte(expectPrefix+"longword "))) 173 require.False(t, g.MatchPrefix([]byte("wordnotmatch"))) 174 require.False(t, g.MatchPrefix([]byte("longnotmatch"))) 175 require.True(t, g.MatchPrefix([]byte{})) 176 177 require.Equal(t, 0, g.MatchPrefixCmp([]byte(fmt.Sprintf("%d", i)))) 178 require.Equal(t, 0, g.MatchPrefixCmp([]byte(expectPrefix))) 179 require.Equal(t, 0, g.MatchPrefixCmp([]byte(expectPrefix+"long"))) 180 require.Equal(t, 0, g.MatchPrefixCmp([]byte(expectPrefix+"longword "))) 181 require.Equal(t, 1, g.MatchPrefixCmp([]byte("wordnotmatch"))) 182 require.Equal(t, 1, g.MatchPrefixCmp([]byte("longnotmatch"))) 183 require.Equal(t, 0, g.MatchPrefixCmp([]byte{})) 184 savePos := g.dataP 185 word, nextPos := g.Next(nil) 186 expected := fmt.Sprintf("%d longlongword %d", i, i) 187 g.Reset(savePos) 188 require.Equal(t, 0, g.MatchCmp([]byte(expected))) 189 g.Reset(nextPos) 190 if string(word) != expected { 191 t.Errorf("expected %s, got (hex) [%s]", expected, word) 192 } 193 i++ 194 } 195 196 if cs := checksum(d.filePath); cs != 3153486123 { 197 // it's ok if hash changed, but need re-generate all existing snapshot hashes 198 // in https://github.com/ledgerwatch/erigon-snapshot 199 t.Errorf("result file hash changed, %d", cs) 200 } 201 } 202 203 func TestCompressDictCmp(t *testing.T) { 204 d := prepareDict(t) 205 defer d.Close() 206 g := d.MakeGetter() 207 i := 0 208 g.Reset(0) 209 for g.HasNext() { 210 // next word is `nil` 211 savePos := g.dataP 212 require.Equal(t, 1, g.MatchCmp([]byte("long"))) 213 require.Equal(t, 0, g.MatchCmp([]byte(""))) // moves offset 214 g.Reset(savePos) 215 require.Equal(t, 0, g.MatchCmp([]byte{})) // moves offset 216 g.Reset(savePos) 217 218 word, _ := g.Next(nil) 219 require.NotNil(t, word) 220 require.Zero(t, len(word)) 221 222 // next word is `long` 223 savePos = g.dataP 224 require.Equal(t, 0, g.MatchCmp([]byte("long"))) // moves offset 225 g.Reset(savePos) 226 require.Equal(t, 1, g.MatchCmp([]byte("longlong"))) 227 require.Equal(t, 1, g.MatchCmp([]byte("wordnotmatch"))) 228 require.Equal(t, 1, g.MatchCmp([]byte("longnotmatch"))) 229 require.Equal(t, -1, g.MatchCmp([]byte{})) 230 _, _ = g.Next(nil) 231 232 // next word is `word` 233 savePos = g.dataP 234 require.Equal(t, -1, g.MatchCmp([]byte("long"))) 235 require.Equal(t, -1, g.MatchCmp([]byte("longlong"))) 236 require.Equal(t, 0, g.MatchCmp([]byte("word"))) // moves offset 237 g.Reset(savePos) 238 require.Equal(t, -1, g.MatchCmp([]byte(""))) 239 require.Equal(t, -1, g.MatchCmp(nil)) 240 require.Equal(t, 1, g.MatchCmp([]byte("wordnotmatch"))) 241 require.Equal(t, -1, g.MatchCmp([]byte("longnotmatch"))) 242 _, _ = g.Next(nil) 243 244 // next word is `longlongword %d` 245 expectPrefix := fmt.Sprintf("%d long", i) 246 247 require.Equal(t, -1, g.MatchCmp([]byte(fmt.Sprintf("%d", i)))) 248 require.Equal(t, -1, g.MatchCmp([]byte(expectPrefix))) 249 require.Equal(t, -1, g.MatchCmp([]byte(expectPrefix+"long"))) 250 require.Equal(t, -1, g.MatchCmp([]byte(expectPrefix+"longword "))) 251 require.Equal(t, 1, g.MatchCmp([]byte("wordnotmatch"))) 252 require.Equal(t, 1, g.MatchCmp([]byte("longnotmatch"))) 253 require.Equal(t, -1, g.MatchCmp([]byte{})) 254 savePos = g.dataP 255 word, nextPos := g.Next(nil) 256 expected := fmt.Sprintf("%d longlongword %d", i, i) 257 g.Reset(savePos) 258 require.Equal(t, 0, g.MatchCmp([]byte(expected))) 259 g.Reset(nextPos) 260 if string(word) != expected { 261 t.Errorf("expected %s, got (hex) [%s]", expected, word) 262 } 263 i++ 264 } 265 266 if cs := checksum(d.filePath); cs != 3153486123 { 267 // it's ok if hash changed, but need re-generate all existing snapshot hashes 268 // in https://github.com/ledgerwatch/erigon-snapshot 269 t.Errorf("result file hash changed, %d", cs) 270 } 271 }