github.com/NeowayLabs/nash@v0.2.2-0.20200127205349-a227041ffd50/stdbin/strings/strings_test.go (about) 1 package main_test 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "fmt" 8 "io" 9 "testing" 10 11 strings "github.com/madlambda/nash/stdbin/strings" 12 ) 13 14 func TestStrings(t *testing.T) { 15 16 type testcase struct { 17 name string 18 input func([]byte) []byte 19 output []string 20 minWordSize uint 21 } 22 23 tcases := []testcase{ 24 { 25 name: "UTF-8With2Bytes", 26 minWordSize: 1, 27 input: func(bin []byte) []byte { 28 return append([]byte("λ"), bin...) 29 }, 30 output: []string{"λ"}, 31 }, 32 { 33 name: "UTF-8With3Bytes", 34 minWordSize: 1, 35 input: func(bin []byte) []byte { 36 return append([]byte("€"), bin...) 37 }, 38 output: []string{"€"}, 39 }, 40 { 41 name: "UTF-8With4Bytes", 42 minWordSize: 1, 43 input: func(bin []byte) []byte { 44 return append([]byte("𐍈"), bin...) 45 }, 46 output: []string{"𐍈"}, 47 }, 48 { 49 name: "NonASCIIWordHasOneLessCharThanMin", 50 minWordSize: 2, 51 input: func(bin []byte) []byte { 52 return append([]byte("λ"), bin...) 53 }, 54 output: []string{}, 55 }, 56 { 57 name: "NonASCIIWordHasMinWordSize", 58 minWordSize: 2, 59 input: func(bin []byte) []byte { 60 return append([]byte("λλ"), bin...) 61 }, 62 output: []string{"λλ"}, 63 }, 64 { 65 name: "WordHasOneLessCharThanMin", 66 minWordSize: 2, 67 input: func(bin []byte) []byte { 68 return append([]byte("k"), bin...) 69 }, 70 output: []string{}, 71 }, 72 { 73 name: "WordHasMinWordSize", 74 minWordSize: 2, 75 input: func(bin []byte) []byte { 76 return append([]byte("kz"), bin...) 77 }, 78 output: []string{"kz"}, 79 }, 80 { 81 name: "WordHasOneMoreCharThanMinWordSize", 82 minWordSize: 2, 83 input: func(bin []byte) []byte { 84 return append([]byte("ktz"), bin...) 85 }, 86 output: []string{"ktz"}, 87 }, 88 { 89 name: "StartingWithOneChar", 90 minWordSize: 1, 91 input: func(bin []byte) []byte { 92 return append([]byte("k"), bin...) 93 }, 94 output: []string{"k"}, 95 }, 96 { 97 name: "EndWithOneChar", 98 minWordSize: 1, 99 input: func(bin []byte) []byte { 100 return append(bin, []byte("k")...) 101 }, 102 output: []string{"k"}, 103 }, 104 { 105 name: "OneCharInTheMiddle", 106 minWordSize: 1, 107 input: func(bin []byte) []byte { 108 t := append(bin, []byte("k")...) 109 t = append(t, bin...) 110 return t 111 }, 112 output: []string{"k"}, 113 }, 114 { 115 name: "StartingWithText", 116 minWordSize: 1, 117 input: func(bin []byte) []byte { 118 expected := "textOnBeggining" 119 return append([]byte(expected), bin...) 120 }, 121 output: []string{"textOnBeggining"}, 122 }, 123 { 124 name: "TextOnMiddle", 125 minWordSize: 1, 126 input: func(bin []byte) []byte { 127 expected := "textOnMiddle" 128 return append(bin, append([]byte(expected), bin...)...) 129 }, 130 output: []string{"textOnMiddle"}, 131 }, 132 { 133 name: "NonASCIITextOnMiddle", 134 minWordSize: 1, 135 input: func(bin []byte) []byte { 136 expected := "λλλ" 137 return append(bin, append([]byte(expected), bin...)...) 138 }, 139 output: []string{"λλλ"}, 140 }, 141 { 142 name: "ASCIIAndNonASCII", 143 minWordSize: 1, 144 input: func(bin []byte) []byte { 145 expected := "(define (λ (x) (+ x a)))" 146 return append(bin, append([]byte(expected), bin...)...) 147 }, 148 output: []string{"(define (λ (x) (+ x a)))"}, 149 }, 150 { 151 name: "TextOnEnd", 152 minWordSize: 1, 153 input: func(bin []byte) []byte { 154 expected := "textOnEnd" 155 return append(bin, append([]byte(expected), bin...)...) 156 }, 157 output: []string{"textOnEnd"}, 158 }, 159 { 160 name: "JustText", 161 minWordSize: 1, 162 input: func(bin []byte) []byte { 163 return []byte("justtext") 164 }, 165 output: []string{"justtext"}, 166 }, 167 { 168 name: "JustBinary", 169 minWordSize: 1, 170 input: func(bin []byte) []byte { 171 return bin 172 }, 173 output: []string{}, 174 }, 175 { 176 name: "TextSeparatedByBinary", 177 minWordSize: 1, 178 input: func(bin []byte) []byte { 179 text := []byte("text") 180 t := []byte{} 181 t = append(t, bin...) 182 t = append(t, text...) 183 t = append(t, bin...) 184 t = append(t, text...) 185 return t 186 }, 187 output: []string{"text", "text"}, 188 }, 189 { 190 name: "NonASCIITextSeparatedByBinary", 191 minWordSize: 1, 192 input: func(bin []byte) []byte { 193 text := []byte("awesomeλ=)") 194 t := []byte{} 195 t = append(t, bin...) 196 t = append(t, text...) 197 t = append(t, bin...) 198 t = append(t, text...) 199 return t 200 }, 201 output: []string{"awesomeλ=)", "awesomeλ=)"}, 202 }, 203 { 204 name: "WordsAreNotAccumulativeBetweenBinData", 205 minWordSize: 2, 206 input: func(bin []byte) []byte { 207 t := append([]byte("k"), bin...) 208 return append(t, byte('t')) 209 }, 210 output: []string{}, 211 }, 212 { 213 name: "ASCIISeparatedByByteThatLooksLikeUTF", 214 minWordSize: 1, 215 input: func(bin []byte) []byte { 216 return append([]byte{ 217 'n', 218 runestart, 219 'k', 220 }, bin...) 221 }, 222 output: []string{"n", "k"}, 223 }, 224 { 225 name: "ASCIIAfterPossibleFirstByteOfUTF", 226 minWordSize: 1, 227 input: func(bin []byte) []byte { 228 return append([]byte{ 229 runestart, 230 'k', 231 }, bin...) 232 }, 233 output: []string{"k"}, 234 }, 235 { 236 name: "ASCIIAfterPossibleSecondByteOfUTF", 237 minWordSize: 1, 238 input: func(bin []byte) []byte { 239 return append([]byte{ 240 byte(0xE2), 241 byte(0x82), 242 'k', 243 }, bin...) 244 }, 245 output: []string{"k"}, 246 }, 247 { 248 name: "ASCIIAfterPossibleThirdByteOfUTF", 249 minWordSize: 1, 250 input: func(bin []byte) []byte { 251 return append([]byte{ 252 byte(0xF0), 253 byte(0x90), 254 byte(0x8D), 255 'k', 256 }, bin...) 257 }, 258 output: []string{"k"}, 259 }, 260 { 261 name: "AfterFalseRuneStartRuneStartOnSecondByte", 262 minWordSize: 1, 263 input: func(bin []byte) []byte { 264 i := []byte{byte(0xF0)} 265 i = append(i, []byte("λ")...) 266 return append(i, bin...) 267 }, 268 output: []string{"λ"}, 269 }, 270 { 271 name: "AfterFalseRuneStartRuneStartOnThirdByte", 272 minWordSize: 1, 273 input: func(bin []byte) []byte { 274 i := []byte{byte(0xF0), byte(0x90)} 275 i = append(i, []byte("λ")...) 276 return append(i, bin...) 277 }, 278 output: []string{"λ"}, 279 }, 280 { 281 name: "AfterFalseRuneStartRuneStartOnFourthByte", 282 minWordSize: 1, 283 input: func(bin []byte) []byte { 284 i := []byte{byte(0xF0), byte(0x90), byte(0x8D)} 285 i = append(i, []byte("λ")...) 286 return append(i, bin...) 287 }, 288 output: []string{"λ"}, 289 }, 290 { 291 name: "ASCIIFakeRuneAndThemRune", 292 minWordSize: 1, 293 input: func(bin []byte) []byte { 294 i := []byte{'v'} 295 i = append(i, byte(0xF0)) 296 i = append(i, []byte("λ")...) 297 return append(i, bin...) 298 }, 299 output: []string{"v", "λ"}, 300 }, 301 { 302 name: "ASCIISplittedByZero", 303 minWordSize: 1, 304 input: func([]byte) []byte { 305 return []byte{'k', 0, 'n', 0, 'v'} 306 }, 307 output: []string{"k", "n", "v"}, 308 }, 309 { 310 name: "RunesSplittedByZero", 311 minWordSize: 1, 312 input: func([]byte) []byte { 313 i := []byte("λ") 314 i = append(i, 0) 315 i = append(i, []byte("λ")...) 316 return i 317 }, 318 output: []string{"λ", "λ"}, 319 }, 320 { 321 name: "ASCIIAndRunesSplittedByZero", 322 minWordSize: 1, 323 input: func([]byte) []byte { 324 i := []byte("λ") 325 i = append(i, 0) 326 i = append(i, 's') 327 i = append(i, 0) 328 i = append(i, []byte("λ")...) 329 return i 330 }, 331 output: []string{"λ", "s", "λ"}, 332 }, 333 } 334 335 minBinChunkSize := 1 336 maxBinChunkSize := 128 337 338 for _, tcase := range tcases { 339 for i := minBinChunkSize; i <= maxBinChunkSize; i++ { 340 binsize := i 341 testname := fmt.Sprintf("%s/binSize%d", tcase.name, binsize) 342 t.Run(testname, func(t *testing.T) { 343 bin := newBinary(uint(binsize)) 344 input := tcase.input(bin) 345 scanner := strings.Do(bytes.NewBuffer(input), tcase.minWordSize) 346 347 lines := []string{} 348 for scanner.Scan() { 349 lines = append(lines, scanner.Text()) 350 } 351 352 if len(lines) != len(tcase.output) { 353 t.Errorf("wanted size[%d] got size[%d]", len(tcase.output), len(lines)) 354 t.Fatalf("wanted[%s] got[%s]", tcase.output, lines) 355 } 356 357 for i, want := range tcase.output { 358 got := lines[i] 359 if want != got { 360 t.Errorf("unexpected line at[%d]", i) 361 t.Errorf("wanted[%s] got[%s]", want, got) 362 t.Errorf("wantedLines[%s] gotLines[%s]", tcase.output, lines) 363 } 364 } 365 366 if scanner.Err() != nil { 367 t.Fatalf("unexpected error[%s]", scanner.Err()) 368 } 369 }) 370 } 371 } 372 } 373 374 func TestStringsReadErrorOnFirstByte(t *testing.T) { 375 var minWordSize uint = 1 376 scanner := strings.Do(newFakeReader(func(d []byte) (int, error) { 377 return 0, errors.New("fake injected error") 378 }), minWordSize) 379 assertScannerFails(t, scanner, 0) 380 } 381 382 func TestStringsReadErrorOnSecondByte(t *testing.T) { 383 var minWordSize uint = 1 384 sentFirstByte := false 385 scanner := strings.Do(newFakeReader(func(d []byte) (int, error) { 386 if sentFirstByte { 387 return 0, errors.New("fake injected error") 388 } 389 d[0] = 'k' 390 sentFirstByte = true 391 return 1, nil 392 }), minWordSize) 393 assertScannerFails(t, scanner, 1) 394 } 395 396 func TestStringsReadErrorAfterValidUTF8StartingByte(t *testing.T) { 397 var minWordSize uint = 1 398 sentFirstByte := false 399 scanner := strings.Do(newFakeReader(func(d []byte) (int, error) { 400 if sentFirstByte { 401 return 0, errors.New("fake injected error") 402 } 403 sentFirstByte = true 404 d[0] = runestart 405 return 1, nil 406 }), minWordSize) 407 assertScannerFails(t, scanner, 0) 408 } 409 410 func TestStringsReadCanReturnEOFWithData(t *testing.T) { 411 var minWordSize uint = 1 412 want := byte('k') 413 414 scanner := strings.Do(newFakeReader(func(d []byte) (int, error) { 415 if len(d) == 0 { 416 t.Fatal("empty data on Read operation") 417 } 418 d[0] = want 419 return 1, io.EOF 420 }), minWordSize) 421 422 if !scanner.Scan() { 423 t.Fatal("unexpected Scan failure") 424 } 425 got := scanner.Text() 426 if string(want) != got { 427 t.Fatalf("want[%s] != got[%s]", string(want), got) 428 } 429 } 430 431 const runestart byte = 0xC2 432 433 type FakeReader struct { 434 read func([]byte) (int, error) 435 } 436 437 func (f *FakeReader) Read(d []byte) (int, error) { 438 if f.read == nil { 439 return 0, fmt.Errorf("FakeReader has no Read implementation") 440 } 441 return f.read(d) 442 } 443 444 func newFakeReader(read func([]byte) (int, error)) *FakeReader { 445 return &FakeReader{read: read} 446 } 447 448 func assertScannerFails(t *testing.T, scanner *bufio.Scanner, expectedIter uint) { 449 var iterations uint 450 for scanner.Scan() { 451 iterations += 1 452 } 453 454 if iterations != expectedIter { 455 t.Fatalf("expected[%d] Scan calls, got [%d]", expectedIter, iterations) 456 } 457 458 if scanner.Err() == nil { 459 t.Fatal("expected failure on scanner, got none") 460 } 461 } 462 463 func newBinary(size uint) []byte { 464 // WHY: Starting with the most significant bit as 1 helps to test 465 // UTF-8 corner cases. Don't change this without providing 466 // testing for this. Not the best way to do this (not explicit) 467 // but it is what we have for today =). 468 bin := make([]byte, size) 469 for i := 0; i < int(size); i++ { 470 bin[i] = 0xFF 471 } 472 return bin 473 }