github.com/everdrone/grab@v0.1.7-0.20230416223925-40674b995521/internal/instance/cache_test.go (about) 1 package instance 2 3 import ( 4 "net/http/httptest" 5 "path/filepath" 6 "reflect" 7 "regexp" 8 "testing" 9 10 "github.com/everdrone/grab/internal/config" 11 tu "github.com/everdrone/grab/testutils" 12 ) 13 14 func TestBuildSiteCache(t *testing.T) { 15 tests := []struct { 16 Name string 17 RegexCache config.RegexCacheMap 18 URLs []string 19 Config *config.Config 20 Want *config.Config 21 }{ 22 { 23 Name: "no urls", 24 Config: &config.Config{}, 25 RegexCache: config.RegexCacheMap{}, 26 URLs: []string{}, 27 Want: &config.Config{}, 28 }, 29 { 30 Name: "one url", 31 RegexCache: config.RegexCacheMap{ 32 `example\.com`: regexp.MustCompile(`example\.com`), 33 }, 34 URLs: []string{"https://example.com/gallery/test"}, 35 Config: &config.Config{ 36 Sites: []config.SiteConfig{ 37 {Name: "example", 38 Test: "example\\.com", 39 }, 40 }, 41 }, 42 Want: &config.Config{ 43 Sites: []config.SiteConfig{ 44 {Name: "example", 45 Test: "example\\.com", 46 URLs: []string{"https://example.com/gallery/test"}, 47 }, 48 }, 49 }, 50 }, 51 { 52 Name: "multiple urls", 53 RegexCache: config.RegexCacheMap{ 54 `example\.com`: regexp.MustCompile(`example\.com`), 55 }, 56 URLs: []string{"https://example.com/gallery/test", "https://example.com/other"}, 57 Config: &config.Config{ 58 Sites: []config.SiteConfig{ 59 {Name: "example", 60 Test: "example\\.com", 61 }, 62 }, 63 }, 64 Want: &config.Config{ 65 Sites: []config.SiteConfig{ 66 {Name: "example", 67 Test: "example\\.com", 68 URLs: []string{"https://example.com/gallery/test", "https://example.com/other"}, 69 }, 70 }, 71 }, 72 }, 73 { 74 Name: "no matches", 75 RegexCache: config.RegexCacheMap{ 76 "example\\.com": regexp.MustCompile(`example\.com`), 77 }, 78 URLs: []string{"https://not-matching.com/gallery/1", "https://not-matching.com/gallery/1"}, 79 Config: &config.Config{ 80 Sites: []config.SiteConfig{ 81 {Name: "example", 82 Test: "example\\.com", 83 }, 84 }, 85 }, 86 Want: &config.Config{ 87 Sites: []config.SiteConfig{ 88 {Name: "example", 89 Test: "example\\.com", 90 URLs: []string(nil), 91 }, 92 }, 93 }, 94 }, 95 } 96 97 for _, tt := range tests { 98 t.Run(tt.Name, func(tc *testing.T) { 99 g := New(nil) 100 101 g.Config = tt.Config 102 g.RegexCache = tt.RegexCache 103 g.URLs = tt.URLs 104 105 g.BuildSiteCache() 106 107 if !reflect.DeepEqual(g.Config, tt.Want) { 108 tc.Errorf("got: %+v, want: %+v", g.Config, tt.Want) 109 } 110 }) 111 } 112 } 113 114 func TestRemovePathFromURL(t *testing.T) { 115 tests := []struct { 116 Name string 117 URL string 118 Want string 119 WantErr bool 120 }{ 121 { 122 Name: "no path", 123 URL: "https://example.com", 124 Want: "https://example.com", 125 }, 126 { 127 Name: "path", 128 URL: "https://example.com/path", 129 Want: "https://example.com", 130 }, 131 { 132 Name: "invalid url", 133 URL: "1ht tp://example.com", 134 WantErr: true, 135 }, 136 } 137 138 for _, tt := range tests { 139 t.Run(tt.Name, func(tc *testing.T) { 140 got, err := removePathFromURL(tt.URL) 141 if (err != nil) != tt.WantErr { 142 tc.Errorf("got: %+v, want: %+v", err, tt.WantErr) 143 } 144 145 if tt.Want != "" { 146 if got.String() != tt.Want { 147 tc.Errorf("got: %s, want: %s", got, tt.Want) 148 } 149 } 150 }) 151 } 152 } 153 154 // FIXME: from this line down, the code is a mess. 155 // it does test the functionality of cache.go but it's very very messy. 156 // it should be refactored. 157 func TestBuildAssetCache(t *testing.T) { 158 testPath := `/gallery/123/test?id=543` 159 root := tu.GetOSRoot() 160 globalLocation := filepath.Join(root, "global") 161 162 // create test server 163 e := tu.CreateMockServer() 164 165 // hacky way of getting the same port as echo's listener 166 // see: https://stackoverflow.com/a/42218765 167 ts := httptest.NewUnstartedServer(e) 168 169 ts.Listener.Close() 170 ts.Listener = e.Listener 171 ts.Start() 172 173 defer ts.Close() 174 175 tests := []struct { 176 Name string 177 Flags *FlagsState 178 URLs []string 179 Config string 180 Want *config.Config 181 WantErr bool 182 }{ 183 { 184 Name: "not found", 185 Flags: &FlagsState{}, 186 URLs: []string{ts.URL + "/notFound"}, 187 Config: ` 188 global { 189 location = "` + tu.EscapeHCLString(globalLocation) + `" 190 } 191 192 site "example" { 193 test = "http://(127\\.0\\.0\\.1|localhost):" 194 asset "image" { 195 pattern = "\\/img\\/\\w+\\.\\w+" 196 capture = 0 197 find_all = true 198 } 199 }`, 200 Want: &config.Config{ 201 Sites: []config.SiteConfig{ 202 { 203 Assets: []config.AssetConfig{ 204 { 205 Downloads: nil, 206 }, 207 }, 208 }, 209 }, 210 }, 211 WantErr: false, 212 }, 213 { 214 Name: "not found strict", 215 Flags: &FlagsState{Strict: true}, 216 URLs: []string{ts.URL + "/notFound"}, 217 Config: ` 218 global { 219 location = "` + tu.EscapeHCLString(globalLocation) + `" 220 } 221 222 site "example" { 223 test = "http://(127\\.0\\.0\\.1|localhost):" 224 asset "image" { 225 pattern = "\\/img\\/\\w+\\.\\w+" 226 capture = 0 227 find_all = true 228 } 229 }`, 230 Want: &config.Config{ 231 Sites: []config.SiteConfig{ 232 { 233 Assets: []config.AssetConfig{ 234 { 235 Downloads: nil, 236 }, 237 }, 238 }, 239 }, 240 }, 241 WantErr: true, 242 }, 243 { 244 Name: "no urls", 245 Flags: &FlagsState{}, 246 Config: ` 247 global { 248 location = "` + tu.EscapeHCLString(globalLocation) + `" 249 } 250 251 site "example" { 252 test = "http://(127\\.0\\.0\\.1|localhost):" 253 asset "image" { 254 pattern = "\\/img\\/\\w+\\.\\w+" 255 capture = 0 256 find_all = true 257 } 258 }`, 259 Want: &config.Config{ 260 Sites: []config.SiteConfig{ 261 { 262 Assets: []config.AssetConfig{ 263 { 264 Downloads: nil, 265 }, 266 }, 267 }, 268 }, 269 }, 270 WantErr: false, 271 }, 272 { 273 Name: "find one asset", 274 Flags: &FlagsState{}, 275 URLs: []string{ts.URL + testPath}, 276 Config: ` 277 global { 278 location = "` + tu.EscapeHCLString(globalLocation) + `" 279 } 280 281 site "example" { 282 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 283 asset "image" { 284 pattern = "<img src=\"([^\"]+/img/[^\"]+)" 285 capture = 1 286 find_all = false 287 } 288 }`, 289 Want: &config.Config{ 290 Sites: []config.SiteConfig{ 291 { 292 Assets: []config.AssetConfig{ 293 { 294 Downloads: map[string]string{ 295 ts.URL + "/img/a.jpg": filepath.Join(globalLocation, "example", "a.jpg"), 296 }, 297 }, 298 }, 299 InfoMap: map[string]map[string]string{ 300 filepath.Join(globalLocation, "example"): { 301 "url": ts.URL + testPath, 302 }, 303 }, 304 }, 305 }, 306 }, 307 WantErr: false, 308 }, 309 { 310 Name: "simple assets", 311 Flags: &FlagsState{}, 312 URLs: []string{ts.URL + testPath}, 313 Config: ` 314 global { 315 location = "` + tu.EscapeHCLString(globalLocation) + `" 316 } 317 318 site "example" { 319 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 320 asset "image" { 321 pattern = "<img src=\"([^\"]+/img/[^\"]+)" 322 capture = 1 323 find_all = true 324 } 325 }`, 326 Want: &config.Config{ 327 Sites: []config.SiteConfig{ 328 { 329 Assets: []config.AssetConfig{ 330 { 331 Downloads: map[string]string{ 332 ts.URL + "/img/a.jpg": filepath.Join(globalLocation, "example", "a.jpg"), 333 ts.URL + "/img/b.jpg": filepath.Join(globalLocation, "example", "b.jpg"), 334 ts.URL + "/img/c.jpg": filepath.Join(globalLocation, "example", "c.jpg"), 335 }, 336 }, 337 }, 338 InfoMap: map[string]map[string]string{ 339 filepath.Join(globalLocation, "example"): { 340 "url": ts.URL + testPath, 341 }, 342 }, 343 }, 344 }, 345 }, 346 WantErr: false, 347 }, 348 { 349 Name: "relative assets", 350 Flags: &FlagsState{}, 351 URLs: []string{ts.URL + testPath}, 352 Config: ` 353 global { 354 location = "` + tu.EscapeHCLString(globalLocation) + `" 355 } 356 357 site "example" { 358 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 359 asset "image" { 360 pattern = "<img src=\"(/img/[^\"]+)" 361 capture = 1 362 find_all = true 363 } 364 }`, 365 Want: &config.Config{ 366 Sites: []config.SiteConfig{ 367 { 368 Assets: []config.AssetConfig{ 369 { 370 Downloads: map[string]string{ 371 ts.URL + "/img/a.jpg": filepath.Join(globalLocation, "example", "a.jpg"), 372 ts.URL + "/img/b.jpg": filepath.Join(globalLocation, "example", "b.jpg"), 373 ts.URL + "/img/c.jpg": filepath.Join(globalLocation, "example", "c.jpg"), 374 }, 375 }, 376 }, 377 InfoMap: map[string]map[string]string{ 378 filepath.Join(globalLocation, "example"): { 379 "url": ts.URL + testPath, 380 }, 381 }, 382 }, 383 }, 384 }, 385 WantErr: false, 386 }, 387 { 388 Name: "subdirectory from url", 389 Flags: &FlagsState{}, 390 URLs: []string{ts.URL + testPath}, 391 Config: ` 392 global { 393 location = "` + tu.EscapeHCLString(globalLocation) + `" 394 } 395 396 site "example" { 397 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 398 asset "image" { 399 pattern = "<img src=\"([^\"]+/img/[^\"]+)" 400 capture = 1 401 find_all = true 402 } 403 404 subdirectory { 405 pattern = "\\/gallery\\/(\\d+)" 406 capture = 1 407 from = url 408 } 409 }`, 410 Want: &config.Config{ 411 Sites: []config.SiteConfig{ 412 { 413 Assets: []config.AssetConfig{ 414 { 415 Downloads: map[string]string{ 416 ts.URL + "/img/a.jpg": filepath.Join(globalLocation, "example", "123", "a.jpg"), 417 ts.URL + "/img/b.jpg": filepath.Join(globalLocation, "example", "123", "b.jpg"), 418 ts.URL + "/img/c.jpg": filepath.Join(globalLocation, "example", "123", "c.jpg"), 419 }, 420 }, 421 }, 422 InfoMap: map[string]map[string]string{ 423 filepath.Join(globalLocation, "example", "123"): { 424 "url": ts.URL + testPath, 425 }, 426 }, 427 }, 428 }, 429 }, 430 WantErr: false, 431 }, 432 { 433 Name: "bad subdirectory capture group", 434 Flags: &FlagsState{}, 435 URLs: []string{ts.URL + testPath}, 436 Config: ` 437 global { 438 location = "` + tu.EscapeHCLString(globalLocation) + `" 439 } 440 441 site "example" { 442 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 443 asset "image" { 444 pattern = "<img src=\"([^\"]+/img/[^\"]+)" 445 capture = 1 446 find_all = true 447 } 448 449 subdirectory { 450 pattern = "\\/gallery\\/(\\d+)" 451 capture = 12 452 from = url 453 } 454 }`, 455 Want: &config.Config{ 456 Sites: []config.SiteConfig{ 457 { 458 Assets: []config.AssetConfig{ 459 { 460 Downloads: nil, 461 }, 462 }, 463 }, 464 }, 465 }, 466 WantErr: true, 467 }, 468 { 469 Name: "bad asset capture group", 470 Flags: &FlagsState{}, 471 URLs: []string{ts.URL + testPath}, 472 Config: ` 473 global { 474 location = "` + tu.EscapeHCLString(globalLocation) + `" 475 } 476 477 site "example" { 478 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 479 asset "image" { 480 pattern = "<img src=\"([^\"]+/img/[^\"]+)" 481 capture = "name" 482 find_all = true 483 } 484 }`, 485 Want: &config.Config{ 486 Sites: []config.SiteConfig{ 487 { 488 Assets: []config.AssetConfig{ 489 { 490 Downloads: nil, 491 }, 492 }, 493 }, 494 }, 495 }, 496 WantErr: true, 497 }, 498 { 499 Name: "bad captured url", 500 Flags: &FlagsState{}, 501 URLs: []string{ts.URL + testPath}, 502 Config: ` 503 global { 504 location = "` + tu.EscapeHCLString(globalLocation) + `" 505 } 506 507 site "example" { 508 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 509 asset "image" { 510 pattern = "<a href=\"([^\"]+/bad_url/[^\"]+)" 511 capture = 1 512 find_all = true 513 } 514 }`, 515 Want: &config.Config{ 516 Sites: []config.SiteConfig{ 517 { 518 Assets: []config.AssetConfig{ 519 { 520 Downloads: nil, 521 }, 522 }, 523 }, 524 }, 525 }, 526 WantErr: true, 527 }, 528 { 529 Name: "bad info capture", 530 Flags: &FlagsState{}, 531 URLs: []string{ts.URL + testPath}, 532 Config: ` 533 global { 534 location = "` + tu.EscapeHCLString(globalLocation) + `" 535 } 536 537 site "example" { 538 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 539 info "author" { 540 pattern = "Author: @(?P<username>[^<]+)" 541 capture = "username" 542 } 543 544 info "title" { 545 pattern = "<title>([^<]+)" 546 capture = 3 547 } 548 }`, 549 Want: &config.Config{ 550 Sites: []config.SiteConfig{ 551 { 552 Assets: []config.AssetConfig{ 553 { 554 Downloads: nil, 555 }, 556 }, 557 }, 558 }, 559 }, 560 WantErr: true, 561 }, 562 { 563 Name: "subdirectory from body", 564 Flags: &FlagsState{}, 565 URLs: []string{ts.URL + testPath}, 566 Config: ` 567 global { 568 location = "` + tu.EscapeHCLString(globalLocation) + `" 569 } 570 571 site "example" { 572 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 573 asset "image" { 574 pattern = "<img src=\"([^\"]+/img/[^\"]+)" 575 capture = 1 576 find_all = true 577 } 578 579 subdirectory { 580 pattern = "Author: @(?P<username>[^<]+)" 581 capture = "username" 582 from = body 583 } 584 }`, 585 Want: &config.Config{ 586 Sites: []config.SiteConfig{ 587 { 588 Assets: []config.AssetConfig{ 589 { 590 Downloads: map[string]string{ 591 ts.URL + "/img/a.jpg": filepath.Join(globalLocation, "example", "everdrone", "a.jpg"), 592 ts.URL + "/img/b.jpg": filepath.Join(globalLocation, "example", "everdrone", "b.jpg"), 593 ts.URL + "/img/c.jpg": filepath.Join(globalLocation, "example", "everdrone", "c.jpg"), 594 }, 595 }, 596 }, 597 InfoMap: map[string]map[string]string{ 598 filepath.Join(globalLocation, "example", "everdrone"): { 599 "url": ts.URL + testPath, 600 }, 601 }, 602 }, 603 }, 604 }, 605 WantErr: false, 606 }, 607 { 608 Name: "transform url", 609 Flags: &FlagsState{}, 610 URLs: []string{ts.URL + testPath}, 611 Config: ` 612 global { 613 location = "` + tu.EscapeHCLString(globalLocation) + `" 614 } 615 616 site "example" { 617 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 618 asset "video" { 619 pattern = "<video src=\"([^\"]+)" 620 capture = 1 621 find_all = true 622 623 transform url { 624 pattern = "(.+)small(.*)" 625 replace = "$${1}large$2" 626 } 627 } 628 }`, 629 Want: &config.Config{ 630 Sites: []config.SiteConfig{ 631 { 632 Assets: []config.AssetConfig{ 633 { 634 Downloads: map[string]string{ 635 ts.URL + "/video/a/large.mp4": filepath.Join(globalLocation, "example", "large.mp4"), 636 ts.URL + "/video/b/large.mp4": filepath.Join(globalLocation, "example", "large.mp4"), 637 ts.URL + "/video/c/large.mp4": filepath.Join(globalLocation, "example", "large.mp4"), 638 }, 639 }, 640 }, 641 InfoMap: map[string]map[string]string{ 642 filepath.Join(globalLocation, "example"): { 643 "url": ts.URL + testPath, 644 }, 645 }, 646 }, 647 }, 648 }, 649 WantErr: false, 650 }, 651 { 652 Name: "transform filename", 653 Flags: &FlagsState{}, 654 URLs: []string{ts.URL + testPath}, 655 Config: ` 656 global { 657 location = "` + tu.EscapeHCLString(globalLocation) + `" 658 } 659 660 site "example" { 661 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 662 asset "video" { 663 pattern = "<video src=\"([^\"]+)" 664 capture = 1 665 find_all = true 666 667 transform filename { 668 pattern = ".+\\/video\\/(?P<id>\\w+)\\/(\\w+)\\.(?P<extension>\\w+)" 669 replace = "$${id}.$${extension}" 670 } 671 } 672 }`, 673 Want: &config.Config{ 674 Sites: []config.SiteConfig{ 675 { 676 Assets: []config.AssetConfig{ 677 { 678 Downloads: map[string]string{ 679 ts.URL + "/video/a/small.mp4": filepath.Join(globalLocation, "example", "a.mp4"), 680 ts.URL + "/video/b/small.mp4": filepath.Join(globalLocation, "example", "b.mp4"), 681 ts.URL + "/video/c/small.mp4": filepath.Join(globalLocation, "example", "c.mp4"), 682 }, 683 }, 684 }, 685 InfoMap: map[string]map[string]string{ 686 filepath.Join(globalLocation, "example"): { 687 "url": ts.URL + testPath, 688 }, 689 }, 690 }, 691 }, 692 }, 693 WantErr: false, 694 }, 695 { 696 Name: "transform filename absolute", 697 Flags: &FlagsState{}, 698 URLs: []string{ts.URL + testPath}, 699 Config: ` 700 global { 701 location = "` + tu.EscapeHCLString(globalLocation) + `" 702 } 703 704 site "example" { 705 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 706 asset "video" { 707 pattern = "<video src=\"([^\"]+)" 708 capture = 1 709 find_all = true 710 711 transform filename { 712 pattern = ".+\\/video\\/(?P<id>\\w+)\\/(\\w+)\\.(?P<extension>\\w+)" 713 replace = "` + tu.EscapeHCLString(root) + `$${id}.$${extension}" 714 } 715 } 716 }`, 717 Want: &config.Config{ 718 Sites: []config.SiteConfig{ 719 { 720 Assets: []config.AssetConfig{ 721 { 722 Downloads: map[string]string{ 723 ts.URL + "/video/a/small.mp4": filepath.Join(root, "a.mp4"), 724 ts.URL + "/video/b/small.mp4": filepath.Join(root, "b.mp4"), 725 ts.URL + "/video/c/small.mp4": filepath.Join(root, "c.mp4"), 726 }, 727 }, 728 }, 729 InfoMap: map[string]map[string]string{ 730 filepath.Join(globalLocation, "example"): { 731 "url": ts.URL + testPath, 732 }, 733 }, 734 }, 735 }, 736 }, 737 WantErr: false, 738 }, 739 { 740 Name: "info", 741 Flags: &FlagsState{}, 742 URLs: []string{ts.URL + testPath}, 743 Config: ` 744 global { 745 location = "` + tu.EscapeHCLString(globalLocation) + `" 746 } 747 748 site "example" { 749 test = "http:\\/\\/127\\.0\\.0\\.1:\\d+" 750 info "author" { 751 pattern = "Author: @(?P<username>[^<]+)" 752 capture = "username" 753 } 754 755 info "title" { 756 pattern = "<title>([^<]+)" 757 capture = 1 758 } 759 }`, 760 Want: &config.Config{ 761 Sites: []config.SiteConfig{ 762 { 763 Assets: []config.AssetConfig{ 764 { 765 Downloads: map[string]string(nil), 766 }, 767 }, 768 InfoMap: map[string]map[string]string{ 769 filepath.Join(globalLocation, "example"): { 770 "url": ts.URL + testPath, 771 "author": "everdrone", 772 "title": "Grab Test Server", 773 }, 774 }, 775 }, 776 }, 777 }, 778 WantErr: false, 779 }, 780 } 781 782 for _, tt := range tests { 783 t.Run(tt.Name, func(tc *testing.T) { 784 g := New(nil) 785 g.Flags = tt.Flags 786 787 config, _, regexCache, diags := config.Parse([]byte(tt.Config), "test.hcl") 788 if diags.HasErrors() { 789 tc.Errorf("got errors: %+v", diags) 790 } 791 g.Config = config 792 g.RegexCache = regexCache 793 794 g.URLs = tt.URLs 795 g.BuildSiteCache() 796 797 gotDiags := g.BuildAssetCache() 798 799 if gotDiags.HasErrors() != tt.WantErr { 800 tc.Errorf("got: %+v, want errors: %+v", gotDiags.HasErrors(), tt.WantErr) 801 } 802 803 for i, site := range g.Config.Sites { 804 for j, asset := range site.Assets { 805 if !reflect.DeepEqual(asset.Downloads, tt.Want.Sites[i].Assets[j].Downloads) { 806 tc.Errorf("got: %+v, want: %+v", asset.Downloads, tt.Want.Sites[i].Assets[j].Downloads) 807 } 808 } 809 810 gotInfoMap := site.InfoMap 811 wantInfoMap := tt.Want.Sites[i].InfoMap 812 813 compareInfoMaps(tc, gotInfoMap, wantInfoMap) 814 } 815 }) 816 } 817 } 818 819 func compareInfoMaps(t *testing.T, got, want config.InfoCacheMap) { 820 // check that the keys are the same 821 if !reflect.DeepEqual(getMapKeys(got), getMapKeys(want)) { 822 t.Errorf("got: %+v, want: %+v", getMapKeys(got), getMapKeys(want)) 823 } 824 825 // check that the values are the same but ignore the timestamp 826 for k, v := range got { 827 for k2, v2 := range v { 828 if k2 == "timestamp" { 829 continue 830 } 831 if want[k][k2] != v2 { 832 t.Errorf("got: %+v, want: %+v", got, want) 833 } 834 } 835 } 836 } 837 838 func getMapKeys[T any](m map[string]T) []string { 839 keys := make([]string, 0, len(m)) 840 for k := range m { 841 keys = append(keys, k) 842 } 843 return keys 844 }