github.com/requaos/go-readability@v0.0.0-20181130134248-61a0ddd715c5/read_test.go (about) 1 package readability 2 3 import ( 4 nurl "net/url" 5 "strings" 6 "testing" 7 "time" 8 9 "github.com/PuerkitoBio/goquery" 10 ) 11 12 func BenchmarkReadability(b *testing.B) { 13 urls := []string{ 14 "https://www.nytimes.com/2018/01/21/technology/inside-amazon-go-a-store-of-the-future.html", 15 "http://www.dwmkerr.com/the-death-of-microservice-madness-in-2018/", 16 "https://www.eurekalert.org/pub_releases/2018-01/uoe-stt011118.php", 17 "http://www.slate.com/articles/arts/books/2018/01/the_reviewer_s_fallacy_when_critics_aren_t_critical_enough.html", 18 "https://www.theatlantic.com/business/archive/2018/01/german-board-games-catan/550826/?single_page=true", 19 "http://www.weeklystandard.com/the-anti-bamboozler/article/2011032", 20 "http://www.inquiriesjournal.com/articles/1657/the-impact-of-listening-to-music-on-cognitive-performance", 21 } 22 23 for _, url := range urls { 24 parsedURL, _ := nurl.Parse(url) 25 FromURL(parsedURL, 5*time.Second) 26 } 27 } 28 29 func Test_removeScripts(t *testing.T) { 30 // Load test file 31 testDoc, err := createDocFromFile("test/removeScripts.html") 32 if err != nil { 33 t.Errorf("Failed to open test file: %v", err) 34 } 35 36 // Remove scripts and get HTML 37 removeScripts(testDoc) 38 html, err := testDoc.Html() 39 if err != nil { 40 t.Errorf("Failed to read HTML: %v", err) 41 } 42 43 // Compare results 44 html = rxSpaces.ReplaceAllString(html, "") 45 want := "<!DOCTYPE html>" + 46 "<html><head><title>Test Remove Scripts</title></head>" + 47 "<body></body></html>" 48 if html != want { 49 t.Errorf("Want: %s\nGot: %s", want, html) 50 } 51 } 52 53 func Test_replaceBrs(t *testing.T) { 54 // Load test file 55 testDoc, err := createDocFromFile("test/removeBrs.html") 56 if err != nil { 57 t.Errorf("Failed to open test file: %v", err) 58 } 59 60 // Replace BRs and get HTML 61 replaceBrs(testDoc) 62 html, err := testDoc.Html() 63 if err != nil { 64 t.Errorf("Failed to read HTML: %v", err) 65 } 66 67 // Compare results 68 html = rxSpaces.ReplaceAllString(html, "") 69 want := "<!DOCTYPE html>" + 70 "<html><head><title>Test Remove BRs</title></head>" + 71 "<body><div>foo<br/>bar<p>a b c</p></div></body></html>" 72 if html != want { 73 t.Errorf("Want: %s\nGot: %s", want, html) 74 } 75 } 76 77 func Test_prepDocument(t *testing.T) { 78 // Load test file 79 testDoc, err := createDocFromFile("test/prepDocument.html") 80 if err != nil { 81 t.Errorf("Failed to open test file: %v", err) 82 } 83 84 // Prep document and get HTML 85 prepDocument(testDoc) 86 html, err := testDoc.Html() 87 if err != nil { 88 t.Errorf("Failed to read HTML: %v", err) 89 } 90 91 // Compare results 92 html = rxSpaces.ReplaceAllString(html, "") 93 want := "<!DOCTYPE html>" + 94 "<html><head><title>Test Prep Documents</title></head>" + 95 "<body><span>Bip bop</span>" + 96 "<div>foo<br/>bar<p>a b c</p></div>" + 97 "</body></html>" 98 if html != want { 99 t.Errorf("Want: %s\nGot: %s", want, html) 100 } 101 } 102 103 func Test_getArticleTitle(t *testing.T) { 104 tests := make(map[string]string) 105 tests["test/getArticleTitle1.html"] = "Test Get Article Title 1" 106 tests["test/getArticleTitle2.html"] = "Get Awesome Article Title 2" 107 tests["test/getArticleTitle3.html"] = "Test: Get Article Title 3" 108 109 for path, want := range tests { 110 // Load test file 111 testDoc, err := createDocFromFile(path) 112 if err != nil { 113 t.Errorf("Failed to open test file: %v", err) 114 } 115 116 // Get title and compare it 117 title := getArticleTitle(testDoc) 118 if title != want { 119 t.Errorf("Want: %s\nGot: %s", want, title) 120 } 121 } 122 } 123 124 func Test_getArticleMetadata(t *testing.T) { 125 tests := make(map[string]Metadata) 126 tests["test/getArticleMetadata1.html"] = Metadata{ 127 Title: "Just-released Minecraft exploit makes it easy to crash game servers", 128 Image: "http://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x426.jpg", 129 Excerpt: "Two-year-old bug exposes thousands of servers to crippling attack.", 130 } 131 tests["test/getArticleMetadata2.html"] = Metadata{ 132 Title: "Daring Fireball: Colophon", 133 } 134 135 for path, want := range tests { 136 // Load test file 137 testDoc, err := createDocFromFile(path) 138 if err != nil { 139 t.Errorf("Failed to open test file: %v", err) 140 } 141 142 // Get metadata and compare it 143 metadata := getArticleMetadata(testDoc) 144 if metadata.Title != want.Title || metadata.Image != want.Image || metadata.Excerpt != want.Excerpt { 145 t.Errorf("Want: '%s',%s,'%s'\nGot: '%s',%s,'%s'", 146 want.Title, want.Image, want.Excerpt, 147 metadata.Title, metadata.Image, metadata.Excerpt) 148 } 149 } 150 } 151 152 func Test_hasSinglePInsideElement(t *testing.T) { 153 scenario1 := `<div>Hello</div>` 154 scenario2 := `<div><p>Hello</p></div>` 155 scenario3 := `<div><p>Hello</p><p>this is test</p></div>` 156 157 tests := map[string]bool{ 158 scenario1: false, 159 scenario2: true, 160 scenario3: false, 161 } 162 163 for test, want := range tests { 164 // Generate test document 165 reader := strings.NewReader(test) 166 doc, err := goquery.NewDocumentFromReader(reader) 167 if err != nil { 168 t.Errorf("Failed to generate test document: %v", err) 169 } 170 171 // Check element 172 result := hasSinglePInsideElement(doc.Find("div").First()) 173 if result != want { 174 t.Errorf("%s\nWant: %t got: %t", test, want, result) 175 } 176 } 177 } 178 179 func Test_toAbsoluteURI(t *testing.T) { 180 base, _ := nurl.ParseRequestURI("http://localhost:8080") 181 tests := map[string]string{ 182 "/test/123": "http://localhost:8080/test/123", 183 "test/123": "http://localhost:8080/test/123", 184 "https://www.google.com": "https://www.google.com", 185 "ftp://ftp.server.com": "ftp://ftp.server.com", 186 "www.google.com": "http://localhost:8080/www.google.com", 187 "http//www.google.com": "http://localhost:8080/http//www.google.com", 188 "//google.com": "http://google.com", 189 } 190 191 for test, want := range tests { 192 result := toAbsoluteURI(test, base) 193 if result != want { 194 t.Errorf("%s\nWant: %s got: %s", test, want, result) 195 } 196 } 197 }