github.com/requaos/go-readability@v0.0.0-20181130134248-61a0ddd715c5/read_test.go (about)

     1  package readability
     2  
     3  import (
     4  	nurl "net/url"
     5  	"strings"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/PuerkitoBio/goquery"
    10  )
    11  
    12  func BenchmarkReadability(b *testing.B) {
    13  	urls := []string{
    14  		"https://www.nytimes.com/2018/01/21/technology/inside-amazon-go-a-store-of-the-future.html",
    15  		"http://www.dwmkerr.com/the-death-of-microservice-madness-in-2018/",
    16  		"https://www.eurekalert.org/pub_releases/2018-01/uoe-stt011118.php",
    17  		"http://www.slate.com/articles/arts/books/2018/01/the_reviewer_s_fallacy_when_critics_aren_t_critical_enough.html",
    18  		"https://www.theatlantic.com/business/archive/2018/01/german-board-games-catan/550826/?single_page=true",
    19  		"http://www.weeklystandard.com/the-anti-bamboozler/article/2011032",
    20  		"http://www.inquiriesjournal.com/articles/1657/the-impact-of-listening-to-music-on-cognitive-performance",
    21  	}
    22  
    23  	for _, url := range urls {
    24  		parsedURL, _ := nurl.Parse(url)
    25  		FromURL(parsedURL, 5*time.Second)
    26  	}
    27  }
    28  
    29  func Test_removeScripts(t *testing.T) {
    30  	// Load test file
    31  	testDoc, err := createDocFromFile("test/removeScripts.html")
    32  	if err != nil {
    33  		t.Errorf("Failed to open test file: %v", err)
    34  	}
    35  
    36  	// Remove scripts and get HTML
    37  	removeScripts(testDoc)
    38  	html, err := testDoc.Html()
    39  	if err != nil {
    40  		t.Errorf("Failed to read HTML: %v", err)
    41  	}
    42  
    43  	// Compare results
    44  	html = rxSpaces.ReplaceAllString(html, "")
    45  	want := "<!DOCTYPE html>" +
    46  		"<html><head><title>Test Remove Scripts</title></head>" +
    47  		"<body></body></html>"
    48  	if html != want {
    49  		t.Errorf("Want: %s\nGot: %s", want, html)
    50  	}
    51  }
    52  
    53  func Test_replaceBrs(t *testing.T) {
    54  	// Load test file
    55  	testDoc, err := createDocFromFile("test/removeBrs.html")
    56  	if err != nil {
    57  		t.Errorf("Failed to open test file: %v", err)
    58  	}
    59  
    60  	// Replace BRs and get HTML
    61  	replaceBrs(testDoc)
    62  	html, err := testDoc.Html()
    63  	if err != nil {
    64  		t.Errorf("Failed to read HTML: %v", err)
    65  	}
    66  
    67  	// Compare results
    68  	html = rxSpaces.ReplaceAllString(html, "")
    69  	want := "<!DOCTYPE html>" +
    70  		"<html><head><title>Test Remove BRs</title></head>" +
    71  		"<body><div>foo<br/>bar<p>a b c</p></div></body></html>"
    72  	if html != want {
    73  		t.Errorf("Want: %s\nGot: %s", want, html)
    74  	}
    75  }
    76  
    77  func Test_prepDocument(t *testing.T) {
    78  	// Load test file
    79  	testDoc, err := createDocFromFile("test/prepDocument.html")
    80  	if err != nil {
    81  		t.Errorf("Failed to open test file: %v", err)
    82  	}
    83  
    84  	// Prep document and get HTML
    85  	prepDocument(testDoc)
    86  	html, err := testDoc.Html()
    87  	if err != nil {
    88  		t.Errorf("Failed to read HTML: %v", err)
    89  	}
    90  
    91  	// Compare results
    92  	html = rxSpaces.ReplaceAllString(html, "")
    93  	want := "<!DOCTYPE html>" +
    94  		"<html><head><title>Test Prep Documents</title></head>" +
    95  		"<body><span>Bip bop</span>" +
    96  		"<div>foo<br/>bar<p>a b c</p></div>" +
    97  		"</body></html>"
    98  	if html != want {
    99  		t.Errorf("Want: %s\nGot: %s", want, html)
   100  	}
   101  }
   102  
   103  func Test_getArticleTitle(t *testing.T) {
   104  	tests := make(map[string]string)
   105  	tests["test/getArticleTitle1.html"] = "Test Get Article Title 1"
   106  	tests["test/getArticleTitle2.html"] = "Get Awesome Article Title 2"
   107  	tests["test/getArticleTitle3.html"] = "Test: Get Article Title 3"
   108  
   109  	for path, want := range tests {
   110  		// Load test file
   111  		testDoc, err := createDocFromFile(path)
   112  		if err != nil {
   113  			t.Errorf("Failed to open test file: %v", err)
   114  		}
   115  
   116  		// Get title and compare it
   117  		title := getArticleTitle(testDoc)
   118  		if title != want {
   119  			t.Errorf("Want: %s\nGot: %s", want, title)
   120  		}
   121  	}
   122  }
   123  
   124  func Test_getArticleMetadata(t *testing.T) {
   125  	tests := make(map[string]Metadata)
   126  	tests["test/getArticleMetadata1.html"] = Metadata{
   127  		Title:   "Just-released Minecraft exploit makes it easy to crash game servers",
   128  		Image:   "http://cdn.arstechnica.net/wp-content/uploads/2015/04/server-crash-640x426.jpg",
   129  		Excerpt: "Two-year-old bug exposes thousands of servers to crippling attack.",
   130  	}
   131  	tests["test/getArticleMetadata2.html"] = Metadata{
   132  		Title: "Daring Fireball: Colophon",
   133  	}
   134  
   135  	for path, want := range tests {
   136  		// Load test file
   137  		testDoc, err := createDocFromFile(path)
   138  		if err != nil {
   139  			t.Errorf("Failed to open test file: %v", err)
   140  		}
   141  
   142  		// Get metadata and compare it
   143  		metadata := getArticleMetadata(testDoc)
   144  		if metadata.Title != want.Title || metadata.Image != want.Image || metadata.Excerpt != want.Excerpt {
   145  			t.Errorf("Want: '%s',%s,'%s'\nGot: '%s',%s,'%s'",
   146  				want.Title, want.Image, want.Excerpt,
   147  				metadata.Title, metadata.Image, metadata.Excerpt)
   148  		}
   149  	}
   150  }
   151  
   152  func Test_hasSinglePInsideElement(t *testing.T) {
   153  	scenario1 := `<div>Hello</div>`
   154  	scenario2 := `<div><p>Hello</p></div>`
   155  	scenario3 := `<div><p>Hello</p><p>this is test</p></div>`
   156  
   157  	tests := map[string]bool{
   158  		scenario1: false,
   159  		scenario2: true,
   160  		scenario3: false,
   161  	}
   162  
   163  	for test, want := range tests {
   164  		// Generate test document
   165  		reader := strings.NewReader(test)
   166  		doc, err := goquery.NewDocumentFromReader(reader)
   167  		if err != nil {
   168  			t.Errorf("Failed to generate test document: %v", err)
   169  		}
   170  
   171  		// Check element
   172  		result := hasSinglePInsideElement(doc.Find("div").First())
   173  		if result != want {
   174  			t.Errorf("%s\nWant: %t got: %t", test, want, result)
   175  		}
   176  	}
   177  }
   178  
   179  func Test_toAbsoluteURI(t *testing.T) {
   180  	base, _ := nurl.ParseRequestURI("http://localhost:8080")
   181  	tests := map[string]string{
   182  		"/test/123":              "http://localhost:8080/test/123",
   183  		"test/123":               "http://localhost:8080/test/123",
   184  		"https://www.google.com": "https://www.google.com",
   185  		"ftp://ftp.server.com":   "ftp://ftp.server.com",
   186  		"www.google.com":         "http://localhost:8080/www.google.com",
   187  		"http//www.google.com":   "http://localhost:8080/http//www.google.com",
   188  		"//google.com":           "http://google.com",
   189  	}
   190  
   191  	for test, want := range tests {
   192  		result := toAbsoluteURI(test, base)
   193  		if result != want {
   194  			t.Errorf("%s\nWant: %s got: %s", test, want, result)
   195  		}
   196  	}
   197  }