github.com/requaos/go-readability@v0.0.0-20181130134248-61a0ddd715c5/utils.go (about)

     1  package readability
     2  
     3  import (
     4  	"crypto/md5"
     5  	"fmt"
     6  	"os"
     7  	"strings"
     8  	"unicode/utf8"
     9  
    10  	"github.com/PuerkitoBio/goquery"
    11  )
    12  
    13  func createDocFromFile(path string) (*goquery.Document, error) {
    14  	// Open file
    15  	src, err := os.Open(path)
    16  	if err != nil {
    17  		return nil, err
    18  	}
    19  	defer src.Close()
    20  
    21  	// Create document
    22  	return goquery.NewDocumentFromReader(src)
    23  }
    24  
    25  func hashNode(node *goquery.Selection) string {
    26  	if node == nil {
    27  		return ""
    28  	}
    29  
    30  	html, _ := node.Html()
    31  	return fmt.Sprintf("%x", md5.Sum([]byte(html)))
    32  }
    33  
    34  func strLen(str string) int {
    35  	return utf8.RuneCountInString(str)
    36  }
    37  
    38  func findSeparator(str string, separators ...string) (int, string) {
    39  	words := strings.Fields(str)
    40  	for i, word := range words {
    41  		for _, separator := range separators {
    42  			if word == separator {
    43  				return i, separator
    44  			}
    45  		}
    46  	}
    47  
    48  	return -1, ""
    49  }
    50  
    51  func hasSeparator(str string, separators ...string) bool {
    52  	idx, _ := findSeparator(str, separators...)
    53  	return idx != -1
    54  }
    55  
    56  func removeSeparator(str string, separators ...string) string {
    57  	words := strings.Fields(str)
    58  	finalWords := []string{}
    59  
    60  	for _, word := range words {
    61  		for _, separator := range separators {
    62  			if word != separator {
    63  				finalWords = append(finalWords, word)
    64  			}
    65  		}
    66  	}
    67  
    68  	return strings.Join(finalWords, " ")
    69  }
    70  
    71  func normalizeText(str string) string {
    72  	return strings.Join(strings.Fields(str), " ")
    73  }