github.com/requaos/go-readability@v0.0.0-20181130134248-61a0ddd715c5/utils.go (about) 1 package readability 2 3 import ( 4 "crypto/md5" 5 "fmt" 6 "os" 7 "strings" 8 "unicode/utf8" 9 10 "github.com/PuerkitoBio/goquery" 11 ) 12 13 func createDocFromFile(path string) (*goquery.Document, error) { 14 // Open file 15 src, err := os.Open(path) 16 if err != nil { 17 return nil, err 18 } 19 defer src.Close() 20 21 // Create document 22 return goquery.NewDocumentFromReader(src) 23 } 24 25 func hashNode(node *goquery.Selection) string { 26 if node == nil { 27 return "" 28 } 29 30 html, _ := node.Html() 31 return fmt.Sprintf("%x", md5.Sum([]byte(html))) 32 } 33 34 func strLen(str string) int { 35 return utf8.RuneCountInString(str) 36 } 37 38 func findSeparator(str string, separators ...string) (int, string) { 39 words := strings.Fields(str) 40 for i, word := range words { 41 for _, separator := range separators { 42 if word == separator { 43 return i, separator 44 } 45 } 46 } 47 48 return -1, "" 49 } 50 51 func hasSeparator(str string, separators ...string) bool { 52 idx, _ := findSeparator(str, separators...) 53 return idx != -1 54 } 55 56 func removeSeparator(str string, separators ...string) string { 57 words := strings.Fields(str) 58 finalWords := []string{} 59 60 for _, word := range words { 61 for _, separator := range separators { 62 if word != separator { 63 finalWords = append(finalWords, word) 64 } 65 } 66 } 67 68 return strings.Join(finalWords, " ") 69 } 70 71 func normalizeText(str string) string { 72 return strings.Join(strings.Fields(str), " ") 73 }