github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/06_img2link.go (about) 1 package domclean2 2 3 import ( 4 "fmt" 5 "strings" 6 7 "github.com/pbberlin/tools/net/http/dom" 8 "github.com/pbberlin/tools/stringspb" 9 "golang.org/x/net/html" 10 ) 11 12 func closureTextNodeExists(img *html.Node) (found bool) { 13 14 txt := attrX(img.Attr, "title") 15 if len(txt) < 5 { 16 return false 17 } 18 txt = stringspb.NormalizeInnerWhitespace(txt) 19 txt = strings.TrimSpace(txt) 20 21 // We dont search entire document, but three levels above image subtree 22 grandParent := img 23 for i := 0; i < 4; i++ { 24 if grandParent.Parent != nil { 25 grandParent = grandParent.Parent 26 } else { 27 // log.Printf("LevelsUp %v for %q", i, txt) 28 break 29 } 30 } 31 32 var recurseTextNodes func(n *html.Node) 33 recurseTextNodes = func(n *html.Node) { 34 35 if found { 36 return 37 } 38 39 cc := []*html.Node{} 40 for c := n.FirstChild; c != nil; c = c.NextSibling { 41 cc = append(cc, c) 42 } 43 for _, c := range cc { 44 recurseTextNodes(c) 45 } 46 47 if n.Type == html.TextNode { 48 n.Data = stringspb.NormalizeInnerWhitespace(n.Data) 49 if len(n.Data) >= len(txt) { 50 // if strings.Contains(txt, "FDP") { 51 // log.Printf("%25v %v", stringspb.Ellipsoider(txt, 10), stringspb.Ellipsoider(n.Data, 10)) 52 // } 53 fnd := strings.Contains(n.Data, txt) 54 if fnd { 55 found = true 56 return 57 } 58 } 59 } 60 } 61 recurseTextNodes(grandParent) 62 63 return 64 } 65 66 func img2Link(img *html.Node) { 67 68 if img.Data == "img" { 69 70 img.Data = "a" 71 for i := 0; i < len(img.Attr); i++ { 72 if img.Attr[i].Key == "src" { 73 img.Attr[i].Key = "href" 74 } 75 } 76 77 double := closureTextNodeExists(img) 78 imgContent := "" 79 title := attrX(img.Attr, "title") 80 81 if double { 82 imgContent = fmt.Sprintf("[img] %v %v | ", 83 "[ctdr]", // content title double removed 84 urlBeautify(attrX(img.Attr, "href"))) 85 86 } else { 87 imgContent = fmt.Sprintf("[img] %v %v | ", 88 title, 89 urlBeautify(attrX(img.Attr, "href"))) 90 } 91 92 img.Attr = attrSet(img.Attr, "cfrom", "img") 93 nd := dom.Nd("text", imgContent) 94 img.AppendChild(nd) 95 } 96 97 } 98 99 func recurseImg2Link(n *html.Node) { 100 101 cc := []*html.Node{} 102 for c := n.FirstChild; c != nil; c = c.NextSibling { 103 cc = append(cc, c) 104 } 105 for _, c := range cc { 106 recurseImg2Link(c) 107 } 108 109 if n.Type == html.ElementNode && n.Data == "img" { 110 img2Link(n) 111 } 112 }