github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/07_condense_bottom_up_v3.go (about) 1 package domclean2 2 3 import ( 4 "log" 5 "strings" 6 7 "github.com/pbberlin/tools/net/http/dom" 8 "golang.org/x/net/html" 9 ) 10 11 func flattenSubtreeV3(n, nClone *html.Node) { 12 13 // log.Printf("fsbo\n") 14 flattenSubtreeV3Inner(n, nClone, 0) 15 16 } 17 18 var standard = map[string]bool{ 19 20 "title": true, 21 22 "p": true, 23 "div": true, 24 "ul": true, 25 "ol": true, 26 "li": true, 27 "h1": true, 28 "h2": true, 29 30 "em": true, 31 "strong": true, 32 "label": true, 33 "input": true, 34 "textarea": true, 35 36 "form": true, 37 "blockquote": true, 38 } 39 40 func flattenSubtreeV3Inner(n, nClone *html.Node, lvl int) { 41 42 // log.Printf("fsbi\n") 43 44 for ch := n.FirstChild; ch != nil; ch = ch.NextSibling { 45 46 chClone := dom.CloneNode(ch) 47 48 switch { 49 50 case ch.Type == html.ElementNode && standard[ch.Data]: 51 nClone.AppendChild(chClone) 52 flattenSubtreeV3Inner(ch, chClone, lvl+1) 53 54 case ch.Type == html.ElementNode && ch.Data == "a": 55 nClone.AppendChild(chClone) 56 flattenSubtreeV3Inner(ch, chClone, lvl+1) 57 58 case ch.Type == html.ElementNode && ch.Data == "img": 59 nClone.AppendChild(chClone) 60 61 case ch.Data == "span": 62 // log.Printf(strings.Repeat(" ", lvl) + "span \n") 63 for cch := ch.FirstChild; cch != nil; cch = cch.NextSibling { 64 // log.Printf(strings.Repeat(" ", lvl)+"span child %v", cch.Data) 65 cchClone := dom.CloneNode(cch) 66 nClone.AppendChild(cchClone) 67 nClone.AppendChild(dom.Nd("text", " ")) 68 flattenSubtreeV3Inner(cch, cchClone, lvl+1) 69 } 70 71 case ch.Type == html.TextNode && ch.Data != "": 72 chClone.Data = strings.TrimSpace(chClone.Data) 73 chClone.Data += " " 74 nClone.AppendChild(chClone) 75 76 default: 77 // nClone.AppendChild(chClone) 78 log.Printf("unhandled %s %s\n", dom.NodeTypeStr(ch.Type), ch.Data) 79 80 } 81 82 } 83 84 } 85 86 func condenseBottomUpV3(n *html.Node, lvl, lvlDo int, unusedTypes map[string]bool) { 87 88 if lvl < lvlDo { 89 90 // Delve deeper until reaching lvlDo 91 cs := []*html.Node{} 92 for c := n.FirstChild; c != nil; c = c.NextSibling { 93 cs = append(cs, c) 94 } 95 for _, c := range cs { 96 condenseBottomUpV3(c, lvl+1, lvlDo, unusedTypes) 97 } 98 99 } else { 100 101 if n.Type == html.ElementNode { 102 103 nClone := dom.CloneNode(n) 104 flattenSubtreeV3(n, nClone) 105 106 nParent := n.Parent 107 nParent.InsertBefore(nClone, n) 108 nParent.RemoveChild(n) 109 110 // bx := dom.PrintSubtree(nParent) 111 // fmt.Printf("%s", bx) 112 } 113 114 } 115 116 }