github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/07_condense_bottom_up_v2.go (about) 1 package domclean2 2 3 import ( 4 "bytes" 5 "fmt" 6 "log" 7 "strings" 8 9 "github.com/pbberlin/tools/net/http/dom" 10 "golang.org/x/net/html" 11 ) 12 13 func flattenSubtreeV2(n *html.Node, b *bytes.Buffer, depth int, tpar *html.Node) (*bytes.Buffer, *html.Node) { 14 15 if b == nil { 16 b = new(bytes.Buffer) 17 } 18 if tpar == nil { 19 tpar = &html.Node{ 20 Type: n.Type, 21 DataAtom: n.DataAtom, 22 Data: n.Data, 23 Attr: make([]html.Attribute, len(n.Attr)), 24 } 25 copy(tpar.Attr, n.Attr) 26 } 27 28 switch { 29 case n.Type == html.ElementNode && n.Data == "a": 30 n.Parent.RemoveChild(n) 31 tpar.AppendChild(n) 32 // wpf(b, "[a] ") 33 case n.Type == html.ElementNode && n.Data == "img": 34 // img2Link(n) 35 n.Parent.RemoveChild(n) 36 tpar.AppendChild(n) 37 case n.Data == "em" || n.Data == "strong": 38 wpf(b, "[%v l%v] ", n.Data, depth) 39 n.Parent.RemoveChild(n) 40 tpar.AppendChild(n) 41 case n.Data == "label" || n.Data == "input" || n.Data == "textarea": 42 n.Parent.RemoveChild(n) 43 tpar.AppendChild(n) 44 case n.Data == "p" || n.Data == "div" || n.Data == "li" || n.Data == "ol" || n.Data == "h1" || n.Data == "h2" || n.Data == "ul": 45 n.Parent.RemoveChild(n) 46 tpar.AppendChild(n) 47 case n.Data == "span": 48 for c := n.FirstChild; c != nil; c = c.NextSibling { 49 n.RemoveChild(c) 50 tpar.AppendChild(c) 51 } 52 n.Parent.RemoveChild(n) 53 case n.Type == html.TextNode && n.Data != "": 54 n.Data = strings.TrimSpace(n.Data) 55 n.Data += " " 56 wpf(b, n.Data) 57 n.Parent.RemoveChild(n) 58 tpar.AppendChild(n) 59 default: 60 log.Printf("unhandled %s %s\n", dom.NodeTypeStr(n.Type), n.Data) 61 n.Parent.RemoveChild(n) 62 } 63 64 // 65 // 66 children := []*html.Node{} 67 for c := n.FirstChild; c != nil; c = c.NextSibling { 68 // fmt.Printf("still has children %v\n", c.Data) 69 children = append(children, c) // assembling separately, before removing. 70 } 71 for _, c := range children { 72 flattenSubtreeV2(c, b, depth+1, tpar) 73 } 74 75 return b, tpar 76 } 77 78 func condenseBottomUpV2(n *html.Node, lvl, lvlDo int, types map[string]bool) { 79 80 if lvl < lvlDo { 81 82 cs := []*html.Node{} 83 for c := n.FirstChild; c != nil; c = c.NextSibling { 84 cs = append(cs, c) 85 } 86 for _, c := range cs { 87 condenseBottomUpV2(c, lvl+1, lvlDo, types) 88 } 89 90 } else { 91 92 // log.Printf("action on %v %v\n", lvl, lvlDo) 93 94 switch { 95 96 case n.Type == html.ElementNode && types[n.Data]: 97 98 oldPar := n.Parent 99 if oldPar == nil { 100 return 101 } 102 103 b, newPar := flattenSubtreeV2(n, nil, 0, nil) 104 105 // placeholder := dom.Nd("div") 106 // par := n.Parent 107 // par.InsertBefore(placeholder, n.NextSibling) 108 // par.RemoveChild(n) 109 // par.InsertBefore(n2, placeholder) 110 111 for c := oldPar.FirstChild; c != nil; c = c.NextSibling { 112 oldPar.RemoveChild(c) 113 } 114 115 for c := newPar.FirstChild; c != nil; c = c.NextSibling { 116 newPar.RemoveChild(c) 117 oldPar.AppendChild(c) 118 } 119 120 if lvlDo > 4 { 121 bx := dom.PrintSubtree(newPar) 122 fmt.Printf("%s", bx) 123 } 124 125 // n = n2 126 127 nodeRepl := dom.Nd("text", b.String()) 128 129 if false { 130 131 // Remove all existing children. 132 // Direct loop impossible, since "NextSibling" is set to nil by Remove(). 133 children := []*html.Node{} 134 for c := n.FirstChild; c != nil; c = c.NextSibling { 135 children = append(children, c) // assembling separately, before removing. 136 } 137 for _, c := range children { 138 log.Printf("c %4v rem from %4v ", c.Data, n.Data) 139 n.RemoveChild(c) 140 } 141 142 // we can't put our replacement "under" an image, since img cannot have children 143 if n.Type == html.ElementNode && n.Data == "img" { 144 n.Parent.InsertBefore(nodeRepl, n.NextSibling) // if n.NextSibling==nil => insert at the end 145 n.Parent.RemoveChild(n) 146 } else { 147 n.AppendChild(nodeRepl) 148 } 149 150 // Insert a || and a newline before every <a...> 151 // if n.Data == "a" { 152 // n.Parent.InsertBefore(dom.Nd("text", " || "), n) 153 // } 154 } 155 156 default: 157 } 158 159 } 160 161 }