github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/03_top_down_v1.go (about) 1 package domclean2 2 3 import ( 4 "github.com/pbberlin/tools/net/http/dom" 5 "golang.org/x/net/html" 6 ) 7 8 /* 9 div div 10 div p 11 p TO img 12 img p 13 p 14 15 16 Operates from the *middle* div. 17 Saves all children in inverted slice. 18 Removes each child and reattaches it one level higher. 19 Finally the intermediary, now childless div is removed. 20 21 22 23 24 \ / 25 \ /\ / 26 \_____/ \_____/ 27 28 \ / 29 \_____/\_____/ 30 31 \__________/ => Breaks are gone 32 33 34 \p1___p2___/ => Wrapping preserves breaks 35 36 37 38 39 */ 40 func topDownV1(n *html.Node, couple []string, parentType string) { 41 42 if noParent(n) { 43 return 44 } 45 p := n.Parent 46 47 parDiv := p.Type == html.ElementNode && p.Data == couple[0] // Parent is a div 48 iAmDiv := n.Type == html.ElementNode && n.Data == couple[1] // I am a div 49 50 noSiblings := n.PrevSibling == nil && n.NextSibling == nil 51 52 only1Child := n.FirstChild != nil && n.FirstChild == n.LastChild 53 svrlChildn := n.FirstChild != nil && n.FirstChild != n.LastChild 54 noChildren := n.FirstChild == nil 55 56 _, _ = noSiblings, noChildren 57 58 if parDiv && iAmDiv { 59 60 if only1Child || svrlChildn { 61 62 var children []*html.Node 63 for c := n.FirstChild; c != nil; c = c.NextSibling { 64 children = append([]*html.Node{c}, children...) // order inversion 65 } 66 67 insertionPoint := n.NextSibling 68 for _, c1 := range children { 69 70 n.RemoveChild(c1) 71 72 if c1.Type == html.TextNode || c1.Data == "a" { 73 // pf("wrapping %v\n", NodeTypeStr(c1.Type)) 74 wrap := html.Node{Type: html.ElementNode, Data: "p", 75 Attr: []html.Attribute{html.Attribute{Key: "cfrm", Val: "div"}}} 76 wrap.FirstChild = c1 77 p.InsertBefore(&wrap, insertionPoint) 78 c1.Parent = &wrap 79 insertionPoint = &wrap 80 81 } else { 82 p.InsertBefore(c1, insertionPoint) 83 insertionPoint = c1 84 } 85 86 } 87 p.RemoveChild(n) 88 if p.Data != parentType { 89 p.Data = parentType 90 } 91 92 } 93 94 } 95 96 } 97 98 func noParent(n *html.Node) bool { 99 100 p := n.Parent 101 if p == nil { 102 if n.Type == html.DoctypeNode || n.Type == html.DocumentNode { 103 return true 104 } 105 pf("parent is nil\n") 106 b := dom.PrintSubtree(n) 107 pf("%s", b) 108 return true 109 } 110 111 return false 112 113 }