github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/03_top_down_v2.go (about) 1 package domclean2 2 3 import "golang.org/x/net/html" 4 5 // Condense upwards builds a three-levels subtree 6 // starting from param node l1 7 // l2 and l3 nodes need to comply by type 8 // 9 // Then l3 is moved under l1; l2 is eliminated 10 // 11 // For <a> or "text" l3 nodes, we could introduce wrappers 12 // 13 // l2Types so far always is "div". 14 // Multiple l2Types are possible, but difficult to imagine. 15 // 16 // l1 type could be changed - from div to ul for instance, but I found no use for that 17 // 18 // Implementation yields similar result as condenseTopDown1 19 // but the "all-or-nothing" logic is clearer 20 func topDownV2(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) { 21 22 if l1.Type != html.ElementNode && 23 l1.Type != html.DocumentNode { 24 return // cannot assign to - do not unable to have children 25 } 26 if l1.Data == "span" || l1.Data == "a" { 27 return // want not condense into 28 } 29 30 // dig two levels deeper 31 32 // isolate l2 33 var l2s []*html.Node 34 for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling { 35 l2s = append(l2s, l2) 36 // l2s = append([]*html.Node{l2}, l2s...) // order inversion 37 } 38 39 // measure types 40 l2Div := true 41 42 // note that *all* l3 must have l3Type, not just those those of one l2 element 43 // otherwise we get only partial restructuring - and therefore sequence errors 44 l3Div := true 45 46 for _, l2 := range l2s { 47 l2Div = l2Div && l2.Type == html.ElementNode && l2Types[l2.Data] // l2 is a div 48 for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling { 49 l3Div = l3Div && (l3.Type == html.ElementNode && l3Types[l3.Data]) // l3 is a div or ul or form 50 } 51 } 52 53 // act 54 if l2Div && l3Div { 55 for _, l2 := range l2s { 56 57 // isolate l3 58 var l3s []*html.Node 59 for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling { 60 l3s = append(l3s, l3) 61 // l3s = append([]*html.Node{l3}, l3s...) // order inversion 62 } 63 64 // detach l3 from l2 65 for _, l3 := range l3s { 66 l2.RemoveChild(l3) 67 } 68 l1.RemoveChild(l2) // detach l2 from l1 69 70 for _, l3 := range l3s { 71 // attach l3 to l1, possible wrapper of <a> or <span> 72 l1.InsertBefore(l3, nil) // insert at end 73 74 // wrap := html.Node{Type: html.ElementNode, Data: "p", Attr: []html.Attribute{html.Attribute{Key: "cfrm", Val: "div"}}} 75 // wrap.FirstChild = c1 76 // l1.InsertBefore(&wrap, nil) 77 78 } 79 80 } 81 } 82 83 }