github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/03_top_down_v3.go (about) 1 package domclean2 2 3 import ( 4 "github.com/pbberlin/tools/net/http/dom" 5 "golang.org/x/net/html" 6 ) 7 8 // Now this third implementation finally condenses *selectively*. 9 // Not all boats from each pond are lifted equally. 10 // We achieve tremendous structural simplification. 11 // It also starts from top, pulling lower levels up. 12 // Unlike implementation #1, that started from the middle. 13 func topDownV3(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) { 14 15 if l1.Type != html.ElementNode && 16 l1.Type != html.DocumentNode { 17 return // cannot assign to - do not unable to have children 18 } 19 if l1.Data == "span" || l1.Data == "a" { 20 return // want not condense into 21 } 22 23 // dig two levels deep 24 25 // isolate l2,l3 26 l2s := []*html.Node{} 27 l3s := map[*html.Node][]*html.Node{} 28 29 for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling { 30 31 l2s = append(l2s, l2) 32 // l2s = append([]*html.Node{l2}, l2s...) // order inversion 33 34 for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling { 35 l3s[l2] = append(l3s[l2], l3) 36 // l3s[l2] = append(map[*html.Node][]*html.Node{l2: []*html.Node{l3}}, l3s[l2]...) // order inversion 37 } 38 } 39 40 postponedRemoval := map[*html.Node]bool{} 41 42 // 43 // 44 // check types for each l2 subtree distinctively 45 for _, l2 := range l2s { 46 47 l2Match := l2.Type == html.ElementNode && l2Types[l2.Data] // l2 is a div 48 49 l3Match := true 50 for _, l3 := range l3s[l2] { 51 l3Match = l3Match && (l3.Type == html.ElementNode && l3Types[l3.Data]) 52 } 53 54 // act 55 if l2Match && l3Match { 56 57 // detach l3 from l2 58 for _, l3 := range l3s[l2] { 59 // if ml3[l3] > 0 { 60 // fmt.Printf("rmd_%v_%v ", ml3[l3], l3.Data) 61 // } 62 l2.RemoveChild(l3) 63 // ml3[l3]++ 64 } 65 66 // Since we still need l2 below 67 // We have to postpone detaching l2 from l1 68 // to the bottom 69 // NOT HERE: l1.RemoveChild(l2) 70 postponedRemoval[l2] = true 71 72 for _, l3 := range l3s[l2] { 73 // attach l3 to l1 74 75 if l3.Data != "a" && l3.Data != "span" { 76 l1.InsertBefore(l3, l2) 77 } else { 78 wrap := dom.Nd("p") 79 wrap.Attr = []html.Attribute{html.Attribute{Key: "cfrm", Val: "noth"}} 80 wrap.AppendChild(l3) 81 // NOT wrap.FirstChild = l3 82 l1.InsertBefore(wrap, l2) 83 } 84 } 85 86 } 87 88 } 89 90 for k, _ := range postponedRemoval { 91 l1.RemoveChild(k) // detach l2 from l1 92 } 93 94 }