github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/03_top_down_v3.go (about)

     1  package domclean2
     2  
     3  import (
     4  	"github.com/pbberlin/tools/net/http/dom"
     5  	"golang.org/x/net/html"
     6  )
     7  
     8  // Now this third implementation finally condenses *selectively*.
     9  // Not all boats from each pond are lifted equally.
    10  // We achieve tremendous structural simplification.
    11  // It also starts from top, pulling lower levels up.
    12  // Unlike implementation #1, that started from the middle.
    13  func topDownV3(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) {
    14  
    15  	if l1.Type != html.ElementNode &&
    16  		l1.Type != html.DocumentNode {
    17  		return // cannot assign to - do not unable to have children
    18  	}
    19  	if l1.Data == "span" || l1.Data == "a" {
    20  		return // want not condense into
    21  	}
    22  
    23  	// dig two levels deep
    24  
    25  	// isolate l2,l3
    26  	l2s := []*html.Node{}
    27  	l3s := map[*html.Node][]*html.Node{}
    28  
    29  	for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling {
    30  
    31  		l2s = append(l2s, l2)
    32  		// l2s = append([]*html.Node{l2}, l2s...) // order inversion
    33  
    34  		for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling {
    35  			l3s[l2] = append(l3s[l2], l3)
    36  			// l3s[l2] = append(map[*html.Node][]*html.Node{l2: []*html.Node{l3}}, l3s[l2]...) // order inversion
    37  		}
    38  	}
    39  
    40  	postponedRemoval := map[*html.Node]bool{}
    41  
    42  	//
    43  	//
    44  	// check types for each l2 subtree distinctively
    45  	for _, l2 := range l2s {
    46  
    47  		l2Match := l2.Type == html.ElementNode && l2Types[l2.Data] // l2 is a div
    48  
    49  		l3Match := true
    50  		for _, l3 := range l3s[l2] {
    51  			l3Match = l3Match && (l3.Type == html.ElementNode && l3Types[l3.Data])
    52  		}
    53  
    54  		// act
    55  		if l2Match && l3Match {
    56  
    57  			// detach l3 from l2
    58  			for _, l3 := range l3s[l2] {
    59  				// if ml3[l3] > 0 {
    60  				// 	fmt.Printf("rmd_%v_%v ", ml3[l3], l3.Data)
    61  				// }
    62  				l2.RemoveChild(l3)
    63  				// ml3[l3]++
    64  			}
    65  
    66  			// Since we still need l2 below
    67  			// We have to postpone detaching l2 from l1
    68  			// to the bottom
    69  			// NOT HERE: l1.RemoveChild(l2)
    70  			postponedRemoval[l2] = true
    71  
    72  			for _, l3 := range l3s[l2] {
    73  				// attach l3 to l1
    74  
    75  				if l3.Data != "a" && l3.Data != "span" {
    76  					l1.InsertBefore(l3, l2)
    77  				} else {
    78  					wrap := dom.Nd("p")
    79  					wrap.Attr = []html.Attribute{html.Attribute{Key: "cfrm", Val: "noth"}}
    80  					wrap.AppendChild(l3)
    81  					// NOT  wrap.FirstChild = l3
    82  					l1.InsertBefore(wrap, l2)
    83  				}
    84  			}
    85  
    86  		}
    87  
    88  	}
    89  
    90  	for k, _ := range postponedRemoval {
    91  		l1.RemoveChild(k) // detach l2 from l1
    92  	}
    93  
    94  }