github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/03_top_down_v2.go (about)

     1  package domclean2
     2  
     3  import "golang.org/x/net/html"
     4  
     5  // Condense upwards builds a three-levels subtree
     6  // starting from param node l1
     7  // l2 and l3 nodes need to comply by type
     8  //
     9  // Then l3 is moved under l1; l2 is eliminated
    10  //
    11  // For <a> or "text" l3 nodes, we could introduce wrappers
    12  //
    13  // l2Types so far always is "div".
    14  // Multiple l2Types are possible, but difficult to imagine.
    15  //
    16  // l1 type could be changed - from div to ul for instance, but I found no use for that
    17  //
    18  // Implementation yields similar result as condenseTopDown1
    19  // but the "all-or-nothing" logic is clearer
    20  func topDownV2(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) {
    21  
    22  	if l1.Type != html.ElementNode &&
    23  		l1.Type != html.DocumentNode {
    24  		return // cannot assign to - do not unable to have children
    25  	}
    26  	if l1.Data == "span" || l1.Data == "a" {
    27  		return // want not condense into
    28  	}
    29  
    30  	// dig two levels deeper
    31  
    32  	// isolate l2
    33  	var l2s []*html.Node
    34  	for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling {
    35  		l2s = append(l2s, l2)
    36  		// l2s = append([]*html.Node{l2}, l2s...) // order inversion
    37  	}
    38  
    39  	// measure types
    40  	l2Div := true
    41  
    42  	// note that *all* l3 must have l3Type, not just those those of one l2 element
    43  	// otherwise we get only partial restructuring - and therefore sequence errors
    44  	l3Div := true
    45  
    46  	for _, l2 := range l2s {
    47  		l2Div = l2Div && l2.Type == html.ElementNode && l2Types[l2.Data] // l2 is a div
    48  		for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling {
    49  			l3Div = l3Div && (l3.Type == html.ElementNode && l3Types[l3.Data]) // l3 is a div or ul or form
    50  		}
    51  	}
    52  
    53  	// act
    54  	if l2Div && l3Div {
    55  		for _, l2 := range l2s {
    56  
    57  			// isolate l3
    58  			var l3s []*html.Node
    59  			for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling {
    60  				l3s = append(l3s, l3)
    61  				// l3s = append([]*html.Node{l3}, l3s...) // order inversion
    62  			}
    63  
    64  			// detach l3 from l2
    65  			for _, l3 := range l3s {
    66  				l2.RemoveChild(l3)
    67  			}
    68  			l1.RemoveChild(l2) // detach l2 from l1
    69  
    70  			for _, l3 := range l3s {
    71  				// attach l3 to l1, possible wrapper of <a> or <span>
    72  				l1.InsertBefore(l3, nil) // insert at end
    73  
    74  				// wrap := html.Node{Type: html.ElementNode, Data: "p", Attr: []html.Attribute{html.Attribute{Key: "cfrm", Val: "div"}}}
    75  				// wrap.FirstChild = c1
    76  				// l1.InsertBefore(&wrap, nil)
    77  
    78  			}
    79  
    80  		}
    81  	}
    82  
    83  }