github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/03_top_down_v1.go (about)

     1  package domclean2
     2  
     3  import (
     4  	"github.com/pbberlin/tools/net/http/dom"
     5  	"golang.org/x/net/html"
     6  )
     7  
     8  /*
     9     div                     div
    10         div                     p
    11             p         TO        img
    12             img                 p
    13             p
    14  
    15  
    16  	Operates from the *middle* div.
    17  	Saves all children in inverted slice.
    18  	Removes each child and reattaches it one level higher.
    19  	Finally the intermediary, now childless div is removed.
    20  
    21  
    22  
    23  
    24     \                  /
    25      \       /\       /
    26       \_____/  \_____/
    27  
    28       \              /
    29        \_____/\_____/
    30  
    31         \__________/     => Breaks are gone
    32  
    33  
    34         \p1___p2___/     => Wrapping preserves breaks
    35  
    36  
    37  
    38  
    39  */
    40  func topDownV1(n *html.Node, couple []string, parentType string) {
    41  
    42  	if noParent(n) {
    43  		return
    44  	}
    45  	p := n.Parent
    46  
    47  	parDiv := p.Type == html.ElementNode && p.Data == couple[0] // Parent is a div
    48  	iAmDiv := n.Type == html.ElementNode && n.Data == couple[1] // I am a div
    49  
    50  	noSiblings := n.PrevSibling == nil && n.NextSibling == nil
    51  
    52  	only1Child := n.FirstChild != nil && n.FirstChild == n.LastChild
    53  	svrlChildn := n.FirstChild != nil && n.FirstChild != n.LastChild
    54  	noChildren := n.FirstChild == nil
    55  
    56  	_, _ = noSiblings, noChildren
    57  
    58  	if parDiv && iAmDiv {
    59  
    60  		if only1Child || svrlChildn {
    61  
    62  			var children []*html.Node
    63  			for c := n.FirstChild; c != nil; c = c.NextSibling {
    64  				children = append([]*html.Node{c}, children...) // order inversion
    65  			}
    66  
    67  			insertionPoint := n.NextSibling
    68  			for _, c1 := range children {
    69  
    70  				n.RemoveChild(c1)
    71  
    72  				if c1.Type == html.TextNode || c1.Data == "a" {
    73  					// pf("wrapping %v\n", NodeTypeStr(c1.Type))
    74  					wrap := html.Node{Type: html.ElementNode, Data: "p",
    75  						Attr: []html.Attribute{html.Attribute{Key: "cfrm", Val: "div"}}}
    76  					wrap.FirstChild = c1
    77  					p.InsertBefore(&wrap, insertionPoint)
    78  					c1.Parent = &wrap
    79  					insertionPoint = &wrap
    80  
    81  				} else {
    82  					p.InsertBefore(c1, insertionPoint)
    83  					insertionPoint = c1
    84  				}
    85  
    86  			}
    87  			p.RemoveChild(n)
    88  			if p.Data != parentType {
    89  				p.Data = parentType
    90  			}
    91  
    92  		}
    93  
    94  	}
    95  
    96  }
    97  
    98  func noParent(n *html.Node) bool {
    99  
   100  	p := n.Parent
   101  	if p == nil {
   102  		if n.Type == html.DoctypeNode || n.Type == html.DocumentNode {
   103  			return true
   104  		}
   105  		pf("parent is nil\n")
   106  		b := dom.PrintSubtree(n)
   107  		pf("%s", b)
   108  		return true
   109  	}
   110  
   111  	return false
   112  
   113  }