github.com/pbberlin/tools@v0.0.0-20160910141205-7aa5421c2169/net/http/domclean2/02_remove_comments_and_whitespace.go (about)

     1  package domclean2
     2  
     3  import (
     4  	"github.com/pbberlin/tools/net/http/dom"
     5  	"github.com/pbberlin/tools/util"
     6  	"golang.org/x/net/html"
     7  )
     8  
     9  // NdX is a html.node, extended by its level.
    10  // It's used since the horizontal traversal with
    11  // a queue has no recursion and therefore
    12  // keeps no depth information.
    13  type NdX struct {
    14  	Nd  *html.Node
    15  	Lvl int
    16  }
    17  
    18  // removeCommentsAndIntertagWhitespace employs horizontal traversal using a queue
    19  func removeCommentsAndIntertagWhitespace(lp interface{}) {
    20  
    21  	var queue = util.NewQueue(10)
    22  
    23  	for lp != nil {
    24  
    25  		lpn := lp.(NdX).Nd
    26  		lvl := lp.(NdX).Lvl
    27  
    28  		for c := lpn.FirstChild; c != nil; c = c.NextSibling {
    29  			queue.EnQueue(NdX{c, lvl + 1})
    30  		}
    31  
    32  		// processing
    33  		if lpn.Type == html.CommentNode {
    34  			dom.RemoveNode(lpn)
    35  		}
    36  
    37  		// extinguish textnodes that do only formatting (spaces, tabs, line breaks)
    38  		if lpn.Type == html.TextNode && isSpacey(lpn.Data) {
    39  			dom.RemoveNode(lpn)
    40  		}
    41  
    42  		// next node
    43  		lp = queue.DeQueue()
    44  	}
    45  }