github.com/pbberlin/tools@v0.0.0-20160910141205-7aa5421c2169/net/http/domclean2/xpath_and_node_histogr.go (about)

     1  package domclean2
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/pbberlin/tools/util"
     7  	"golang.org/x/net/html"
     8  )
     9  
    10  // vars used by all recursive function calls:
    11  var (
    12  	xPath     util.Stack
    13  	xPathSkip = map[string]bool{"em": true, "b": true, "br": true}
    14  	xPathDump []byte
    15  )
    16  
    17  // computeXPathStack writes an xpath log.
    18  // computeXPathStack also collects frequency of node type data.
    19  func computeXPathStack(n *html.Node, lvl int) {
    20  
    21  	if lvl == 0 {
    22  		xPathDump = []byte{}
    23  	}
    24  
    25  	// Before children processing
    26  	switch n.Type {
    27  	case html.ElementNode:
    28  
    29  		nodeDistinct[n.Data]++ // Histogram
    30  
    31  		if !xPathSkip[n.Data] {
    32  			xPath.Push(n.Data)
    33  			// apart from skipped node types:  xPath.Len() == lvl
    34  			s := fmt.Sprintf("%2v: %s\n", xPath.Len(), xPath.StringExt(false))
    35  			xPathDump = append(xPathDump, s...) // yes, string appends to byteSlice ; http://stackoverflow.com/questions/16248241/concatenate-two-slices-in-go#
    36  		}
    37  	}
    38  
    39  	// Children
    40  	for c := n.FirstChild; c != nil; c = c.NextSibling {
    41  		computeXPathStack(c, lvl+1)
    42  	}
    43  
    44  	// After children processing
    45  	switch n.Type {
    46  	case html.ElementNode:
    47  		if !xPathSkip[n.Data] {
    48  			xPath.Pop()
    49  		}
    50  	}
    51  
    52  }