github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/xpath_and_node_histogr.go (about) 1 package domclean2 2 3 import ( 4 "fmt" 5 6 "github.com/pbberlin/tools/util" 7 "golang.org/x/net/html" 8 ) 9 10 // vars used by all recursive function calls: 11 var ( 12 xPath util.Stack 13 xPathSkip = map[string]bool{"em": true, "b": true, "br": true} 14 xPathDump []byte 15 ) 16 17 // computeXPathStack writes an xpath log. 18 // computeXPathStack also collects frequency of node type data. 19 func computeXPathStack(n *html.Node, lvl int) { 20 21 if lvl == 0 { 22 xPathDump = []byte{} 23 } 24 25 // Before children processing 26 switch n.Type { 27 case html.ElementNode: 28 29 nodeDistinct[n.Data]++ // Histogram 30 31 if !xPathSkip[n.Data] { 32 xPath.Push(n.Data) 33 // apart from skipped node types: xPath.Len() == lvl 34 s := fmt.Sprintf("%2v: %s\n", xPath.Len(), xPath.StringExt(false)) 35 xPathDump = append(xPathDump, s...) // yes, string appends to byteSlice ; http://stackoverflow.com/questions/16248241/concatenate-two-slices-in-go# 36 } 37 } 38 39 // Children 40 for c := n.FirstChild; c != nil; c = c.NextSibling { 41 computeXPathStack(c, lvl+1) 42 } 43 44 // After children processing 45 switch n.Type { 46 case html.ElementNode: 47 if !xPathSkip[n.Data] { 48 xPath.Pop() 49 } 50 } 51 52 }