github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/07_condense_bottom_up_v3.go (about)

     1  package domclean2
     2  
     3  import (
     4  	"log"
     5  	"strings"
     6  
     7  	"github.com/pbberlin/tools/net/http/dom"
     8  	"golang.org/x/net/html"
     9  )
    10  
    11  func flattenSubtreeV3(n, nClone *html.Node) {
    12  
    13  	// log.Printf("fsbo\n")
    14  	flattenSubtreeV3Inner(n, nClone, 0)
    15  
    16  }
    17  
    18  var standard = map[string]bool{
    19  
    20  	"title": true,
    21  
    22  	"p":   true,
    23  	"div": true,
    24  	"ul":  true,
    25  	"ol":  true,
    26  	"li":  true,
    27  	"h1":  true,
    28  	"h2":  true,
    29  
    30  	"em":       true,
    31  	"strong":   true,
    32  	"label":    true,
    33  	"input":    true,
    34  	"textarea": true,
    35  
    36  	"form":       true,
    37  	"blockquote": true,
    38  }
    39  
    40  func flattenSubtreeV3Inner(n, nClone *html.Node, lvl int) {
    41  
    42  	// log.Printf("fsbi\n")
    43  
    44  	for ch := n.FirstChild; ch != nil; ch = ch.NextSibling {
    45  
    46  		chClone := dom.CloneNode(ch)
    47  
    48  		switch {
    49  
    50  		case ch.Type == html.ElementNode && standard[ch.Data]:
    51  			nClone.AppendChild(chClone)
    52  			flattenSubtreeV3Inner(ch, chClone, lvl+1)
    53  
    54  		case ch.Type == html.ElementNode && ch.Data == "a":
    55  			nClone.AppendChild(chClone)
    56  			flattenSubtreeV3Inner(ch, chClone, lvl+1)
    57  
    58  		case ch.Type == html.ElementNode && ch.Data == "img":
    59  			nClone.AppendChild(chClone)
    60  
    61  		case ch.Data == "span":
    62  			// log.Printf(strings.Repeat("  ", lvl) + "span \n")
    63  			for cch := ch.FirstChild; cch != nil; cch = cch.NextSibling {
    64  				// log.Printf(strings.Repeat("    ", lvl)+"span child %v", cch.Data)
    65  				cchClone := dom.CloneNode(cch)
    66  				nClone.AppendChild(cchClone)
    67  				nClone.AppendChild(dom.Nd("text", " "))
    68  				flattenSubtreeV3Inner(cch, cchClone, lvl+1)
    69  			}
    70  
    71  		case ch.Type == html.TextNode && ch.Data != "":
    72  			chClone.Data = strings.TrimSpace(chClone.Data)
    73  			chClone.Data += " "
    74  			nClone.AppendChild(chClone)
    75  
    76  		default:
    77  			//			nClone.AppendChild(chClone)
    78  			log.Printf("unhandled %s %s\n", dom.NodeTypeStr(ch.Type), ch.Data)
    79  
    80  		}
    81  
    82  	}
    83  
    84  }
    85  
    86  func condenseBottomUpV3(n *html.Node, lvl, lvlDo int, unusedTypes map[string]bool) {
    87  
    88  	if lvl < lvlDo {
    89  
    90  		// Delve deeper until reaching lvlDo
    91  		cs := []*html.Node{}
    92  		for c := n.FirstChild; c != nil; c = c.NextSibling {
    93  			cs = append(cs, c)
    94  		}
    95  		for _, c := range cs {
    96  			condenseBottomUpV3(c, lvl+1, lvlDo, unusedTypes)
    97  		}
    98  
    99  	} else {
   100  
   101  		if n.Type == html.ElementNode {
   102  
   103  			nClone := dom.CloneNode(n)
   104  			flattenSubtreeV3(n, nClone)
   105  
   106  			nParent := n.Parent
   107  			nParent.InsertBefore(nClone, n)
   108  			nParent.RemoveChild(n)
   109  
   110  			// 	bx := dom.PrintSubtree(nParent)
   111  			// 	fmt.Printf("%s", bx)
   112  		}
   113  
   114  	}
   115  
   116  }