github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/07_condense_bottom_up_v2.go (about)

     1  package domclean2
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"log"
     7  	"strings"
     8  
     9  	"github.com/pbberlin/tools/net/http/dom"
    10  	"golang.org/x/net/html"
    11  )
    12  
    13  func flattenSubtreeV2(n *html.Node, b *bytes.Buffer, depth int, tpar *html.Node) (*bytes.Buffer, *html.Node) {
    14  
    15  	if b == nil {
    16  		b = new(bytes.Buffer)
    17  	}
    18  	if tpar == nil {
    19  		tpar = &html.Node{
    20  			Type:     n.Type,
    21  			DataAtom: n.DataAtom,
    22  			Data:     n.Data,
    23  			Attr:     make([]html.Attribute, len(n.Attr)),
    24  		}
    25  		copy(tpar.Attr, n.Attr)
    26  	}
    27  
    28  	switch {
    29  	case n.Type == html.ElementNode && n.Data == "a":
    30  		n.Parent.RemoveChild(n)
    31  		tpar.AppendChild(n)
    32  		// wpf(b, "[a] ")
    33  	case n.Type == html.ElementNode && n.Data == "img":
    34  		// img2Link(n)
    35  		n.Parent.RemoveChild(n)
    36  		tpar.AppendChild(n)
    37  	case n.Data == "em" || n.Data == "strong":
    38  		wpf(b, "[%v l%v] ", n.Data, depth)
    39  		n.Parent.RemoveChild(n)
    40  		tpar.AppendChild(n)
    41  	case n.Data == "label" || n.Data == "input" || n.Data == "textarea":
    42  		n.Parent.RemoveChild(n)
    43  		tpar.AppendChild(n)
    44  	case n.Data == "p" || n.Data == "div" || n.Data == "li" || n.Data == "ol" || n.Data == "h1" || n.Data == "h2" || n.Data == "ul":
    45  		n.Parent.RemoveChild(n)
    46  		tpar.AppendChild(n)
    47  	case n.Data == "span":
    48  		for c := n.FirstChild; c != nil; c = c.NextSibling {
    49  			n.RemoveChild(c)
    50  			tpar.AppendChild(c)
    51  		}
    52  		n.Parent.RemoveChild(n)
    53  	case n.Type == html.TextNode && n.Data != "":
    54  		n.Data = strings.TrimSpace(n.Data)
    55  		n.Data += " "
    56  		wpf(b, n.Data)
    57  		n.Parent.RemoveChild(n)
    58  		tpar.AppendChild(n)
    59  	default:
    60  		log.Printf("unhandled %s %s\n", dom.NodeTypeStr(n.Type), n.Data)
    61  		n.Parent.RemoveChild(n)
    62  	}
    63  
    64  	//
    65  	//
    66  	children := []*html.Node{}
    67  	for c := n.FirstChild; c != nil; c = c.NextSibling {
    68  		// fmt.Printf("still has children %v\n", c.Data)
    69  		children = append(children, c) //  assembling separately, before removing.
    70  	}
    71  	for _, c := range children {
    72  		flattenSubtreeV2(c, b, depth+1, tpar)
    73  	}
    74  
    75  	return b, tpar
    76  }
    77  
    78  func condenseBottomUpV2(n *html.Node, lvl, lvlDo int, types map[string]bool) {
    79  
    80  	if lvl < lvlDo {
    81  
    82  		cs := []*html.Node{}
    83  		for c := n.FirstChild; c != nil; c = c.NextSibling {
    84  			cs = append(cs, c)
    85  		}
    86  		for _, c := range cs {
    87  			condenseBottomUpV2(c, lvl+1, lvlDo, types)
    88  		}
    89  
    90  	} else {
    91  
    92  		// log.Printf("action on %v %v\n", lvl, lvlDo)
    93  
    94  		switch {
    95  
    96  		case n.Type == html.ElementNode && types[n.Data]:
    97  
    98  			oldPar := n.Parent
    99  			if oldPar == nil {
   100  				return
   101  			}
   102  
   103  			b, newPar := flattenSubtreeV2(n, nil, 0, nil)
   104  
   105  			// placeholder := dom.Nd("div")
   106  			// par := n.Parent
   107  			// par.InsertBefore(placeholder, n.NextSibling)
   108  			// par.RemoveChild(n)
   109  			// par.InsertBefore(n2, placeholder)
   110  
   111  			for c := oldPar.FirstChild; c != nil; c = c.NextSibling {
   112  				oldPar.RemoveChild(c)
   113  			}
   114  
   115  			for c := newPar.FirstChild; c != nil; c = c.NextSibling {
   116  				newPar.RemoveChild(c)
   117  				oldPar.AppendChild(c)
   118  			}
   119  
   120  			if lvlDo > 4 {
   121  				bx := dom.PrintSubtree(newPar)
   122  				fmt.Printf("%s", bx)
   123  			}
   124  
   125  			// n = n2
   126  
   127  			nodeRepl := dom.Nd("text", b.String())
   128  
   129  			if false {
   130  
   131  				// Remove all existing children.
   132  				// Direct loop impossible, since "NextSibling" is set to nil by Remove().
   133  				children := []*html.Node{}
   134  				for c := n.FirstChild; c != nil; c = c.NextSibling {
   135  					children = append(children, c) //  assembling separately, before removing.
   136  				}
   137  				for _, c := range children {
   138  					log.Printf("c %4v rem from %4v ", c.Data, n.Data)
   139  					n.RemoveChild(c)
   140  				}
   141  
   142  				// we can't put our replacement "under" an image, since img cannot have children
   143  				if n.Type == html.ElementNode && n.Data == "img" {
   144  					n.Parent.InsertBefore(nodeRepl, n.NextSibling) // if n.NextSibling==nil => insert at the end
   145  					n.Parent.RemoveChild(n)
   146  				} else {
   147  					n.AppendChild(nodeRepl)
   148  				}
   149  
   150  				// Insert a  || and a newline before every <a...>
   151  				// if n.Data == "a" {
   152  				// 	n.Parent.InsertBefore(dom.Nd("text", " || "), n)
   153  				// }
   154  			}
   155  
   156  		default:
   157  		}
   158  
   159  	}
   160  
   161  }