github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/domclean2/06_img2link.go (about)

     1  package domclean2
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  
     7  	"github.com/pbberlin/tools/net/http/dom"
     8  	"github.com/pbberlin/tools/stringspb"
     9  	"golang.org/x/net/html"
    10  )
    11  
    12  func closureTextNodeExists(img *html.Node) (found bool) {
    13  
    14  	txt := attrX(img.Attr, "title")
    15  	if len(txt) < 5 {
    16  		return false
    17  	}
    18  	txt = stringspb.NormalizeInnerWhitespace(txt)
    19  	txt = strings.TrimSpace(txt)
    20  
    21  	// We dont search entire document, but three levels above image subtree
    22  	grandParent := img
    23  	for i := 0; i < 4; i++ {
    24  		if grandParent.Parent != nil {
    25  			grandParent = grandParent.Parent
    26  		} else {
    27  			// log.Printf("LevelsUp %v for %q", i, txt)
    28  			break
    29  		}
    30  	}
    31  
    32  	var recurseTextNodes func(n *html.Node)
    33  	recurseTextNodes = func(n *html.Node) {
    34  
    35  		if found {
    36  			return
    37  		}
    38  
    39  		cc := []*html.Node{}
    40  		for c := n.FirstChild; c != nil; c = c.NextSibling {
    41  			cc = append(cc, c)
    42  		}
    43  		for _, c := range cc {
    44  			recurseTextNodes(c)
    45  		}
    46  
    47  		if n.Type == html.TextNode {
    48  			n.Data = stringspb.NormalizeInnerWhitespace(n.Data)
    49  			if len(n.Data) >= len(txt) {
    50  				// if strings.Contains(txt, "FDP") {
    51  				// 	log.Printf("%25v     %v", stringspb.Ellipsoider(txt, 10), stringspb.Ellipsoider(n.Data, 10))
    52  				// }
    53  				fnd := strings.Contains(n.Data, txt)
    54  				if fnd {
    55  					found = true
    56  					return
    57  				}
    58  			}
    59  		}
    60  	}
    61  	recurseTextNodes(grandParent)
    62  
    63  	return
    64  }
    65  
    66  func img2Link(img *html.Node) {
    67  
    68  	if img.Data == "img" {
    69  
    70  		img.Data = "a"
    71  		for i := 0; i < len(img.Attr); i++ {
    72  			if img.Attr[i].Key == "src" {
    73  				img.Attr[i].Key = "href"
    74  			}
    75  		}
    76  
    77  		double := closureTextNodeExists(img)
    78  		imgContent := ""
    79  		title := attrX(img.Attr, "title")
    80  
    81  		if double {
    82  			imgContent = fmt.Sprintf("[img] %v %v | ",
    83  				"[ctdr]", // content title double removed
    84  				urlBeautify(attrX(img.Attr, "href")))
    85  
    86  		} else {
    87  			imgContent = fmt.Sprintf("[img] %v %v | ",
    88  				title,
    89  				urlBeautify(attrX(img.Attr, "href")))
    90  		}
    91  
    92  		img.Attr = attrSet(img.Attr, "cfrom", "img")
    93  		nd := dom.Nd("text", imgContent)
    94  		img.AppendChild(nd)
    95  	}
    96  
    97  }
    98  
    99  func recurseImg2Link(n *html.Node) {
   100  
   101  	cc := []*html.Node{}
   102  	for c := n.FirstChild; c != nil; c = c.NextSibling {
   103  		cc = append(cc, c)
   104  	}
   105  	for _, c := range cc {
   106  		recurseImg2Link(c)
   107  	}
   108  
   109  	if n.Type == html.ElementNode && n.Data == "img" {
   110  		img2Link(n)
   111  	}
   112  }