github.com/Kindred87/Obsidian@v0.0.0-20210809203756-86936424b848/retrieval/html/nodes.go (about) 1 package html 2 3 import ( 4 "github.com/PuerkitoBio/goquery" 5 "golang.org/x/net/html" 6 ) 7 8 // NodeList represents a collection of Nodes with an arbitrary identifier. 9 type NodeList struct { 10 Name string 11 Nodes []Node 12 } 13 14 // Node represents an html node within a document. 15 type Node struct { 16 // ID is an arbitrary value that is primarily used for distinguishing a node's 17 // place within a sequence, such as a NodeList. 18 ID int 19 // Name is synonymous with an html tag or goquery.NodeName. 20 Name string 21 // Data represents the string data contained within an html node. 22 Data string 23 // Class represents the optional class attribute of an html node. 24 Class string 25 } 26 27 // nodeFromSelection creates a Node based off of the given ID and selection attributes. 28 func nodeFromSelection(ID int, sel *goquery.Selection) Node { 29 newNode := Node{ 30 ID: ID, 31 Name: goquery.NodeName(sel), 32 } 33 34 if class, exists := sel.Attr("class"); exists { 35 newNode.Class = class 36 } 37 38 return newNode 39 } 40 41 // nodeFromNode creates a Node based off of the given ID and html.Node attributes. 42 func nodeFromNode(ID int, n *html.Node) Node { 43 newNode := Node{ 44 ID: ID, 45 Data: n.Data, 46 } 47 48 if found, class := classAttributeFrom(n.Attr); found { 49 newNode.Class = class 50 } 51 52 return newNode 53 } 54 55 // classAttributeFrom returns the value of the contained Attribute with the key of "class". 56 // Returned boolean indicates if a value was found. 57 func classAttributeFrom(attr []html.Attribute) (bool, string) { 58 for _, a := range attr { 59 if a.Key == "class" { 60 return true, a.Val 61 } 62 } 63 return false, "" 64 }