/*
 * Copyright (c) 2014 Kurt Jung (Gmail: kurt.w.jung)
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

package gofpdf

import (
	"regexp"
	"strings"
)

// HTMLBasicSegmentType defines a segment of literal text in which the current
// attributes do not vary, or an open tag or a close tag. A tokenized HTML
// string is represented as a slice of these segments in document order.
type HTMLBasicSegmentType struct {
	// Cat is the segment category: 'O' for an open tag, 'C' for a close tag
	// and 'T' for literal text.
	Cat byte
	// Str holds the literal text unchanged for a text segment, or the
	// lower-cased tag name for an open or close tag.
	Str string
	// Attr holds the attributes of an open tag, keyed by lower-cased
	// attribute name. It is nil for text and close-tag segments.
	Attr map[string]string
}

// HTMLBasicTokenize returns a list of HTML tags and literal elements. This is
// done with regular expressions rather than a real HTML parser, so the result
// is only marginally better than useless.
35 func HTMLBasicTokenize(htmlStr string) (list []HTMLBasicSegmentType) { 36 // This routine is adapted from http://www.fpdf.org/ 37 list = make([]HTMLBasicSegmentType, 0, 16) 38 htmlStr = strings.Replace(htmlStr, "\n", " ", -1) 39 htmlStr = strings.Replace(htmlStr, "\r", "", -1) 40 tagRe, _ := regexp.Compile(`(?U)<.*>`) 41 attrRe, _ := regexp.Compile(`([^=]+)=["']?([^"']+)`) 42 capList := tagRe.FindAllStringIndex(htmlStr, -1) 43 if capList != nil { 44 var seg HTMLBasicSegmentType 45 var parts []string 46 pos := 0 47 for _, cap := range capList { 48 if pos < cap[0] { 49 seg.Cat = 'T' 50 seg.Str = htmlStr[pos:cap[0]] 51 seg.Attr = nil 52 list = append(list, seg) 53 } 54 if htmlStr[cap[0]+1] == '/' { 55 seg.Cat = 'C' 56 seg.Str = strings.ToLower(htmlStr[cap[0]+2 : cap[1]-1]) 57 seg.Attr = nil 58 list = append(list, seg) 59 } else { 60 // Extract attributes 61 parts = strings.Split(htmlStr[cap[0]+1:cap[1]-1], " ") 62 if len(parts) > 0 { 63 for j, part := range parts { 64 if j == 0 { 65 seg.Cat = 'O' 66 seg.Str = strings.ToLower(parts[0]) 67 seg.Attr = make(map[string]string) 68 } else { 69 attrList := attrRe.FindAllStringSubmatch(part, -1) 70 if attrList != nil { 71 for _, attr := range attrList { 72 seg.Attr[strings.ToLower(attr[1])] = attr[2] 73 } 74 } 75 } 76 } 77 list = append(list, seg) 78 } 79 } 80 pos = cap[1] 81 } 82 if len(htmlStr) > pos { 83 seg.Cat = 'T' 84 seg.Str = htmlStr[pos:] 85 seg.Attr = nil 86 list = append(list, seg) 87 } 88 } else { 89 list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr, Attr: nil}) 90 } 91 return 92 } 93 94 // HTMLBasicType is used for rendering a very basic subset of HTML. It supports 95 // only hyperlinks and bold, italic and underscore attributes. In the Link 96 // structure, the ClrR, ClrG and ClrB fields (0 through 255) define the color 97 // of hyperlinks. The Bold, Italic and Underscore values define the hyperlink 98 // style. 
99 type HTMLBasicType struct { 100 pdf *Fpdf 101 Link struct { 102 ClrR, ClrG, ClrB int 103 Bold, Italic, Underscore bool 104 } 105 } 106 107 // HTMLBasicNew returns an instance that facilitates writing basic HTML in the 108 // specified PDF file. 109 func (f *Fpdf) HTMLBasicNew() (html HTMLBasicType) { 110 html.pdf = f 111 html.Link.ClrR, html.Link.ClrG, html.Link.ClrB = 0, 0, 128 112 html.Link.Bold, html.Link.Italic, html.Link.Underscore = false, false, true 113 return 114 } 115 116 // Write prints text from the current position using the currently selected 117 // font. See HTMLBasicNew() to create a receiver that is associated with the 118 // PDF document instance. The text can be encoded with a basic subset of HTML 119 // that includes hyperlinks and tags for italic (I), bold (B), underscore 120 // (U) and center (CENTER) attributes. When the right margin is reached a line 121 // break occurs and text continues from the left margin. Upon method exit, the 122 // current position is left at the end of the text. 123 // 124 // lineHt indicates the line height in the unit of measure specified in New(). 
125 func (html *HTMLBasicType) Write(lineHt float64, htmlStr string) { 126 var boldLvl, italicLvl, underscoreLvl, linkBold, linkItalic, linkUnderscore int 127 var textR, textG, textB = html.pdf.GetTextColor() 128 var hrefStr string 129 if html.Link.Bold { 130 linkBold = 1 131 } 132 if html.Link.Italic { 133 linkItalic = 1 134 } 135 if html.Link.Underscore { 136 linkUnderscore = 1 137 } 138 setStyle := func(boldAdj, italicAdj, underscoreAdj int) { 139 styleStr := "" 140 boldLvl += boldAdj 141 if boldLvl > 0 { 142 styleStr += "B" 143 } 144 italicLvl += italicAdj 145 if italicLvl > 0 { 146 styleStr += "I" 147 } 148 underscoreLvl += underscoreAdj 149 if underscoreLvl > 0 { 150 styleStr += "U" 151 } 152 html.pdf.SetFont("", styleStr, 0) 153 } 154 putLink := func(urlStr, txtStr string) { 155 // Put a hyperlink 156 html.pdf.SetTextColor(html.Link.ClrR, html.Link.ClrG, html.Link.ClrB) 157 setStyle(linkBold, linkItalic, linkUnderscore) 158 html.pdf.WriteLinkString(lineHt, txtStr, urlStr) 159 setStyle(-linkBold, -linkItalic, -linkUnderscore) 160 html.pdf.SetTextColor(textR, textG, textB) 161 } 162 list := HTMLBasicTokenize(htmlStr) 163 var ok bool 164 alignStr := "L" 165 for _, el := range list { 166 switch el.Cat { 167 case 'T': 168 if len(hrefStr) > 0 { 169 putLink(hrefStr, el.Str) 170 hrefStr = "" 171 } else { 172 if alignStr == "C" || alignStr == "R" { 173 html.pdf.WriteAligned(0, lineHt, el.Str, alignStr) 174 } else { 175 html.pdf.Write(lineHt, el.Str) 176 } 177 } 178 case 'O': 179 switch el.Str { 180 case "b": 181 setStyle(1, 0, 0) 182 case "i": 183 setStyle(0, 1, 0) 184 case "u": 185 setStyle(0, 0, 1) 186 case "br": 187 html.pdf.Ln(lineHt) 188 case "center": 189 html.pdf.Ln(lineHt) 190 alignStr = "C" 191 case "right": 192 html.pdf.Ln(lineHt) 193 alignStr = "R" 194 case "left": 195 html.pdf.Ln(lineHt) 196 alignStr = "L" 197 case "a": 198 hrefStr, ok = el.Attr["href"] 199 if !ok { 200 hrefStr = "" 201 } 202 } 203 case 'C': 204 switch el.Str { 205 case "b": 206 
setStyle(-1, 0, 0) 207 case "i": 208 setStyle(0, -1, 0) 209 case "u": 210 setStyle(0, 0, -1) 211 case "center": 212 html.pdf.Ln(lineHt) 213 alignStr = "L" 214 case "right": 215 html.pdf.Ln(lineHt) 216 alignStr = "L" 217 } 218 } 219 } 220 }