// Source: codeberg.org/go-pdf/fpdf@v0.11.1/htmlbasic.go

// Copyright ©2023 The go-pdf Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

/*
 * Copyright (c) 2014 Kurt Jung (Gmail: kurt.w.jung)
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

package fpdf

import (
	"regexp"
	"strings"
)

// HTMLBasicSegmentType defines a segment of literal text in which the current
// attributes do not vary, or an open tag or a close tag.
type HTMLBasicSegmentType struct {
	// Cat is the segment category: 'O' open tag, 'C' close tag, 'T' text.
	Cat byte
	// Str holds the literal text, unchanged, for a text segment. For a tag
	// segment it holds the lower-cased tag name without the angle brackets
	// (and, for a close tag, without the leading slash).
	Str string
	// Attr maps lower-cased attribute keys to their values. The tokenizer
	// sets it to a non-nil map for open tags and to nil for text and close
	// tag segments.
	Attr map[string]string
}

// HTMLBasicTokenize returns a list of HTML tags and literal elements. This is
// done with regular expressions, so the result is only marginally better than
// useless.
39 func HTMLBasicTokenize(htmlStr string) (list []HTMLBasicSegmentType) { 40 // This routine is adapted from http://www.fpdf.org/ 41 list = make([]HTMLBasicSegmentType, 0, 16) 42 htmlStr = strings.Replace(htmlStr, "\n", " ", -1) 43 htmlStr = strings.Replace(htmlStr, "\r", "", -1) 44 tagRe, _ := regexp.Compile(`(?U)<.*>`) 45 attrRe, _ := regexp.Compile(`([^=]+)=["']?([^"']+)`) 46 capList := tagRe.FindAllStringIndex(htmlStr, -1) 47 if capList != nil { 48 var seg HTMLBasicSegmentType 49 var parts []string 50 pos := 0 51 for _, cap := range capList { 52 if pos < cap[0] { 53 seg.Cat = 'T' 54 seg.Str = htmlStr[pos:cap[0]] 55 seg.Attr = nil 56 list = append(list, seg) 57 } 58 if htmlStr[cap[0]+1] == '/' { 59 seg.Cat = 'C' 60 seg.Str = strings.ToLower(htmlStr[cap[0]+2 : cap[1]-1]) 61 seg.Attr = nil 62 list = append(list, seg) 63 } else { 64 // Extract attributes 65 parts = strings.Split(htmlStr[cap[0]+1:cap[1]-1], " ") 66 if len(parts) > 0 { 67 for j, part := range parts { 68 if j == 0 { 69 seg.Cat = 'O' 70 seg.Str = strings.ToLower(parts[0]) 71 seg.Attr = make(map[string]string) 72 } else { 73 attrList := attrRe.FindAllStringSubmatch(part, -1) 74 for _, attr := range attrList { 75 seg.Attr[strings.ToLower(attr[1])] = attr[2] 76 } 77 } 78 } 79 list = append(list, seg) 80 } 81 } 82 pos = cap[1] 83 } 84 if len(htmlStr) > pos { 85 seg.Cat = 'T' 86 seg.Str = htmlStr[pos:] 87 seg.Attr = nil 88 list = append(list, seg) 89 } 90 } else { 91 list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr, Attr: nil}) 92 } 93 return 94 } 95 96 // HTMLBasicType is used for rendering a very basic subset of HTML. It supports 97 // only hyperlinks and bold, italic and underscore attributes. In the Link 98 // structure, the ClrR, ClrG and ClrB fields (0 through 255) define the color 99 // of hyperlinks. The Bold, Italic and Underscore values define the hyperlink 100 // style. 
101 type HTMLBasicType struct { 102 pdf *Fpdf 103 Link struct { 104 ClrR, ClrG, ClrB int 105 Bold, Italic, Underscore bool 106 } 107 } 108 109 // HTMLBasicNew returns an instance that facilitates writing basic HTML in the 110 // specified PDF file. 111 func (f *Fpdf) HTMLBasicNew() (html HTMLBasicType) { 112 html.pdf = f 113 html.Link.ClrR, html.Link.ClrG, html.Link.ClrB = 0, 0, 128 114 html.Link.Bold, html.Link.Italic, html.Link.Underscore = false, false, true 115 return 116 } 117 118 // Write prints text from the current position using the currently selected 119 // font. See HTMLBasicNew() to create a receiver that is associated with the 120 // PDF document instance. The text can be encoded with a basic subset of HTML 121 // that includes hyperlinks and tags for italic (I), bold (B), underscore 122 // (U) and center (CENTER) attributes. When the right margin is reached a line 123 // break occurs and text continues from the left margin. Upon method exit, the 124 // current position is left at the end of the text. 125 // 126 // lineHt indicates the line height in the unit of measure specified in New(). 
127 func (html *HTMLBasicType) Write(lineHt float64, htmlStr string) { 128 var boldLvl, italicLvl, underscoreLvl, linkBold, linkItalic, linkUnderscore int 129 var textR, textG, textB = html.pdf.GetTextColor() 130 var hrefStr string 131 if html.Link.Bold { 132 linkBold = 1 133 } 134 if html.Link.Italic { 135 linkItalic = 1 136 } 137 if html.Link.Underscore { 138 linkUnderscore = 1 139 } 140 setStyle := func(boldAdj, italicAdj, underscoreAdj int) { 141 styleStr := "" 142 boldLvl += boldAdj 143 if boldLvl > 0 { 144 styleStr += "B" 145 } 146 italicLvl += italicAdj 147 if italicLvl > 0 { 148 styleStr += "I" 149 } 150 underscoreLvl += underscoreAdj 151 if underscoreLvl > 0 { 152 styleStr += "U" 153 } 154 html.pdf.SetFont("", styleStr, 0) 155 } 156 putLink := func(urlStr, txtStr string) { 157 // Put a hyperlink 158 html.pdf.SetTextColor(html.Link.ClrR, html.Link.ClrG, html.Link.ClrB) 159 setStyle(linkBold, linkItalic, linkUnderscore) 160 html.pdf.WriteLinkString(lineHt, txtStr, urlStr) 161 setStyle(-linkBold, -linkItalic, -linkUnderscore) 162 html.pdf.SetTextColor(textR, textG, textB) 163 } 164 list := HTMLBasicTokenize(htmlStr) 165 var ok bool 166 alignStr := "L" 167 for _, el := range list { 168 switch el.Cat { 169 case 'T': 170 if len(hrefStr) > 0 { 171 putLink(hrefStr, el.Str) 172 hrefStr = "" 173 } else { 174 if alignStr == "C" || alignStr == "R" { 175 html.pdf.WriteAligned(0, lineHt, el.Str, alignStr) 176 } else { 177 html.pdf.Write(lineHt, el.Str) 178 } 179 } 180 case 'O': 181 switch el.Str { 182 case "b": 183 setStyle(1, 0, 0) 184 case "i": 185 setStyle(0, 1, 0) 186 case "u": 187 setStyle(0, 0, 1) 188 case "br": 189 html.pdf.Ln(lineHt) 190 case "center": 191 html.pdf.Ln(lineHt) 192 alignStr = "C" 193 case "right": 194 html.pdf.Ln(lineHt) 195 alignStr = "R" 196 case "left": 197 html.pdf.Ln(lineHt) 198 alignStr = "L" 199 case "a": 200 hrefStr, ok = el.Attr["href"] 201 if !ok { 202 hrefStr = "" 203 } 204 } 205 case 'C': 206 switch el.Str { 207 case "b": 208 
setStyle(-1, 0, 0) 209 case "i": 210 setStyle(0, -1, 0) 211 case "u": 212 setStyle(0, 0, -1) 213 case "center": 214 html.pdf.Ln(lineHt) 215 alignStr = "L" 216 case "right": 217 html.pdf.Ln(lineHt) 218 alignStr = "L" 219 } 220 } 221 } 222 }