codeberg.org/go-pdf/fpdf@v0.11.1/htmlbasic.go (about)

     1  // Copyright ©2023 The go-pdf Authors. All rights reserved.
     2  // Use of this source code is governed by a MIT-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6   * Copyright (c) 2014 Kurt Jung (Gmail: kurt.w.jung)
     7   *
     8   * Permission to use, copy, modify, and distribute this software for any
     9   * purpose with or without fee is hereby granted, provided that the above
    10   * copyright notice and this permission notice appear in all copies.
    11   *
    12   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    13   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    14   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    15   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    16   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    17   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    18   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    19   */
    20  
    21  package fpdf
    22  
    23  import (
    24  	"regexp"
    25  	"strings"
    26  )
    27  
    28  // HTMLBasicSegmentType defines a segment of literal text in which the current
    29  // attributes do not vary, or an open tag or a close tag.
    30  type HTMLBasicSegmentType struct {
    31  	Cat  byte              // 'O' open tag, 'C' close tag, 'T' text
    32  	Str  string            // Literal text unchanged, tags are lower case
    33  	Attr map[string]string // Attribute keys are lower case
    34  }
    35  
    36  // HTMLBasicTokenize returns a list of HTML tags and literal elements. This is
    37  // done with regular expressions, so the result is only marginally better than
    38  // useless.
    39  func HTMLBasicTokenize(htmlStr string) (list []HTMLBasicSegmentType) {
    40  	// This routine is adapted from http://www.fpdf.org/
    41  	list = make([]HTMLBasicSegmentType, 0, 16)
    42  	htmlStr = strings.Replace(htmlStr, "\n", " ", -1)
    43  	htmlStr = strings.Replace(htmlStr, "\r", "", -1)
    44  	tagRe, _ := regexp.Compile(`(?U)<.*>`)
    45  	attrRe, _ := regexp.Compile(`([^=]+)=["']?([^"']+)`)
    46  	capList := tagRe.FindAllStringIndex(htmlStr, -1)
    47  	if capList != nil {
    48  		var seg HTMLBasicSegmentType
    49  		var parts []string
    50  		pos := 0
    51  		for _, cap := range capList {
    52  			if pos < cap[0] {
    53  				seg.Cat = 'T'
    54  				seg.Str = htmlStr[pos:cap[0]]
    55  				seg.Attr = nil
    56  				list = append(list, seg)
    57  			}
    58  			if htmlStr[cap[0]+1] == '/' {
    59  				seg.Cat = 'C'
    60  				seg.Str = strings.ToLower(htmlStr[cap[0]+2 : cap[1]-1])
    61  				seg.Attr = nil
    62  				list = append(list, seg)
    63  			} else {
    64  				// Extract attributes
    65  				parts = strings.Split(htmlStr[cap[0]+1:cap[1]-1], " ")
    66  				if len(parts) > 0 {
    67  					for j, part := range parts {
    68  						if j == 0 {
    69  							seg.Cat = 'O'
    70  							seg.Str = strings.ToLower(parts[0])
    71  							seg.Attr = make(map[string]string)
    72  						} else {
    73  							attrList := attrRe.FindAllStringSubmatch(part, -1)
    74  							for _, attr := range attrList {
    75  								seg.Attr[strings.ToLower(attr[1])] = attr[2]
    76  							}
    77  						}
    78  					}
    79  					list = append(list, seg)
    80  				}
    81  			}
    82  			pos = cap[1]
    83  		}
    84  		if len(htmlStr) > pos {
    85  			seg.Cat = 'T'
    86  			seg.Str = htmlStr[pos:]
    87  			seg.Attr = nil
    88  			list = append(list, seg)
    89  		}
    90  	} else {
    91  		list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr, Attr: nil})
    92  	}
    93  	return
    94  }
    95  
    96  // HTMLBasicType is used for rendering a very basic subset of HTML. It supports
    97  // only hyperlinks and bold, italic and underscore attributes. In the Link
    98  // structure, the ClrR, ClrG and ClrB fields (0 through 255) define the color
    99  // of hyperlinks. The Bold, Italic and Underscore values define the hyperlink
   100  // style.
   101  type HTMLBasicType struct {
   102  	pdf  *Fpdf
   103  	Link struct {
   104  		ClrR, ClrG, ClrB         int
   105  		Bold, Italic, Underscore bool
   106  	}
   107  }
   108  
   109  // HTMLBasicNew returns an instance that facilitates writing basic HTML in the
   110  // specified PDF file.
   111  func (f *Fpdf) HTMLBasicNew() (html HTMLBasicType) {
   112  	html.pdf = f
   113  	html.Link.ClrR, html.Link.ClrG, html.Link.ClrB = 0, 0, 128
   114  	html.Link.Bold, html.Link.Italic, html.Link.Underscore = false, false, true
   115  	return
   116  }
   117  
   118  // Write prints text from the current position using the currently selected
   119  // font. See HTMLBasicNew() to create a receiver that is associated with the
   120  // PDF document instance. The text can be encoded with a basic subset of HTML
   121  // that includes hyperlinks and tags for italic (I), bold (B), underscore
   122  // (U) and center (CENTER) attributes. When the right margin is reached a line
   123  // break occurs and text continues from the left margin. Upon method exit, the
   124  // current position is left at the end of the text.
   125  //
   126  // lineHt indicates the line height in the unit of measure specified in New().
   127  func (html *HTMLBasicType) Write(lineHt float64, htmlStr string) {
   128  	var boldLvl, italicLvl, underscoreLvl, linkBold, linkItalic, linkUnderscore int
   129  	var textR, textG, textB = html.pdf.GetTextColor()
   130  	var hrefStr string
   131  	if html.Link.Bold {
   132  		linkBold = 1
   133  	}
   134  	if html.Link.Italic {
   135  		linkItalic = 1
   136  	}
   137  	if html.Link.Underscore {
   138  		linkUnderscore = 1
   139  	}
   140  	setStyle := func(boldAdj, italicAdj, underscoreAdj int) {
   141  		styleStr := ""
   142  		boldLvl += boldAdj
   143  		if boldLvl > 0 {
   144  			styleStr += "B"
   145  		}
   146  		italicLvl += italicAdj
   147  		if italicLvl > 0 {
   148  			styleStr += "I"
   149  		}
   150  		underscoreLvl += underscoreAdj
   151  		if underscoreLvl > 0 {
   152  			styleStr += "U"
   153  		}
   154  		html.pdf.SetFont("", styleStr, 0)
   155  	}
   156  	putLink := func(urlStr, txtStr string) {
   157  		// Put a hyperlink
   158  		html.pdf.SetTextColor(html.Link.ClrR, html.Link.ClrG, html.Link.ClrB)
   159  		setStyle(linkBold, linkItalic, linkUnderscore)
   160  		html.pdf.WriteLinkString(lineHt, txtStr, urlStr)
   161  		setStyle(-linkBold, -linkItalic, -linkUnderscore)
   162  		html.pdf.SetTextColor(textR, textG, textB)
   163  	}
   164  	list := HTMLBasicTokenize(htmlStr)
   165  	var ok bool
   166  	alignStr := "L"
   167  	for _, el := range list {
   168  		switch el.Cat {
   169  		case 'T':
   170  			if len(hrefStr) > 0 {
   171  				putLink(hrefStr, el.Str)
   172  				hrefStr = ""
   173  			} else {
   174  				if alignStr == "C" || alignStr == "R" {
   175  					html.pdf.WriteAligned(0, lineHt, el.Str, alignStr)
   176  				} else {
   177  					html.pdf.Write(lineHt, el.Str)
   178  				}
   179  			}
   180  		case 'O':
   181  			switch el.Str {
   182  			case "b":
   183  				setStyle(1, 0, 0)
   184  			case "i":
   185  				setStyle(0, 1, 0)
   186  			case "u":
   187  				setStyle(0, 0, 1)
   188  			case "br":
   189  				html.pdf.Ln(lineHt)
   190  			case "center":
   191  				html.pdf.Ln(lineHt)
   192  				alignStr = "C"
   193  			case "right":
   194  				html.pdf.Ln(lineHt)
   195  				alignStr = "R"
   196  			case "left":
   197  				html.pdf.Ln(lineHt)
   198  				alignStr = "L"
   199  			case "a":
   200  				hrefStr, ok = el.Attr["href"]
   201  				if !ok {
   202  					hrefStr = ""
   203  				}
   204  			}
   205  		case 'C':
   206  			switch el.Str {
   207  			case "b":
   208  				setStyle(-1, 0, 0)
   209  			case "i":
   210  				setStyle(0, -1, 0)
   211  			case "u":
   212  				setStyle(0, 0, -1)
   213  			case "center":
   214  				html.pdf.Ln(lineHt)
   215  				alignStr = "L"
   216  			case "right":
   217  				html.pdf.Ln(lineHt)
   218  				alignStr = "L"
   219  			}
   220  		}
   221  	}
   222  }