github.com/godaddy-x/freego@v1.0.156/goquery/html_valid.go (about)

     1  package goquery
     2  
import (
	"fmt"
	"html"
	"strings"

	"github.com/godaddy-x/freego/utils"
)
     8  
     9  var (
    10  	access_tag   = []string{"h4", "h2", "section"}
    11  	access_style = []string{"text-decoration", "line-through", "font-style", "color", "text-align", "font-weight"}
    12  )
    13  
    14  type HtmlValidResult struct {
    15  	NewContent string
    16  	ContentLen int
    17  	FailMsg    string
    18  }
    19  
    20  func ValidImgURL(content, prefix string) error {
    21  	if strings.HasPrefix(content, prefix) {
    22  		if utils.ValidPattern(strings.ReplaceAll(content, prefix, ""), "\\d{19}/\\d{19}\\.jpg") {
    23  			return nil
    24  		}
    25  	}
    26  	return utils.Error("图片URL无效")
    27  }
    28  
    29  func ValidZxHtml(htmlstr string) *HtmlValidResult {
    30  	r := strings.NewReader(utils.AddStr("<content>", htmlstr, "</content>"))
    31  	doc, err := NewDocumentFromReader(r)
    32  	if err != nil {
    33  		fmt.Println(err)
    34  		return &HtmlValidResult{FailMsg: "解析html数据失败"}
    35  	}
    36  	children := doc.Find("content").Children()
    37  	if children.Length() == 0 {
    38  		return &HtmlValidResult{FailMsg: "无匹配数据"}
    39  	}
    40  	validResult := &HtmlValidResult{}
    41  	children.Each(func(i int, v *Selection) {
    42  		if len(validResult.FailMsg) > 0 {
    43  			return
    44  		}
    45  		// 样式校验
    46  		tag := ""
    47  		style := ""
    48  		for _, v := range v.Nodes {
    49  			if !utils.CheckStr(v.Data, access_tag...) {
    50  				validResult.FailMsg = "Tag类型无效"
    51  				return
    52  			}
    53  			tag = v.Data
    54  			if len(v.Attr) == 0 {
    55  				continue
    56  			}
    57  			if len(v.Attr) > 1 {
    58  				validResult.FailMsg = "无效的样式"
    59  				return
    60  			}
    61  			attr := v.Attr[0]
    62  			if attr.Key != "style" {
    63  				validResult.FailMsg = "样式校验失败"
    64  				return
    65  			}
    66  			style = attr.Val
    67  			split := strings.Split(attr.Val, ";")
    68  			for _, v := range split {
    69  				if len(v) == 0 {
    70  					continue
    71  				}
    72  				split2 := strings.Split(v, ":")
    73  				if len(split2) == 2 {
    74  					if !utils.CheckStr(strings.TrimSpace(split2[0]), access_style...) {
    75  						validResult.FailMsg = "不支持的样式"
    76  						return
    77  					}
    78  				} else {
    79  					validResult.FailMsg = "样式异常"
    80  					return
    81  				}
    82  			}
    83  		}
    84  		// 内容校验
    85  		content := []rune(v.Text())
    86  		content_len := len(content)
    87  		new_content := make([]rune, 0, content_len+16)
    88  		for i := 0; i < content_len; i++ {
    89  			v := content[i]
    90  			if v == '<' {
    91  				new_content = append(new_content, '<')
    92  			} else if v == '>' {
    93  				new_content = append(new_content, '>')
    94  			} else if v == '\'' {
    95  				new_content = append(new_content, '‘')
    96  			} else if v == '"' {
    97  				new_content = append(new_content, '“')
    98  			} else if v == '&' {
    99  				new_content = append(new_content, '&')
   100  			} else if v == '\\' {
   101  				new_content = append(new_content, '\')
   102  			} else if v == '#' {
   103  				new_content = append(new_content, '#')
   104  			} else if v == ':' {
   105  				new_content = append(new_content, ':')
   106  			} else if v == ';' {
   107  				new_content = append(new_content, ';')
   108  			} else if v == '.' {
   109  				new_content = append(new_content, '。')
   110  			} else if v == '%' {
   111  				if content_len >= i+2 {
   112  					if content[i+1] == '3' && (content[i+2] == 'c' || content[i+2] == 'C') {
   113  						new_content = append(new_content, '<')
   114  						i += 2
   115  						continue
   116  					}
   117  					if content[i+1] == '6' && content[i+2] == '0' {
   118  						new_content = append(new_content, '<')
   119  						i += 2
   120  						continue
   121  					}
   122  					if content[i+1] == '3' && (content[i+2] == 'e' || content[i+2] == 'E') {
   123  						new_content = append(new_content, '>')
   124  						i += 2
   125  						continue
   126  					}
   127  					if content[i+1] == '6' && content[i+2] == '2' {
   128  						new_content = append(new_content, '>')
   129  						i += 2
   130  						continue
   131  					}
   132  				}
   133  			} else {
   134  				new_content = append(new_content, v)
   135  			}
   136  		}
   137  		if len(style) > 0 {
   138  			style = utils.AddStr(" style='", style, "'")
   139  		}
   140  		validResult.ContentLen = validResult.ContentLen + utils.Len(strings.TrimSpace(v.Text()))
   141  		validResult.NewContent = utils.AddStr(validResult.NewContent, "<", tag, style, ">", string(new_content), "</", tag, ">")
   142  	})
   143  	if len(validResult.FailMsg) > 0 {
   144  		validResult.ContentLen = 0
   145  		validResult.NewContent = ""
   146  	}
   147  	return validResult
   148  }