github.com/godaddy-x/freego@v1.0.156/goquery/html_valid.go (about) 1 package goquery 2 3 import ( 4 "fmt" 5 "github.com/godaddy-x/freego/utils" 6 "strings" 7 ) 8 9 var ( 10 access_tag = []string{"h4", "h2", "section"} 11 access_style = []string{"text-decoration", "line-through", "font-style", "color", "text-align", "font-weight"} 12 ) 13 14 type HtmlValidResult struct { 15 NewContent string 16 ContentLen int 17 FailMsg string 18 } 19 20 func ValidImgURL(content, prefix string) error { 21 if strings.HasPrefix(content, prefix) { 22 if utils.ValidPattern(strings.ReplaceAll(content, prefix, ""), "\\d{19}/\\d{19}\\.jpg") { 23 return nil 24 } 25 } 26 return utils.Error("图片URL无效") 27 } 28 29 func ValidZxHtml(htmlstr string) *HtmlValidResult { 30 r := strings.NewReader(utils.AddStr("<content>", htmlstr, "</content>")) 31 doc, err := NewDocumentFromReader(r) 32 if err != nil { 33 fmt.Println(err) 34 return &HtmlValidResult{FailMsg: "解析html数据失败"} 35 } 36 children := doc.Find("content").Children() 37 if children.Length() == 0 { 38 return &HtmlValidResult{FailMsg: "无匹配数据"} 39 } 40 validResult := &HtmlValidResult{} 41 children.Each(func(i int, v *Selection) { 42 if len(validResult.FailMsg) > 0 { 43 return 44 } 45 // 样式校验 46 tag := "" 47 style := "" 48 for _, v := range v.Nodes { 49 if !utils.CheckStr(v.Data, access_tag...) { 50 validResult.FailMsg = "Tag类型无效" 51 return 52 } 53 tag = v.Data 54 if len(v.Attr) == 0 { 55 continue 56 } 57 if len(v.Attr) > 1 { 58 validResult.FailMsg = "无效的样式" 59 return 60 } 61 attr := v.Attr[0] 62 if attr.Key != "style" { 63 validResult.FailMsg = "样式校验失败" 64 return 65 } 66 style = attr.Val 67 split := strings.Split(attr.Val, ";") 68 for _, v := range split { 69 if len(v) == 0 { 70 continue 71 } 72 split2 := strings.Split(v, ":") 73 if len(split2) == 2 { 74 if !utils.CheckStr(strings.TrimSpace(split2[0]), access_style...) { 75 validResult.FailMsg = "不支持的样式" 76 return 77 } 78 } else { 79 validResult.FailMsg = "样式异常" 80 return 81 } 82 } 83 } 84 // 内容校验 85 content := []rune(v.Text()) 86 content_len := len(content) 87 new_content := make([]rune, 0, content_len+16) 88 for i := 0; i < content_len; i++ { 89 v := content[i] 90 if v == '<' { 91 new_content = append(new_content, '<') 92 } else if v == '>' { 93 new_content = append(new_content, '>') 94 } else if v == '\'' { 95 new_content = append(new_content, '‘') 96 } else if v == '"' { 97 new_content = append(new_content, '“') 98 } else if v == '&' { 99 new_content = append(new_content, '&') 100 } else if v == '\\' { 101 new_content = append(new_content, '\') 102 } else if v == '#' { 103 new_content = append(new_content, '#') 104 } else if v == ':' { 105 new_content = append(new_content, ':') 106 } else if v == ';' { 107 new_content = append(new_content, ';') 108 } else if v == '.' { 109 new_content = append(new_content, '。') 110 } else if v == '%' { 111 if content_len >= i+2 { 112 if content[i+1] == '3' && (content[i+2] == 'c' || content[i+2] == 'C') { 113 new_content = append(new_content, '<') 114 i += 2 115 continue 116 } 117 if content[i+1] == '6' && content[i+2] == '0' { 118 new_content = append(new_content, '<') 119 i += 2 120 continue 121 } 122 if content[i+1] == '3' && (content[i+2] == 'e' || content[i+2] == 'E') { 123 new_content = append(new_content, '>') 124 i += 2 125 continue 126 } 127 if content[i+1] == '6' && content[i+2] == '2' { 128 new_content = append(new_content, '>') 129 i += 2 130 continue 131 } 132 } 133 } else { 134 new_content = append(new_content, v) 135 } 136 } 137 if len(style) > 0 { 138 style = utils.AddStr(" style='", style, "'") 139 } 140 validResult.ContentLen = validResult.ContentLen + utils.Len(strings.TrimSpace(v.Text())) 141 validResult.NewContent = utils.AddStr(validResult.NewContent, "<", tag, style, ">", string(new_content), "</", tag, ">") 142 }) 143 if len(validResult.FailMsg) > 0 { 144 validResult.ContentLen = 0 145 validResult.NewContent = "" 146 } 147 return validResult 148 }