// Copyright 2013 com authors
//
// Licensed under the Apache License, Version 2.0 (the "License"): you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.

// html2jsReplacer rewrites text in a single pass: a backslash is doubled, a
// line feed becomes the two characters `\n`, a carriage return is dropped,
// and a double quote is backslash-escaped.
var html2jsReplacer = strings.NewReplacer(
	`\`, `\\`,
	"\n", `\n`,
	"\r", "",
	`"`, `\"`,
)

// HTML2JS converts HTML content into a form that can be placed between
// double quotes in JavaScript source. Backslashes, double quotes and line
// feeds are escaped and carriage returns are removed; no other character is
// touched (single quotes and "</script>" are NOT escaped).
func HTML2JS(data []byte) []byte {
	return []byte(html2jsReplacer.Replace(string(data)))
}

// HTMLEncode escapes the characters <, >, &, ' and " in str as HTML
// entities. It is a thin wrapper around html.EscapeString.
func HTMLEncode(str string) string {
	return html.EscapeString(str)
}

// HTMLDecode replaces HTML entities in str with the characters they stand
// for, performing exactly one decoding pass. It is a thin wrapper around
// html.UnescapeString.
func HTMLDecode(str string) string {
	return html.UnescapeString(str)
}

// HTMLDecodeAll decodes HTML entities repeatedly until a pass leaves the text
// unchanged, so multiply-encoded input such as "&amp;amp;lt;" is reduced all
// the way to "<".
func HTMLDecodeAll(text string) string {
	// Iterate rather than recurse: the number of passes depends on the input,
	// and a loop keeps stack usage constant however deeply it is encoded.
	for {
		decoded := HTMLDecode(text)
		if decoded == text {
			return decoded
		}
		text = decoded
	}
}

// Patterns used by the tag-handling helpers of this package. They are
// compiled once at package initialisation.
var (
	// regexpAnyHTMLTag matches "<", the shortest run of one or more arbitrary
	// characters (newlines included), and the next ">".
	regexpAnyHTMLTag = regexp.MustCompile(`<[\S\s]+?>`)

	// regexpStyleHTMLTag matches from "<style" up to and including the first
	// following "</style...>"; the tag name is matched case-insensitively.
	regexpStyleHTMLTag = regexp.MustCompile(`<(?i:style)[\S\s]+?</(?i:style)[^>]*>`)

	// regexpScriptHTMLTag matches from "<script" up to and including the
	// first following "</script...>"; the tag name is matched
	// case-insensitively.
	regexpScriptHTMLTag = regexp.MustCompile(`<(?i:script)[\S\s]+?</(?i:script)[^>]*>`)

	// regexpMoreSpace matches a run of two or more whitespace characters.
	// Group 1 is repeated, so it holds the last character of the run.
	regexpMoreSpace = regexp.MustCompile(`([\s]){2,}`)

	// regexpMoreNewline matches a run of two or more line feeds; group 1
	// holds a single "\n".
	regexpMoreNewline = regexp.MustCompile("(\n){2,}")

	// regexpAnyHTMLAttr matches a tag-like "<...>" span.
	// NOTE(review): the pattern defines no capture groups, so "$1"/"$2" in a
	// replacement template expand to the empty string, and the greedy [\S]+
	// can run across several tags when no whitespace separates them.
	regexpAnyHTMLAttr = regexp.MustCompile(`<[/]?[\S]+[^>]*>`)

	// regexpBrHTMLTag matches "<br", case-insensitively, followed by anything
	// up to the next ">". It therefore also matches other tags whose name
	// merely starts with "br".
	regexpBrHTMLTag = regexp.MustCompile("<(?i:br)[^>]*>")
)
ClearHTMLAttr clear all attributes 68 func ClearHTMLAttr(src string) string { 69 src = regexpAnyHTMLAttr.ReplaceAllString(src, "<$1$2>") 70 return src 71 } 72 73 // TextLine Single line of text 74 func TextLine(src string) string { 75 src = StripTags(src) 76 return RemoveEOL(src) 77 } 78 79 // CleanMoreNl remove all \n(2+) 80 func CleanMoreNl(src string) string { 81 return regexpMoreNewline.ReplaceAllString(src, "$1") 82 } 83 84 // CleanMoreSpace remove all spaces(2+) 85 func CleanMoreSpace(src string) string { 86 return regexpMoreSpace.ReplaceAllString(src, "$1") 87 } 88 89 // StripTags strip tags in html string 90 func StripTags(src string) string { 91 //将HTML标签全转换成小写 92 //src = regexpAnyHTMLTag.ReplaceAllStringFunc(src, strings.ToLower) 93 94 //remove tag <style> 95 src = regexpStyleHTMLTag.ReplaceAllString(src, "") 96 97 //remove tag <script> 98 src = regexpScriptHTMLTag.ReplaceAllString(src, "") 99 100 //replace all html tag into \n 101 src = regexpAnyHTMLTag.ReplaceAllString(src, "\n") 102 src = CleanMoreSpace(src) 103 104 return strings.TrimSpace(src) 105 } 106 107 var nl2brReplacer = strings.NewReplacer( 108 "\r", "", 109 "\n", "<br />", 110 ) 111 112 // Nl2br change \n to <br/> 113 func Nl2br(str string) string { 114 return nl2brReplacer.Replace(str) 115 } 116 117 // Br2nl change <br/> to \n 118 func Br2nl(str string) string { 119 return regexpBrHTMLTag.ReplaceAllString(str, "\n") 120 }