github.com/webx-top/com@v1.2.12/html.go (about)

     1  // Copyright 2013 com authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License"): you may
     4  // not use this file except in compliance with the License. You may obtain
     5  // a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    11  // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
    12  // License for the specific language governing permissions and limitations
    13  // under the License.
    14  
    15  package com
    16  
    17  import (
    18  	"html"
    19  	"regexp"
    20  	"strings"
    21  )
    22  
    23  var html2jsReplacer = strings.NewReplacer(
    24  	`\`, `\\`,
    25  	"\n", `\n`,
    26  	"\r", "",
    27  	`"`, `\"`,
    28  )
    29  
    30  // HTML2JS converts []byte type of HTML content into JS format.
    31  func HTML2JS(data []byte) []byte {
    32  	s := string(data)
    33  	s = html2jsReplacer.Replace(s)
    34  	return []byte(s)
    35  }
    36  
    37  // HTMLEncode encode html chars to string
    38  func HTMLEncode(str string) string {
    39  	return html.EscapeString(str)
    40  }
    41  
    42  // HTMLDecode decode string to html chars
    43  func HTMLDecode(str string) string {
    44  	return html.UnescapeString(str)
    45  }
    46  
    47  // HTMLDecodeAll decode string to html chars
    48  func HTMLDecodeAll(text string) string {
    49  	original := text
    50  	text = HTMLDecode(text)
    51  	if original == text {
    52  		return text
    53  	}
    54  	return HTMLDecodeAll(text)
    55  }
    56  
    57  var (
    58  	regexpAnyHTMLTag    = regexp.MustCompile(`<[\S\s]+?>`)
    59  	regexpStyleHTMLTag  = regexp.MustCompile(`<(?i:style)[\S\s]+?</(?i:style)[^>]*>`)
    60  	regexpScriptHTMLTag = regexp.MustCompile(`<(?i:script)[\S\s]+?</(?i:script)[^>]*>`)
    61  	regexpMoreSpace     = regexp.MustCompile(`([\s]){2,}`)
    62  	regexpMoreNewline   = regexp.MustCompile("(\n){2,}")
    63  	regexpAnyHTMLAttr   = regexp.MustCompile(`<[/]?[\S]+[^>]*>`)
    64  	regexpBrHTMLTag     = regexp.MustCompile("<(?i:br)[^>]*>")
    65  )
    66  
    67  // ClearHTMLAttr clear all attributes
    68  func ClearHTMLAttr(src string) string {
    69  	src = regexpAnyHTMLAttr.ReplaceAllString(src, "<$1$2>")
    70  	return src
    71  }
    72  
    73  // TextLine Single line of text
    74  func TextLine(src string) string {
    75  	src = StripTags(src)
    76  	return RemoveEOL(src)
    77  }
    78  
    79  // CleanMoreNl remove all \n(2+)
    80  func CleanMoreNl(src string) string {
    81  	return regexpMoreNewline.ReplaceAllString(src, "$1")
    82  }
    83  
    84  // CleanMoreSpace remove all spaces(2+)
    85  func CleanMoreSpace(src string) string {
    86  	return regexpMoreSpace.ReplaceAllString(src, "$1")
    87  }
    88  
    89  // StripTags strip tags in html string
    90  func StripTags(src string) string {
    91  	//将HTML标签全转换成小写
    92  	//src = regexpAnyHTMLTag.ReplaceAllStringFunc(src, strings.ToLower)
    93  
    94  	//remove tag <style>
    95  	src = regexpStyleHTMLTag.ReplaceAllString(src, "")
    96  
    97  	//remove tag <script>
    98  	src = regexpScriptHTMLTag.ReplaceAllString(src, "")
    99  
   100  	//replace all html tag into \n
   101  	src = regexpAnyHTMLTag.ReplaceAllString(src, "\n")
   102  	src = CleanMoreSpace(src)
   103  
   104  	return strings.TrimSpace(src)
   105  }
   106  
   107  var nl2brReplacer = strings.NewReplacer(
   108  	"\r", "",
   109  	"\n", "<br />",
   110  )
   111  
   112  // Nl2br change \n to <br/>
   113  func Nl2br(str string) string {
   114  	return nl2brReplacer.Replace(str)
   115  }
   116  
   117  // Br2nl change <br/> to \n
   118  func Br2nl(str string) string {
   119  	return regexpBrHTMLTag.ReplaceAllString(str, "\n")
   120  }