github.com/Azareal/Gosora@v0.0.0-20210729070923-553e66b59003/extend/plugin_bbcode.go (about)

     1  package extend
     2  
     3  import (
     4  	"bytes"
     5  	"math/rand"
     6  	"regexp"
     7  	"strconv"
     8  	"time"
     9  
    10  	c "github.com/Azareal/Gosora/common"
    11  )
    12  
    13  var bbcodeRandom *rand.Rand
    14  var bbcodeInvalidNumber []byte
    15  var bbcodeNoNegative []byte
    16  var bbcodeMissingTag []byte
    17  
    18  var bbcodeBold *regexp.Regexp
    19  var bbcodeItalic *regexp.Regexp
    20  var bbcodeUnderline *regexp.Regexp
    21  var bbcodeStrike *regexp.Regexp
    22  var bbcodeH1 *regexp.Regexp
    23  var bbcodeURL *regexp.Regexp
    24  var bbcodeURLLabel *regexp.Regexp
    25  var bbcodeQuotes *regexp.Regexp
    26  var bbcodeCode *regexp.Regexp
    27  var bbcodeSpoiler *regexp.Regexp
    28  
    29  func init() {
    30  	c.Plugins.Add(&c.Plugin{UName: "bbcode", Name: "BBCode", Author: "Azareal", URL: "https://github.com/Azareal", Init: InitBbcode, Deactivate: deactivateBbcode})
    31  }
    32  
    33  func InitBbcode(pl *c.Plugin) error {
    34  	bbcodeInvalidNumber = []byte("<red>[Invalid Number]</red>")
    35  	bbcodeNoNegative = []byte("<red>[No Negative Numbers]</red>")
    36  	bbcodeMissingTag = []byte("<red>[Missing Tag]</red>")
    37  
    38  	bbcodeBold = regexp.MustCompile(`(?s)\[b\](.*)\[/b\]`)
    39  	bbcodeItalic = regexp.MustCompile(`(?s)\[i\](.*)\[/i\]`)
    40  	bbcodeUnderline = regexp.MustCompile(`(?s)\[u\](.*)\[/u\]`)
    41  	bbcodeStrike = regexp.MustCompile(`(?s)\[s\](.*)\[/s\]`)
    42  	bbcodeH1 = regexp.MustCompile(`(?s)\[h1\](.*)\[/h1\]`)
    43  	urlpattern := `(http|https|ftp|mailto*)(:??)\/\/([\.a-zA-Z\/]+)`
    44  	bbcodeURL = regexp.MustCompile(`\[url\]` + urlpattern + `\[/url\]`)
    45  	bbcodeURLLabel = regexp.MustCompile(`(?s)\[url=` + urlpattern + `\](.*)\[/url\]`)
    46  	bbcodeQuotes = regexp.MustCompile(`\[quote\](.*)\[/quote\]`)
    47  	bbcodeCode = regexp.MustCompile(`\[code\](.*)\[/code\]`)
    48  	bbcodeSpoiler = regexp.MustCompile(`\[spoiler\](.*)\[/spoiler\]`)
    49  
    50  	bbcodeRandom = rand.New(rand.NewSource(time.Now().UnixNano()))
    51  
    52  	pl.AddHook("parse_assign", BbcodeFullParse)
    53  	pl.AddHook("topic_ogdesc_assign", BbcodeStripTags)
    54  	return nil
    55  }
    56  
    57  func deactivateBbcode(pl *c.Plugin) {
    58  	pl.RemoveHook("parse_assign", BbcodeFullParse)
    59  	pl.RemoveHook("topic_ogdesc_assign", BbcodeStripTags)
    60  }
    61  
    62  func BbcodeStripTags(msg string) string {
    63  	msg = bbcodeBold.ReplaceAllString(msg, "$1")
    64  	msg = bbcodeItalic.ReplaceAllString(msg, "$1")
    65  	msg = bbcodeUnderline.ReplaceAllString(msg, "$1")
    66  	msg = bbcodeStrike.ReplaceAllString(msg, "$1")
    67  	return msg
    68  }
    69  
    70  func BbcodeRegexParse(msg string) string {
    71  	msg = bbcodeBold.ReplaceAllString(msg, "<b>$1</b>")
    72  	msg = bbcodeItalic.ReplaceAllString(msg, "<i>$1</i>")
    73  	msg = bbcodeUnderline.ReplaceAllString(msg, "<u>$1</u>")
    74  	msg = bbcodeStrike.ReplaceAllString(msg, "<s>$1</s>")
    75  	msg = bbcodeURL.ReplaceAllString(msg, "<a href=''$1$2//$3' rel='ugc'>$1$2//$3</i>")
    76  	msg = bbcodeURLLabel.ReplaceAllString(msg, "<a href=''$1$2//$3' rel='ugc'>$4</i>")
    77  	msg = bbcodeQuotes.ReplaceAllString(msg, "<blockquote>$1</blockquote>")
    78  	msg = bbcodeSpoiler.ReplaceAllString(msg, "<spoiler>$1</spoiler>")
    79  	msg = bbcodeH1.ReplaceAllString(msg, "<h2>$1</h2>")
    80  	//msg = bbcodeCode.ReplaceAllString(msg,"<span class='codequotes'>$1</span>")
    81  	return msg
    82  }
    83  
    84  // Only does the simple BBCode like [u], [b], [i] and [s]
    85  func bbcodeSimpleParse(msg string) string {
    86  	var hasU, hasB, hasI, hasS bool
    87  	mbytes := []byte(msg)
    88  	for i := 0; (i + 2) < len(mbytes); i++ {
    89  		if mbytes[i] == '[' && mbytes[i+2] == ']' {
    90  			ch := mbytes[i+1]
    91  			if ch == 'b' && !hasB {
    92  				mbytes[i] = '<'
    93  				mbytes[i+2] = '>'
    94  				hasB = true
    95  			} else if ch == 'i' && !hasI {
    96  				mbytes[i] = '<'
    97  				mbytes[i+2] = '>'
    98  				hasI = true
    99  			} else if ch == 'u' && !hasU {
   100  				mbytes[i] = '<'
   101  				mbytes[i+2] = '>'
   102  				hasU = true
   103  			} else if ch == 's' && !hasS {
   104  				mbytes[i] = '<'
   105  				mbytes[i+2] = '>'
   106  				hasS = true
   107  			}
   108  			i += 2
   109  		}
   110  	}
   111  
   112  	// There's an unclosed tag in there x.x
   113  	if hasI || hasU || hasB || hasS {
   114  		closeUnder := []byte("</u>")
   115  		closeItalic := []byte("</i>")
   116  		closeBold := []byte("</b>")
   117  		closeStrike := []byte("</s>")
   118  		if hasI {
   119  			mbytes = append(mbytes, closeItalic...)
   120  		}
   121  		if hasU {
   122  			mbytes = append(mbytes, closeUnder...)
   123  		}
   124  		if hasB {
   125  			mbytes = append(mbytes, closeBold...)
   126  		}
   127  		if hasS {
   128  			mbytes = append(mbytes, closeStrike...)
   129  		}
   130  	}
   131  	return string(mbytes)
   132  }
   133  
   134  // Here for benchmarking purposes. Might add a plugin setting for disabling [code] as it has it's paws everywhere
   135  func BbcodeParseWithoutCode(msg string) string {
   136  	var hasU, hasB, hasI, hasS bool
   137  	var complexBbc bool
   138  	mbytes := []byte(msg)
   139  	for i := 0; (i + 3) < len(mbytes); i++ {
   140  		if mbytes[i] == '[' {
   141  			if mbytes[i+2] != ']' {
   142  				if mbytes[i+1] == '/' {
   143  					if mbytes[i+3] == ']' {
   144  						switch mbytes[i+2] {
   145  						case 'b':
   146  							mbytes[i] = '<'
   147  							mbytes[i+3] = '>'
   148  							hasB = false
   149  						case 'i':
   150  							mbytes[i] = '<'
   151  							mbytes[i+3] = '>'
   152  							hasI = false
   153  						case 'u':
   154  							mbytes[i] = '<'
   155  							mbytes[i+3] = '>'
   156  							hasU = false
   157  						case 's':
   158  							mbytes[i] = '<'
   159  							mbytes[i+3] = '>'
   160  							hasS = false
   161  						}
   162  						i += 3
   163  					} else {
   164  						complexBbc = true
   165  					}
   166  				} else {
   167  					complexBbc = true
   168  				}
   169  			} else {
   170  				ch := mbytes[i+1]
   171  				if ch == 'b' && !hasB {
   172  					mbytes[i] = '<'
   173  					mbytes[i+2] = '>'
   174  					hasB = true
   175  				} else if ch == 'i' && !hasI {
   176  					mbytes[i] = '<'
   177  					mbytes[i+2] = '>'
   178  					hasI = true
   179  				} else if ch == 'u' && !hasU {
   180  					mbytes[i] = '<'
   181  					mbytes[i+2] = '>'
   182  					hasU = true
   183  				} else if ch == 's' && !hasS {
   184  					mbytes[i] = '<'
   185  					mbytes[i+2] = '>'
   186  					hasS = true
   187  				}
   188  				i += 2
   189  			}
   190  		}
   191  	}
   192  
   193  	// There's an unclosed tag in there x.x
   194  	if hasI || hasU || hasB || hasS {
   195  		closeUnder := []byte("</u>")
   196  		closeItalic := []byte("</i>")
   197  		closeBold := []byte("</b>")
   198  		closeStrike := []byte("</s>")
   199  		if hasI {
   200  			mbytes = append(bytes.TrimSpace(mbytes), closeItalic...)
   201  		}
   202  		if hasU {
   203  			mbytes = append(bytes.TrimSpace(mbytes), closeUnder...)
   204  		}
   205  		if hasB {
   206  			mbytes = append(bytes.TrimSpace(mbytes), closeBold...)
   207  		}
   208  		if hasS {
   209  			mbytes = append(bytes.TrimSpace(mbytes), closeStrike...)
   210  		}
   211  	}
   212  
   213  	// Copy the new complex parser over once the rough edges have been smoothed over
   214  	if complexBbc {
   215  		msg = string(mbytes)
   216  		msg = bbcodeURL.ReplaceAllString(msg, "<a href='$1$2//$3' rel='ugc'>$1$2//$3</i>")
   217  		msg = bbcodeURLLabel.ReplaceAllString(msg, "<a href='$1$2//$3' rel='ugc'>$4</i>")
   218  		msg = bbcodeSpoiler.ReplaceAllString(msg, "<spoiler>$1</spoiler>")
   219  		msg = bbcodeQuotes.ReplaceAllString(msg, "<blockquote>$1</blockquote>")
   220  		return bbcodeCode.ReplaceAllString(msg, "<span class='codequotes'>$1</span>")
   221  	}
   222  	return string(mbytes)
   223  }
   224  
   225  // Does every type of BBCode
   226  func BbcodeFullParse(msg string) string {
   227  	var hasU, hasB, hasI, hasS, hasC bool
   228  	var complexBbc bool
   229  
   230  	mbytes := []byte(msg)
   231  	mbytes = append(mbytes, c.SpaceGap...)
   232  	for i := 0; i < len(mbytes); i++ {
   233  		if mbytes[i] == '[' {
   234  			if mbytes[i+2] != ']' {
   235  				if mbytes[i+1] == '/' {
   236  					if mbytes[i+3] == ']' {
   237  						if !hasC {
   238  							switch mbytes[i+2] {
   239  							case 'b':
   240  								mbytes[i] = '<'
   241  								mbytes[i+3] = '>'
   242  								hasB = false
   243  							case 'i':
   244  								mbytes[i] = '<'
   245  								mbytes[i+3] = '>'
   246  								hasI = false
   247  							case 'u':
   248  								mbytes[i] = '<'
   249  								mbytes[i+3] = '>'
   250  								hasU = false
   251  							case 's':
   252  								mbytes[i] = '<'
   253  								mbytes[i+3] = '>'
   254  								hasS = false
   255  							}
   256  							i += 3
   257  						}
   258  					} else {
   259  						if mbytes[i+6] == ']' && mbytes[i+2] == 'c' && mbytes[i+3] == 'o' && mbytes[i+4] == 'd' && mbytes[i+5] == 'e' {
   260  							hasC = false
   261  							i += 7
   262  						}
   263  						complexBbc = true
   264  					}
   265  				} else {
   266  					// Put the biggest index first to avoid unnecessary bounds checks
   267  					if mbytes[i+5] == ']' && mbytes[i+1] == 'c' && mbytes[i+2] == 'o' && mbytes[i+3] == 'd' && mbytes[i+4] == 'e' {
   268  						hasC = true
   269  						i += 6
   270  					}
   271  					complexBbc = true
   272  				}
   273  			} else if !hasC {
   274  				ch := mbytes[i+1]
   275  				if ch == 'b' && !hasB {
   276  					mbytes[i] = '<'
   277  					mbytes[i+2] = '>'
   278  					hasB = true
   279  				} else if ch == 'i' && !hasI {
   280  					mbytes[i] = '<'
   281  					mbytes[i+2] = '>'
   282  					hasI = true
   283  				} else if ch == 'u' && !hasU {
   284  					mbytes[i] = '<'
   285  					mbytes[i+2] = '>'
   286  					hasU = true
   287  				} else if ch == 's' && !hasS {
   288  					mbytes[i] = '<'
   289  					mbytes[i+2] = '>'
   290  					hasS = true
   291  				}
   292  				i += 2
   293  			}
   294  		}
   295  	}
   296  
   297  	// There's an unclosed tag in there somewhere x.x
   298  	if hasI || hasU || hasB || hasS {
   299  		closeUnder := []byte("</u>")
   300  		closeItalic := []byte("</i>")
   301  		closeBold := []byte("</b>")
   302  		closeStrike := []byte("</s>")
   303  		if hasI {
   304  			mbytes = append(bytes.TrimSpace(mbytes), closeItalic...)
   305  		}
   306  		if hasU {
   307  			mbytes = append(bytes.TrimSpace(mbytes), closeUnder...)
   308  		}
   309  		if hasB {
   310  			mbytes = append(bytes.TrimSpace(mbytes), closeBold...)
   311  		}
   312  		if hasS {
   313  			mbytes = append(bytes.TrimSpace(mbytes), closeStrike...)
   314  		}
   315  		mbytes = append(mbytes, c.SpaceGap...)
   316  	}
   317  
   318  	if complexBbc {
   319  		i := 0
   320  		var start, lastTag int
   321  		var outbytes []byte
   322  		for ; i < len(mbytes); i++ {
   323  			if mbytes[i] == '[' {
   324  				if mbytes[i+1] == 'u' {
   325  					if mbytes[i+4] == ']' && mbytes[i+2] == 'r' && mbytes[i+3] == 'l' {
   326  						i, start, lastTag, outbytes = bbcodeParseURL(i, start, lastTag, mbytes, outbytes)
   327  						continue
   328  					}
   329  				} else if mbytes[i+1] == 'r' {
   330  					if bytes.Equal(mbytes[i+2:i+6], []byte("and]")) {
   331  						i, start, lastTag, outbytes = bbcodeParseRand(i, start, lastTag, mbytes, outbytes)
   332  					}
   333  				}
   334  			}
   335  		}
   336  		if lastTag != i {
   337  			outbytes = append(outbytes, mbytes[lastTag:]...)
   338  		}
   339  
   340  		if len(outbytes) != 0 {
   341  			msg = string(outbytes[0 : len(outbytes)-10])
   342  		} else {
   343  			msg = string(mbytes[0 : len(mbytes)-10])
   344  		}
   345  
   346  		// TODO: Optimise these
   347  		//msg = bbcode_url.ReplaceAllString(msg,"<a href=\"$1$2//$3\" rel=\"ugc\">$1$2//$3</i>")
   348  		msg = bbcodeURLLabel.ReplaceAllString(msg, "<a href='$1$2//$3' rel='ugc'>$4</i>")
   349  		msg = bbcodeQuotes.ReplaceAllString(msg, "<blockquote>$1</blockquote>")
   350  		msg = bbcodeCode.ReplaceAllString(msg, "<span class='codequotes'>$1</span>")
   351  		msg = bbcodeSpoiler.ReplaceAllString(msg, "<spoiler>$1</spoiler>")
   352  		msg = bbcodeH1.ReplaceAllString(msg, "<h2>$1</h2>")
   353  	} else {
   354  		msg = string(mbytes[0 : len(mbytes)-10])
   355  	}
   356  
   357  	return msg
   358  }
   359  
   360  // TODO: Strip the containing [url] so the media parser can work it's magic instead? Or do we want to allow something like [url=]label[/url] here?
   361  func bbcodeParseURL(i int, start int, lastTag int, mbytes []byte, outbytes []byte) (int, int, int, []byte) {
   362  	start = i + 5
   363  	outbytes = append(outbytes, mbytes[lastTag:i]...)
   364  	i = start
   365  	i += c.PartialURLStringLen2(string(mbytes[start:]))
   366  	if !bytes.Equal(mbytes[i:i+6], []byte("[/url]")) {
   367  		outbytes = append(outbytes, c.InvalidURL...)
   368  		return i, start, lastTag, outbytes
   369  	}
   370  
   371  	outbytes = append(outbytes, c.URLOpen...)
   372  	outbytes = append(outbytes, mbytes[start:i]...)
   373  	outbytes = append(outbytes, c.URLOpen2...)
   374  	outbytes = append(outbytes, mbytes[start:i]...)
   375  	outbytes = append(outbytes, c.URLClose...)
   376  	i += 6
   377  	lastTag = i
   378  
   379  	return i, start, lastTag, outbytes
   380  }
   381  
   382  func bbcodeParseRand(i int, start int, lastTag int, msgbytes []byte, outbytes []byte) (int, int, int, []byte) {
   383  	outbytes = append(outbytes, msgbytes[lastTag:i]...)
   384  	start = i + 6
   385  	i = start
   386  	for ; ; i++ {
   387  		if msgbytes[i] == '[' {
   388  			if !bytes.Equal(msgbytes[i+1:i+7], []byte("/rand]")) {
   389  				outbytes = append(outbytes, bbcodeMissingTag...)
   390  				return i, start, lastTag, outbytes
   391  			}
   392  			break
   393  		} else if (len(msgbytes) - 1) < (i + 10) {
   394  			outbytes = append(outbytes, bbcodeMissingTag...)
   395  			return i, start, lastTag, outbytes
   396  		}
   397  	}
   398  
   399  	number, err := strconv.ParseInt(string(msgbytes[start:i]), 10, 64)
   400  	if err != nil {
   401  		outbytes = append(outbytes, bbcodeInvalidNumber...)
   402  		return i, start, lastTag, outbytes
   403  	}
   404  
   405  	// TODO: Add support for negative numbers?
   406  	if number < 0 {
   407  		outbytes = append(outbytes, bbcodeNoNegative...)
   408  		return i, start, lastTag, outbytes
   409  	}
   410  
   411  	var dat []byte
   412  	if number == 0 {
   413  		dat = []byte("0")
   414  	} else {
   415  		dat = []byte(strconv.FormatInt((bbcodeRandom.Int63n(number)), 10))
   416  	}
   417  
   418  	outbytes = append(outbytes, dat...)
   419  	i += 7
   420  	lastTag = i
   421  	return i, start, lastTag, outbytes
   422  }