github.com/Azareal/Gosora@v0.0.0-20210729070923-553e66b59003/common/parser.go (about) 1 package common 2 3 import ( 4 "bytes" 5 //"fmt" 6 //"log" 7 8 "net/url" 9 "path/filepath" 10 "regexp" 11 "strconv" 12 "strings" 13 "unicode/utf8" 14 ) 15 16 // TODO: Use the template system? 17 // TODO: Somehow localise these? 18 var SpaceGap = []byte(" ") 19 var httpProtBytes = []byte("http://") 20 var DoubleForwardSlash = []byte("//") 21 var InvalidURL = []byte("<red>[Invalid URL]</red>") 22 var InvalidTopic = []byte("<red>[Invalid Topic]</red>") 23 var InvalidProfile = []byte("<red>[Invalid Profile]</red>") 24 var InvalidForum = []byte("<red>[Invalid Forum]</red>") 25 var unknownMedia = []byte("<red>[Unknown Media]</red>") 26 var URLOpen = []byte("<a href='") 27 var URLOpenUser = []byte("<a rel='ugc'href='") 28 var URLOpen2 = []byte("'>") 29 var bytesSinglequote = []byte("'") 30 var bytesGreaterThan = []byte(">") 31 var urlMention = []byte("'class='mention'") 32 var URLClose = []byte("</a>") 33 var videoOpen = []byte("<video controls src=\"") 34 var videoOpen2 = []byte("\"><a class='attach'href=\"") 35 var videoClose = []byte("\"download>Attachment</a></video>") 36 var audioOpen = []byte("<audio controls src=\"") 37 var audioOpen2 = []byte("\"><a class='attach'href=\"") 38 var audioClose = []byte("\"download>Attachment</a></audio>") 39 var imageOpen = []byte("<a href=\"") 40 var imageOpen2 = []byte("\"><img src='") 41 var imageClose = []byte("'class='postImage'></a>") 42 var attachOpen = []byte("<a class='attach'href=\"") 43 var attachClose = []byte("\"download>Attachment</a>") 44 var sidParam = []byte("?sid=") 45 var stypeParam = []byte("&stype=") 46 47 /*var textShortOpen = []byte("<a class='attach'href=\"") 48 var textShortOpen2 = []byte("\">View</a> / <a class='attach'href=\"") 49 var textShortClose = []byte("\"download>Download</a>")*/ 50 var textOpen = []byte("<div class='attach_box'><div class='attach_info'>") 51 var textOpen2 = []byte("</div><div class='attach_opts'><a class='attach'href=\"") 52 var textOpen3 = []byte("\">View</a> / <a class='attach'href=\"") 53 var textClose = []byte("\"download>Download</a></div></div>") 54 var urlPattern = `(?s)([ {1}])((http|https|ftp|mailto)*)(:{??)\/\/([\.a-zA-Z\/]+)([ {1}])` 55 var urlReg *regexp.Regexp 56 57 const imageSizeHint = len("<a href=\"") + len("\"><img src='") + len("'class='postImage'></a>") 58 const videoSizeHint = len("<video controls src=\"") + len("\"><a class='attach'href=\"") + len("\"download>Attachment</a></video>") + len("?sid=") + len("&stype=") + 8 59 const audioSizeHint = len("<audio controls src=\"") + len("\"><a class='attach'href=\"") + len("\"download>Attachment</a></audio>") + len("?sid=") + len("&stype=") + 8 60 const mentionSizeHint = len("<a href='") + len("'class='mention'") + len(">@") + len("</a>") 61 62 func init() { 63 urlReg = regexp.MustCompile(urlPattern) 64 } 65 66 var emojis map[string]string 67 68 type emojiHolder struct { 69 NoDefault bool `json:"no_defaults"` 70 Emojis []map[string]string `json:"emojis"` 71 } 72 73 func InitEmoji() error { 74 var emoji emojiHolder 75 err := unmarshalJsonFile("./config/emoji_default.json", &emoji) 76 if err != nil { 77 return err 78 } 79 80 emojis = make(map[string]string, len(emoji.Emojis)) 81 if !emoji.NoDefault { 82 for _, item := range emoji.Emojis { 83 for ikey, ival := range item { 84 emojis[ikey] = ival 85 } 86 } 87 } 88 89 emoji = emojiHolder{} 90 err = unmarshalJsonFileIgnore404("./config/emoji.json", &emoji) 91 if err != nil { 92 return err 93 } 94 if emoji.NoDefault { 95 emojis = make(map[string]string) 96 } 97 98 for _, item := range emoji.Emojis { 99 for ikey, ival := range item { 100 emojis[ikey] = ival 101 } 102 } 103 104 return nil 105 } 106 107 // TODO: Write a test for this 108 func shortcodeToUnicode(msg string) string { 109 //re := regexp.MustCompile(":(.):") 110 for shortcode, emoji := range emojis { 111 msg = strings.Replace(msg, shortcode, emoji, -1) 112 } 113 return msg 114 } 115 116 type TagToAction struct { 117 Suffix string 118 Do func(*TagToAction, bool, int, []rune) (int, string) // func(tagToAction,open,i,runes) (newI, output) 119 Depth int // For use by Do 120 PartialMode bool 121 } 122 123 // TODO: Write a test for this 124 func tryStepForward(i, step int, runes []rune) (int, bool) { 125 i += step 126 if i < len(runes) { 127 return i, true 128 } 129 return i - step, false 130 } 131 132 // TODO: Write a test for this 133 func tryStepBackward(i, step int, runes []rune) (int, bool) { 134 if i == 0 { 135 return i, false 136 } 137 return i - 1, true 138 } 139 140 // TODO: Preparse Markdown and normalize it into HTML? 141 // TODO: Use a string builder 142 func PreparseMessage(msg string) string { 143 // TODO: Kick this check down a level into SanitiseBody? 144 if !utf8.ValidString(msg) { 145 return "" 146 } 147 msg = strings.Replace(msg, "<p><br>", "\n\n", -1) 148 msg = strings.Replace(msg, "<p>", "\n\n", -1) 149 msg = strings.Replace(msg, "</p>", "", -1) 150 // TODO: Make this looser by moving it to the reverse HTML parser? 151 msg = strings.Replace(msg, "<br>", "\n\n", -1) 152 msg = strings.Replace(msg, "<br />", "\n\n", -1) // XHTML style 153 msg = strings.Replace(msg, " ", "", -1) 154 msg = strings.Replace(msg, "\r", "", -1) // Windows artifact 155 //msg = strings.Replace(msg, "\n\n\n\n", "\n\n\n", -1) 156 msg = GetHookTable().Sshook("preparse_preassign", msg) 157 // There are a few useful cases for having spaces, but I'd like to stop the WYSIWYG from inserting random lines here and there 158 msg = SanitiseBody(msg) 159 160 runes := []rune(msg) 161 msg = "" 162 163 // TODO: We can maybe reduce the size of this by using an offset? 164 // TODO: Move some of these closures out of this function to make things a little more efficient 165 allowedTags := [][]string{ 166 'e': {"m"}, 167 's': {"", "trong", "poiler", "pan"}, 168 'd': {"el"}, 169 'u': {""}, 170 'b': {"", "lockquote"}, 171 'i': {""}, 172 'h': {"1", "2", "3"}, 173 //'p': {""}, 174 'g': {""}, // Quick and dirty fix for Grammarly 175 } 176 buildLitMatch := func(tag string) func(*TagToAction, bool, int, []rune) (int, string) { 177 return func(action *TagToAction, open bool, _ int, _ []rune) (int, string) { 178 if open { 179 action.Depth++ 180 return -1, "<" + tag + ">" 181 } 182 if action.Depth <= 0 { 183 return -1, "" 184 } 185 action.Depth-- 186 return -1, "</" + tag + ">" 187 } 188 } 189 tagToAction := [][]*TagToAction{ 190 'e': {{"m", buildLitMatch("em"), 0, false}}, 191 's': { 192 {"", buildLitMatch("del"), 0, false}, 193 {"trong", buildLitMatch("strong"), 0, false}, 194 {"poiler", buildLitMatch("spoiler"), 0, false}, 195 // Hides the span tags Trumbowyg loves blasting out randomly 196 {"pan", func(act *TagToAction, open bool, i int, runes []rune) (int, string) { 197 if open { 198 act.Depth++ 199 //fmt.Println("skipping attributes") 200 for ; i < len(runes); i++ { 201 if runes[i] == '&' && peekMatch(i, "gt;", runes) { 202 //fmt.Println("found tag exit") 203 return i + 3, " " 204 } 205 } 206 return -1, " " 207 } 208 if act.Depth <= 0 { 209 return -1, " " 210 } 211 act.Depth-- 212 return -1, " " 213 }, 0, true}, 214 }, 215 'd': {{"el", buildLitMatch("del"), 0, false}}, 216 'u': {{"", buildLitMatch("u"), 0, false}}, 217 'b': { 218 {"", buildLitMatch("strong"), 0, false}, 219 {"lockquote", buildLitMatch("blockquote"), 0, false}, 220 }, 221 'i': {{"", buildLitMatch("em"), 0, false}}, 222 'h': { 223 {"1", buildLitMatch("h2"), 0, false}, 224 {"2", buildLitMatch("h3"), 0, false}, 225 {"3", buildLitMatch("h4"), 0, false}, 226 }, 227 //'p': {{"", buildLitMatch2("\n\n", ""), 0, false}}, 228 'g': { 229 {"", func(act *TagToAction, open bool, i int, runes []rune) (int, string) { 230 if open { 231 act.Depth++ 232 //fmt.Println("skipping attributes") 233 for ; i < len(runes); i++ { 234 if runes[i] == '&' && peekMatch(i, "gt;", runes) { 235 //fmt.Println("found tag exit") 236 return i + 3, " " 237 } 238 } 239 return -1, " " 240 } 241 if act.Depth <= 0 { 242 return -1, " " 243 } 244 act.Depth-- 245 return -1, " " 246 }, 0, true}, 247 }, 248 } 249 // TODO: Implement a less literal parser 250 // TODO: Use a string builder 251 // TODO: Implement faster emoji parser 252 for i := 0; i < len(runes); i++ { 253 char := runes[i] 254 // TODO: Make the slashes escapable too in case someone means to use a literaly slash, maybe as an example of how to escape elements? 255 if char == '\\' { 256 if peekMatch(i, "<", runes) { 257 msg += "&" 258 i++ 259 } 260 } else if char == '&' && peekMatch(i, "lt;", runes) { 261 var ok bool 262 i, ok = tryStepForward(i, 4, runes) 263 if !ok { 264 msg += "<" 265 break 266 } 267 char := runes[i] 268 if int(char) >= len(allowedTags) { 269 //fmt.Println("sentinel char out of bounds") 270 msg += "&" 271 i -= 4 272 continue 273 } 274 275 var closeTag bool 276 if char == '/' { 277 //fmt.Println("found close tag") 278 i, ok = tryStepForward(i, 1, runes) 279 if !ok { 280 msg += "</" 281 break 282 } 283 char = runes[i] 284 closeTag = true 285 } 286 287 tags := allowedTags[char] 288 if len(tags) == 0 { 289 //fmt.Println("couldn't find char in allowedTags") 290 msg += "&" 291 if closeTag { 292 //msg += "</" 293 //msg += "&" 294 i -= 5 295 } else { 296 //msg += "&" 297 i -= 4 298 } 299 continue 300 } 301 // TODO: Scan through tags and make sure the suffix is present to reduce the number of false positives which hit the loop below 302 //fmt.Printf("tags: %+v\n", tags) 303 304 newI := -1 305 var out string 306 toActionList := tagToAction[char] 307 for _, toAction := range toActionList { 308 // TODO: Optimise this, maybe with goto or a function call to avoid scanning the text twice? 309 if (toAction.PartialMode && !closeTag && peekMatch(i, toAction.Suffix, runes)) || peekMatch(i, toAction.Suffix+">", runes) { 310 newI, out = toAction.Do(toAction, !closeTag, i, runes) 311 if newI != -1 { 312 i = newI 313 } else if out != "" { 314 i += len(toAction.Suffix + ">") 315 } 316 break 317 } 318 } 319 if out == "" { 320 msg += "&" 321 if closeTag { 322 i -= 5 323 } else { 324 i -= 4 325 } 326 } else if out != " " { 327 msg += out 328 } 329 } else if char == '@' && (i == 0 || runes[i-1] < 33) { 330 // TODO: Handle usernames containing spaces, maybe in the front-end with AJAX 331 // Do not mention-ify ridiculously long things 332 var ok bool 333 i, ok = tryStepForward(i, 1, runes) 334 if !ok { 335 msg += "@" 336 continue 337 } 338 start := i 339 340 for j := 0; i < len(runes) && j < Config.MaxUsernameLength; j++ { 341 cchar := runes[i] 342 if cchar < 33 { 343 break 344 } 345 i++ 346 } 347 348 username := string(runes[start:i]) 349 if username == "" { 350 msg += "@" 351 i = start - 1 352 continue 353 } 354 355 user, err := Users.GetByName(username) 356 if err != nil { 357 if err != ErrNoRows { 358 LogError(err) 359 } 360 msg += "@" 361 i = start - 1 362 continue 363 } 364 msg += "@" + strconv.Itoa(user.ID) 365 i-- 366 } else { 367 msg += string(char) 368 } 369 } 370 371 for _, actionList := range tagToAction { 372 for _, toAction := range actionList { 373 if toAction.Depth > 0 { 374 for ; toAction.Depth > 0; toAction.Depth-- { 375 _, out := toAction.Do(toAction, false, len(runes), runes) 376 if out != "" { 377 msg += out 378 } 379 } 380 } 381 } 382 } 383 return strings.TrimSpace(shortcodeToUnicode(msg)) 384 } 385 386 // TODO: Test this 387 // TODO: Use this elsewhere in the parser? 388 func peek(cur, skip int, runes []rune) rune { 389 if (cur + skip) < len(runes) { 390 return runes[cur+skip] 391 } 392 return 0 // null byte 393 } 394 395 // TODO: Test this 396 func peekMatch(cur int, phrase string, runes []rune) bool { 397 if cur+len(phrase) > len(runes) { 398 return false 399 } 400 for i, char := range phrase { 401 if cur+i+1 >= len(runes) { 402 return false 403 } 404 if runes[cur+i+1] != char { 405 return false 406 } 407 } 408 return true 409 } 410 411 // ! Not concurrency safe 412 func AddHashLinkType(prefix string, h func(*strings.Builder, string, *int)) { 413 // There can only be one hash link type starting with a specific character at the moment 414 hashType := hashLinkTypes[prefix[0]] 415 if hashType != "" { 416 return 417 } 418 hashLinkMap[prefix] = h 419 hashLinkTypes[prefix[0]] = prefix 420 } 421 422 func WriteURL(sb *strings.Builder, url, label string) { 423 sb.Write(URLOpen) 424 sb.WriteString(url) 425 sb.Write(URLOpen2) 426 sb.WriteString(label) 427 sb.Write(URLClose) 428 } 429 430 var hashLinkTypes = []string{'t': "tid-", 'r': "rid-", 'f': "fid-"} 431 var hashLinkMap = map[string]func(*strings.Builder, string, *int){ 432 "tid-": func(sb *strings.Builder, msg string, i *int) { 433 tid, intLen := CoerceIntString(msg[*i:]) 434 *i += intLen 435 436 topic, err := Topics.Get(tid) 437 if err != nil || !Forums.Exists(topic.ParentID) { 438 sb.Write(InvalidTopic) 439 return 440 } 441 WriteURL(sb, BuildTopicURL("", tid), "#tid-"+strconv.Itoa(tid)) 442 }, 443 "rid-": func(sb *strings.Builder, msg string, i *int) { 444 rid, intLen := CoerceIntString(msg[*i:]) 445 *i += intLen 446 447 topic, err := TopicByReplyID(rid) 448 if err != nil || !Forums.Exists(topic.ParentID) { 449 sb.Write(InvalidTopic) 450 return 451 } 452 // TODO: Send the user to the right page and post not just the right topic? 453 WriteURL(sb, BuildTopicURL("", topic.ID), "#rid-"+strconv.Itoa(rid)) 454 }, 455 "fid-": func(sb *strings.Builder, msg string, i *int) { 456 fid, intLen := CoerceIntString(msg[*i:]) 457 *i += intLen 458 459 if !Forums.Exists(fid) { 460 sb.Write(InvalidForum) 461 return 462 } 463 WriteURL(sb, BuildForumURL("", fid), "#fid-"+strconv.Itoa(fid)) 464 }, 465 // TODO: Forum Shortcode Link 466 } 467 468 // TODO: Pack multiple bit flags into an integer instead of using a struct? 469 var DefaultParseSettings = &ParseSettings{} 470 471 type ParseSettings struct { 472 NoEmbed bool 473 } 474 475 func (ps *ParseSettings) CopyPtr() *ParseSettings { 476 n := &ParseSettings{} 477 *n = *ps 478 return n 479 } 480 481 func ParseMessage(msg string, sectionID int, sectionType string, settings *ParseSettings, user *User) string { 482 msg, _ = ParseMessage2(msg, sectionID, sectionType, settings, user) 483 return msg 484 } 485 486 var litRepPrefix = []byte{':', ';'} 487 488 //var litRep = [][]byte{':':{')','(','D','O','o','P','p'},';':{')'}} 489 var litRep = [][]string{':': {')': "😀", '(': "😞", 'D': "😃", 'O': "😲", 'o': "😲", 'P': "😛", 'p': "😛"}, ';': {')': "😉"}} 490 491 // TODO: Write a test for this 492 // TODO: We need a lot more hooks here. E.g. To add custom media types and handlers. 493 // TODO: Use templates to reduce the amount of boilerplate? 494 func ParseMessage2(msg string, sectionID int, sectionType string, settings *ParseSettings, user *User) (string, bool) { 495 if settings == nil { 496 settings = DefaultParseSettings 497 } 498 if user == nil { 499 user = &GuestUser 500 } 501 // TODO: Word boundary detection for these to avoid mangling code 502 /*rep := func(find, replace string) { 503 msg = strings.Replace(msg, find, replace, -1) 504 } 505 rep(":)", "😀") 506 rep(":(", "😞") 507 rep(":D", "😃") 508 rep(":P", "😛") 509 rep(":O", "😲") 510 rep(":p", "😛") 511 rep(":o", "😲") 512 rep(";)", "😉")*/ 513 514 // Word filter list. E.g. Swear words and other things the admins don't like 515 filters, err := WordFilters.GetAll() 516 if err != nil { 517 LogError(err) 518 return "", false 519 } 520 for _, f := range filters { 521 msg = strings.Replace(msg, f.Find, f.Replace, -1) 522 } 523 if len(msg) < 2 { 524 msg = strings.Replace(msg, "\n", "<br>", -1) 525 msg = GetHookTable().Sshook("parse_assign", msg) 526 return msg, false 527 } 528 529 // Search for URLs, mentions and hashlinks in the messages... 530 var sb strings.Builder 531 lastItem := 0 532 i := 0 533 var externalHead bool 534 //var c bool 535 //fmt.Println("msg:", "'"+msg+"'") 536 for ; len(msg) > i; i++ { 537 //fmt.Printf("msg[%d]: %s\n",i,string(msg[i])) 538 if (i == 0 && (msg[0] > 32)) || (len(msg) > (i+1) && (msg[i] < 33) && (msg[i+1] > 32)) { 539 //fmt.Println("s1") 540 if (i != 0) || msg[i] < 33 { 541 i++ 542 } 543 if len(msg) <= (i + 1) { 544 break 545 } 546 //fmt.Println("s2") 547 ch := msg[i] 548 549 // Very short literal matcher 550 if len(litRep) > int(ch) { 551 sl := litRep[ch] 552 if sl != nil { 553 i++ 554 ch := msg[i] 555 if len(sl) > int(ch) { 556 val := sl[ch] 557 if val != "" { 558 i-- 559 sb.WriteString(msg[lastItem:i]) 560 i++ 561 sb.WriteString(val) 562 i++ 563 lastItem = i 564 i-- 565 continue 566 } 567 } 568 i-- 569 } 570 //lastItem = i 571 //i-- 572 //continue 573 } 574 575 switch ch { 576 case '#': 577 //fmt.Println("msg[i+1]:", msg[i+1]) 578 //fmt.Println("string(msg[i+1]):", string(msg[i+1])) 579 hashType := hashLinkTypes[msg[i+1]] 580 if hashType == "" { 581 //fmt.Println("uh1") 582 sb.WriteString(msg[lastItem:i]) 583 i++ 584 lastItem = i 585 continue 586 } 587 //fmt.Println("hashType:", hashType) 588 if len(msg) <= (i + len(hashType) + 1) { 589 sb.WriteString(msg[lastItem:i]) 590 lastItem = i 591 continue 592 } 593 if msg[i+1:i+len(hashType)+1] != hashType { 594 continue 595 } 596 597 //fmt.Println("msg[lastItem:i]:", msg[lastItem:i]) 598 sb.WriteString(msg[lastItem:i]) 599 i += len(hashType) + 1 600 hashLinkMap[hashType](&sb, msg, &i) 601 lastItem = i 602 i-- 603 case '@': 604 sb.WriteString(msg[lastItem:i]) 605 i++ 606 start := i 607 uid, intLen := CoerceIntString(msg[start:]) 608 i += intLen 609 610 var menUser *User 611 if uid != 0 && user.ID == uid { 612 menUser = user 613 } else { 614 menUser = Users.Getn(uid) 615 if menUser == nil { 616 sb.Write(InvalidProfile) 617 lastItem = i 618 i-- 619 continue 620 } 621 } 622 623 sb.Grow(mentionSizeHint + len(menUser.Link) + len(menUser.Name)) 624 sb.Write(URLOpen) 625 sb.WriteString(menUser.Link) 626 sb.Write(urlMention) 627 sb.Write(bytesGreaterThan) 628 sb.WriteByte('@') 629 sb.WriteString(menUser.Name) 630 sb.Write(URLClose) 631 lastItem = i 632 i-- 633 case 'h', 'f', 'g', '/', 'i': 634 //fmt.Println("s3") 635 fch := msg[i+1] 636 if msg[i] == 'h' && fch == 't' && len(msg) > i+5 && msg[i+2] == 't' && msg[i+3] == 'p' { 637 if msg[i+4] == 's' && msg[i+5] == ':' && len(msg) > i+6 && msg[i+6] == '/' { 638 // Do nothing 639 } else if msg[i+4] == ':' && msg[i+5] == '/' { 640 // Do nothing 641 } else { 642 continue 643 } 644 } else if len(msg) > i+4 { 645 if fch == 't' && msg[i+2] == 'p' && msg[i+3] == ':' && msg[i+4] == '/' && msg[i] == 'f' { 646 // Do nothing 647 } else if fch == 'i' && msg[i+2] == 't' && msg[i+3] == ':' && msg[i+4] == '/' && msg[i] == 'g' { 648 // Do nothing 649 } else if msg[i] == 'i' && fch == 'p' && msg[i+2] == 'f' && msg[i+3] == 's' { 650 // Do nothing 651 } else if msg[i] == 'i' && fch == 'p' && msg[i+2] == 'n' && msg[i+3] == 's' { 652 // Do nothing 653 } else if fch == '/' && msg[i] == '/' { 654 // Do nothing 655 } else { 656 continue 657 } 658 } else if fch == '/' && msg[i] == '/' { 659 // Do nothing 660 } else { 661 continue 662 } 663 if !user.Perms.AutoLink { 664 continue 665 } 666 667 //fmt.Println("p1:",i) 668 sb.WriteString(msg[lastItem:i]) 669 urlLen, ok := PartialURLStringLen(msg[i:]) 670 if len(msg) < i+urlLen { 671 //fmt.Println("o1") 672 if urlLen == 2 { 673 sb.Write(DoubleForwardSlash) 674 } else { 675 sb.Write(InvalidURL) 676 } 677 i += len(msg) - 1 678 lastItem = i 679 break 680 } 681 if urlLen == 2 { 682 sb.Write(DoubleForwardSlash) 683 i += urlLen 684 lastItem = i 685 i-- 686 continue 687 } 688 //fmt.Println("msg[i:i+urlLen]:", "'"+msg[i:i+urlLen]+"'") 689 if !ok { 690 //fmt.Printf("o2: i = %d; i+urlLen = %d\n",i,i+urlLen) 691 sb.Write(InvalidURL) 692 i += urlLen 693 lastItem = i 694 i-- 695 continue 696 } 697 698 media, ok := parseMediaString(msg[i:i+urlLen], settings) 699 if !ok { 700 //fmt.Println("o3") 701 sb.Write(InvalidURL) 702 i += urlLen 703 lastItem = i 704 continue 705 } 706 //fmt.Println("p2") 707 708 addImage := func(url string) { 709 sb.Grow(imageSizeHint + len(url) + len(url)) 710 sb.Write(imageOpen) 711 sb.WriteString(url) 712 sb.Write(imageOpen2) 713 sb.WriteString(url) 714 sb.Write(imageClose) 715 i += urlLen 716 lastItem = i 717 } 718 719 // TODO: Reduce the amount of code duplication 720 // TODO: Avoid allocating a string for media.Type? 721 switch media.Type { 722 case AImage: 723 addImage(media.URL + "?sid=" + strconv.Itoa(sectionID) + "&stype=" + sectionType) 724 continue 725 case AVideo: 726 sb.Grow(videoSizeHint + (len(media.URL) + len(sectionType)*2)) 727 sb.Write(videoOpen) 728 sb.WriteString(media.URL) 729 sb.Write(sidParam) 730 sb.WriteString(strconv.Itoa(sectionID)) 731 sb.Write(stypeParam) 732 sb.WriteString(sectionType) 733 sb.Write(videoOpen2) 734 sb.WriteString(media.URL) 735 sb.Write(sidParam) 736 sb.WriteString(strconv.Itoa(sectionID)) 737 sb.Write(stypeParam) 738 sb.WriteString(sectionType) 739 sb.Write(videoClose) 740 i += urlLen 741 lastItem = i 742 continue 743 case AAudio: 744 sb.Grow(audioSizeHint + (len(media.URL) + len(sectionType)*2)) 745 sb.Write(audioOpen) 746 sb.WriteString(media.URL) 747 sb.Write(sidParam) 748 sb.WriteString(strconv.Itoa(sectionID)) 749 sb.Write(stypeParam) 750 sb.WriteString(sectionType) 751 sb.Write(audioOpen2) 752 sb.WriteString(media.URL) 753 sb.Write(sidParam) 754 sb.WriteString(strconv.Itoa(sectionID)) 755 sb.Write(stypeParam) 756 sb.WriteString(sectionType) 757 sb.Write(audioClose) 758 i += urlLen 759 lastItem = i 760 continue 761 case EImage: 762 addImage(media.URL) 763 continue 764 case AText: 765 /*sb.Write(textOpen) 766 sb.WriteString(media.URL) 767 sb.Write(sidParam) 768 sid := strconv.Itoa(sectionID) 769 sb.WriteString(sid) 770 sb.Write(stypeParam) 771 sb.WriteString(sectionType) 772 sb.Write(textOpen2) 773 sb.WriteString(media.URL) 774 sb.Write(sidParam) 775 sb.WriteString(sid) 776 sb.Write(stypeParam) 777 sb.WriteString(sectionType) 778 sb.Write(textClose) 779 i += urlLen 780 lastItem = i 781 continue*/ 782 sb.Write(textOpen) 783 sb.WriteString(media.URL) 784 sb.Write(textOpen2) 785 sb.WriteString(media.URL) 786 sb.Write(sidParam) 787 sid := strconv.Itoa(sectionID) 788 sb.WriteString(sid) 789 sb.Write(stypeParam) 790 sb.WriteString(sectionType) 791 sb.Write(textOpen3) 792 sb.WriteString(media.URL) 793 sb.Write(sidParam) 794 sb.WriteString(sid) 795 sb.Write(stypeParam) 796 sb.WriteString(sectionType) 797 sb.Write(textClose) 798 i += urlLen 799 lastItem = i 800 continue 801 case AOther: 802 sb.Write(attachOpen) 803 sb.WriteString(media.URL) 804 sb.Write(sidParam) 805 sb.WriteString(strconv.Itoa(sectionID)) 806 sb.Write(stypeParam) 807 sb.WriteString(sectionType) 808 sb.Write(attachClose) 809 i += urlLen 810 lastItem = i 811 continue 812 case ERaw: 813 sb.WriteString(media.Body) 814 i += urlLen 815 lastItem = i 816 continue 817 case ERawExternal: 818 sb.WriteString(media.Body) 819 i += urlLen 820 lastItem = i 821 externalHead = true 822 continue 823 case ENone: 824 // Do nothing 825 // TODO: Add support for media plugins 826 default: 827 sb.Write(unknownMedia) 828 i += urlLen 829 continue 830 } 831 //fmt.Println("p3") 832 833 // TODO: Add support for rel="ugc" 834 sb.Grow(len(URLOpen) + (len(msg[i:i+urlLen]) * 2) + len(URLOpen2) + len(URLClose)) 835 if media.Trusted { 836 sb.Write(URLOpen) 837 } else { 838 sb.Write(URLOpenUser) 839 } 840 sb.WriteString(media.URL) 841 sb.Write(URLOpen2) 842 sb.WriteString(media.FURL) 843 sb.Write(URLClose) 844 i += urlLen 845 lastItem = i 846 i-- 847 } 848 } 849 } 850 if lastItem != i && sb.Len() != 0 { 851 /*calclen := len(msg) 852 if calclen <= lastItem { 853 calclen = lastItem 854 }*/ 855 //if i == len(msg) { 856 sb.WriteString(msg[lastItem:]) 857 /*} else { 858 sb.WriteString(msg[lastItem:calclen]) 859 }*/ 860 } 861 if sb.Len() != 0 { 862 msg = sb.String() 863 //fmt.Println("sb.String():", "'"+sb.String()+"'") 864 } 865 866 msg = strings.Replace(msg, "\n", "<br>", -1) 867 msg = GetHookTable().Sshook("parse_assign", msg) 868 return msg, externalHead 869 } 870 871 // 6, 7, 8, 6, 2, 7 872 // ftp://, http://, https://, git://, ipfs://, ipns://, //, mailto: (not a URL, just here for length comparison purposes) 873 // TODO: Write a test for this 874 func validateURLString(d string) bool { 875 i := 0 876 if len(d) >= 6 { 877 if d[0:6] == "ftp://" || d[0:6] == "git://" { 878 i = 6 879 } else if len(d) >= 7 && (d[0:7] == "http://" || d[0:7] == "ipfs://" || d[0:7] == "ipns://") { 880 i = 7 881 } else if len(d) >= 8 && d[0:8] == "https://" { 882 i = 8 883 } 884 } else if len(d) >= 2 && d[0] == '/' && d[1] == '/' { 885 i = 2 886 } 887 888 // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. 889 for ; len(d) > i; i++ { 890 ch := d[i] 891 if ch != '\\' && ch != '_' && ch != '?' && ch != '&' && ch != '=' && ch != '@' && ch != '#' && ch != ']' && !(ch > 44 && ch < 60) && !(ch > 64 && ch < 92) && !(ch > 96 && ch < 123) { // 57 is 9, 58 is :, 59 is ;, 90 is Z, 91 is [ 892 return false 893 } 894 } 895 return true 896 } 897 898 // TODO: Write a test for this 899 func validatedURLBytes(data []byte) (url []byte) { 900 datalen := len(data) 901 i := 0 902 if datalen >= 6 { 903 if bytes.Equal(data[0:6], []byte("ftp://")) || bytes.Equal(data[0:6], []byte("git://")) { 904 i = 6 905 } else if datalen >= 7 && bytes.Equal(data[0:7], httpProtBytes) { 906 i = 7 907 } else if datalen >= 8 && bytes.Equal(data[0:8], []byte("https://")) { 908 i = 8 909 } 910 } else if datalen >= 2 && data[0] == '/' && data[1] == '/' { 911 i = 2 912 } 913 914 // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. 915 for ; datalen > i; i++ { 916 ch := data[i] 917 if ch != '\\' && ch != '_' && ch != '?' && ch != '&' && ch != '=' && ch != '@' && ch != '#' && ch != ']' && !(ch > 44 && ch < 60) && !(ch > 64 && ch < 92) && !(ch > 96 && ch < 123) { // 57 is 9, 58 is :, 59 is ;, 90 is Z, 91 is [ 918 return InvalidURL 919 } 920 } 921 922 url = append(url, data...) 923 return url 924 } 925 926 // TODO: Write a test for this 927 func PartialURLString(d string) (url []byte) { 928 i := 0 929 end := len(d) - 1 930 if len(d) >= 6 { 931 if d[0:6] == "ftp://" || d[0:6] == "git://" { 932 i = 6 933 } else if len(d) >= 7 && (d[0:7] == "http://" || d[0:7] == "ipfs://" || d[0:7] == "ipns://") { 934 i = 7 935 } else if len(d) >= 8 && d[0:8] == "https://" { 936 i = 8 937 } 938 } else if len(d) >= 2 && d[0] == '/' && d[1] == '/' { 939 i = 2 940 } 941 942 // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. 943 for ; end >= i; i++ { 944 ch := d[i] 945 if ch != '\\' && ch != '_' && ch != '?' && ch != '&' && ch != '=' && ch != '@' && ch != '#' && ch != ']' && !(ch > 44 && ch < 60) && !(ch > 64 && ch < 92) && !(ch > 96 && ch < 123) { // 57 is 9, 58 is :, 59 is ;, 90 is Z, 91 is [ 946 end = i 947 } 948 } 949 950 url = append(url, []byte(d[0:end])...) 951 return url 952 } 953 954 // TODO: Write a test for this 955 // TODO: Handle the host bits differently from the paths... 956 func PartialURLStringLen(d string) (int, bool) { 957 i := 0 958 if len(d) >= 6 { 959 //log.Print(string(d[0:5])) 960 if d[0:6] == "ftp://" || d[0:6] == "git://" { 961 i = 6 962 } else if len(d) >= 7 && (d[0:7] == "http://" || d[0:7] == "ipfs://" || d[0:7] == "ipns://") { 963 i = 7 964 } else if len(d) >= 8 && d[0:8] == "https://" { 965 i = 8 966 } 967 } else if len(d) >= 2 && d[0] == '/' && d[1] == '/' { 968 i = 2 969 } 970 //fmt.Println("Data Length: ",len(d)) 971 if len(d) < i { 972 //fmt.Println("e1:",i) 973 return i + 1, false 974 } 975 976 // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. 977 f := i 978 //fmt.Println("f:",f) 979 for ; len(d) > i; i++ { 980 ch := d[i] //char 981 if ch < 33 { // space and invisibles 982 //fmt.Println("e2:",i) 983 return i, i != f 984 } else if ch != '\\' && ch != '_' && ch != '?' && ch != '&' && ch != '=' && ch != '@' && ch != '#' && ch != ']' && !(ch > 44 && ch < 60) && !(ch > 64 && ch < 92) && !(ch > 96 && ch < 123) { // 57 is 9, 58 is :, 59 is ;, 90 is Z, 91 is [ 985 //log.Print("Bad Character: ", ch) 986 //fmt.Println("e3") 987 return i, false 988 } 989 } 990 991 //fmt.Println("e4:", i) 992 /*if data[i-1] < 33 { 993 return i-1, i != f 994 }*/ 995 //fmt.Println("e5") 996 return i, i != f 997 } 998 999 // TODO: Write a test for this 1000 // TODO: Get this to support IPv6 hosts, this isn't currently done as this is used in the bbcode plugin where it thinks the [ is a IPv6 host 1001 func PartialURLStringLen2(d string) int { 1002 i := 0 1003 if len(d) >= 6 { 1004 //log.Print(string(d[0:5])) 1005 if d[0:6] == "ftp://" || d[0:6] == "git://" { 1006 i = 6 1007 } else if len(d) >= 7 && (d[0:7] == "http://" || d[0:7] == "ipfs://" || d[0:7] == "ipns://") { 1008 i = 7 1009 } else if len(d) >= 8 && d[0:8] == "https://" { 1010 i = 8 1011 } 1012 } else if len(d) >= 2 && d[0] == '/' && d[1] == '/' { 1013 i = 2 1014 } 1015 1016 // ? - There should only be one : and that's only if the URL is on a non-standard port. Same for ?s. 1017 for ; len(d) > i; i++ { 1018 ch := d[i] 1019 if ch != '\\' && ch != '_' && ch != '?' && ch != '&' && ch != '=' && ch != '@' && ch != '#' && ch != ']' && !(ch > 44 && ch < 60) && !(ch > 64 && ch < 91) && !(ch > 96 && ch < 123) { // 57 is 9, 58 is :, 59 is ;, 90 is Z, 91 is [ 1020 //log.Print("Bad Character: ", ch) 1021 return i 1022 } 1023 } 1024 //log.Print("Data Length: ",len(d)) 1025 return len(d) 1026 } 1027 1028 type MediaEmbed struct { 1029 //Type string //image 1030 Type int 1031 URL string 1032 FURL string 1033 Body string 1034 1035 Trusted bool // samesite urls 1036 } 1037 1038 const ( 1039 ENone = iota 1040 ERaw 1041 ERawExternal 1042 EImage 1043 AImage 1044 AVideo 1045 AAudio 1046 AText 1047 AOther 1048 ) 1049 1050 var LastEmbedID = AOther 1051 1052 // TODO: Write a test for this 1053 func parseMediaString(data string, settings *ParseSettings) (media MediaEmbed, ok bool) { 1054 if !validateURLString(data) { 1055 return media, false 1056 } 1057 uurl, err := url.Parse(data) 1058 if err != nil { 1059 return media, false 1060 } 1061 host := uurl.Hostname() 1062 scheme := uurl.Scheme 1063 if scheme == "ipfs" { 1064 media.FURL = data 1065 media.URL = media.FURL 1066 return media, true 1067 } 1068 port := uurl.Port() 1069 query, err := url.ParseQuery(uurl.RawQuery) 1070 if err != nil { 1071 return media, false 1072 } 1073 //fmt.Println("host:", host) 1074 //log.Print("Site.URL:",Site.URL) 1075 1076 samesite := (host == "localhost" || host == "127.0.0.1" || host == "::1" || host == Site.URL) && scheme != "ipns" 1077 if samesite { 1078 host = strings.Split(Site.URL, ":")[0] 1079 // ?- Test this as I'm not sure it'll do what it should. If someone's running SSL on port 80 or non-SSL on port 443 then... Well... They're in far worse trouble than this... 1080 port = Site.Port 1081 if Config.SslSchema { 1082 scheme = "https" 1083 } 1084 } 1085 if scheme != "" { 1086 scheme += ":" 1087 } 1088 media.Trusted = samesite 1089 1090 path := uurl.EscapedPath() 1091 //fmt.Println("path:", path) 1092 pathFrags := strings.Split(path, "/") 1093 if len(pathFrags) >= 2 { 1094 if samesite && pathFrags[1] == "attachs" && (scheme == "http:" || scheme == "https:") { 1095 var sport string 1096 // ? - Assumes the sysadmin hasn't mixed up the two standard ports 1097 if port != "443" && port != "80" && port != "" { 1098 sport = ":" + port 1099 } 1100 media.URL = scheme + "//" + host + sport + path 1101 ext := strings.TrimPrefix(filepath.Ext(path), ".") 1102 if len(ext) == 0 { 1103 // TODO: Write a unit test for this 1104 return media, false 1105 } 1106 switch { 1107 case ImageFileExts.Contains(ext): 1108 media.Type = AImage 1109 case WebVideoFileExts.Contains(ext): 1110 media.Type = AVideo 1111 case WebAudioFileExts.Contains(ext): 1112 media.Type = AAudio 1113 case TextFileExts.Contains(ext): 1114 media.Type = AText 1115 default: 1116 media.Type = AOther 1117 } 1118 return media, true 1119 } 1120 } 1121 1122 //fmt.Printf("settings.NoEmbed: %+v\n", settings.NoEmbed) 1123 //settings.NoEmbed = false 1124 if !settings.NoEmbed { 1125 // ? - I don't think this hostname will hit every YT domain 1126 // TODO: Make this a more customisable handler rather than hard-coding it in here 1127 ytInvalid := func(v string) bool { 1128 for _, ch := range v { 1129 if !((ch > 47 && ch < 58) || (ch > 64 && ch < 91) || (ch > 96 && ch < 123) || ch == '-' || ch == '_') { 1130 var sport string 1131 if port != "443" && port != "80" && port != "" { 1132 sport = ":" + port 1133 } 1134 var q string 1135 if len(uurl.RawQuery) > 0 { 1136 q = "?" + uurl.RawQuery 1137 } 1138 var frag string 1139 if len(uurl.Fragment) > 0 { 1140 frag = "#" + uurl.Fragment 1141 } 1142 media.FURL = host + sport + path + q + frag 1143 media.URL = scheme + "//" + media.FURL 1144 //fmt.Printf("ytInvalid true: %+v\n",v) 1145 return true 1146 } 1147 } 1148 return false 1149 } 1150 ytInvalid2 := func(t string) bool { 1151 for _, ch := range t { 1152 if !((ch > 47 && ch < 58) || ch == 'h' || ch == 'm' || ch == 's') { 1153 //fmt.Printf("ytInvalid2 true: %+v\n",t) 1154 return true 1155 } 1156 } 1157 return false 1158 } 1159 if strings.HasSuffix(host, ".youtube.com") && path == "/watch" { 1160 video, ok := query["v"] 1161 if ok && len(video) >= 1 && video[0] != "" { 1162 v := video[0] 1163 if ytInvalid(v) { 1164 return media, true 1165 } 1166 var t, t2 string 1167 tt, ok := query["t"] 1168 if ok && len(tt) >= 1 { 1169 t, t2 = tt[0], tt[0] 1170 } 1171 media.Type = ERawExternal 1172 if t != "" && !ytInvalid2(t) { 1173 s, m, h := parseDuration(t2) 1174 calc := s + (m * 60) + (h * 60 * 60) 1175 if calc > 0 { 1176 t = "&t=" + t 1177 t2 = "?start=" + strconv.Itoa(calc) 1178 } else { 1179 t, t2 = "", "" 1180 } 1181 } 1182 l := "https://" + host + path + "?v=" + v + t 1183 media.Body = "<iframe class='postIframe'src='https://www.youtube-nocookie.com/embed/" + v + t2 + "'frameborder=0 allowfullscreen></iframe><noscript><a href='" + l + "'>" + l + "</a></noscript>" 1184 return media, true 1185 } 1186 } else if host == "youtu.be" { 1187 v := strings.TrimPrefix(path, "/") 1188 if ytInvalid(v) { 1189 return media, true 1190 } 1191 l := "https://youtu.be/" + v 1192 media.Type = ERawExternal 1193 media.Body = "<iframe class='postIframe'src='https://www.youtube-nocookie.com/embed/" + v + "'frameborder=0 allowfullscreen></iframe><noscript><a href='" + l + "'>" + l + "</a></noscript>" 1194 return media, true 1195 } else if strings.HasPrefix(host, "www.nicovideo.jp") && strings.HasPrefix(path, "/watch/sm") { 1196 vid, err := strconv.ParseInt(strings.TrimPrefix(path, "/watch/sm"), 10, 64) 1197 if err == nil { 1198 var sport string 1199 if port != "443" && port != "80" && port != "" { 1200 sport = ":" + port 1201 } 1202 media.Type = ERawExternal 1203 sm := strconv.FormatInt(vid, 10) 1204 l := "https://" + host + sport + path 1205 media.Body = "<iframe class='postIframe'src='https://embed.nicovideo.jp/watch/sm" + sm + "?jsapi=1&playerId=1'frameborder=0 allowfullscreen></iframe><noscript><a href='" + l + "'>" + l + "</a></noscript>" 1206 return media, true 1207 } 1208 } 1209 1210 if lastFrag := pathFrags[len(pathFrags)-1]; lastFrag != "" { 1211 // TODO: Write a function for getting the file extension of a string 1212 ext := strings.TrimPrefix(filepath.Ext(lastFrag), ".") 1213 if len(ext) != 0 { 1214 if ImageFileExts.Contains(ext) { 1215 media.Type = EImage 1216 var sport string 1217 if port != "443" && port != "80" && port != "" { 1218 sport = ":" + port 1219 } 1220 media.URL = scheme + "//" + host + sport + path 1221 return media, true 1222 } 1223 // TODO: Support external videos 1224 } 1225 } 1226 } 1227 1228 var sport string 1229 if port != "443" && port != "80" && port != "" { 1230 sport = ":" + port 1231 } 1232 var q string 1233 if len(uurl.RawQuery) > 0 { 1234 q = "?" + uurl.RawQuery 1235 } 1236 var frag string 1237 if len(uurl.Fragment) > 0 { 1238 frag = "#" + uurl.Fragment 1239 } 1240 media.FURL = host + sport + path + q + frag 1241 media.URL = scheme + "//" + media.FURL 1242 1243 return media, true 1244 } 1245 1246 func parseDuration(dur string) (s, m, h int) { 1247 var ibuf []byte 1248 for _, ch := range dur { 1249 switch { 1250 case ch > 47 && ch < 58: 1251 ibuf = append(ibuf, byte(ch)) 1252 case ch == 'h': 1253 h, _ = strconv.Atoi(string(ibuf)) 1254 ibuf = ibuf[:0] 1255 case ch == 'm': 1256 m, _ = strconv.Atoi(string(ibuf)) 1257 ibuf = ibuf[:0] 1258 case ch == 's': 1259 s, _ = strconv.Atoi(string(ibuf)) 1260 ibuf = ibuf[:0] 1261 } 1262 } 1263 // Stop accidental uses of timestamps 1264 if h == 0 && m == 0 && s < 2 { 1265 s = 0 1266 } 1267 return s, m, h 1268 } 1269 1270 // TODO: Write a test for this 1271 func CoerceIntString(data string) (res, length int) { 1272 if !(data[0] > 47 && data[0] < 58) { 1273 return 0, 1 1274 } 1275 i := 0 1276 for ; len(data) > i; i++ { 1277 if !(data[i] > 47 && data[i] < 58) { 1278 conv, err := strconv.Atoi(data[0:i]) 1279 if err != nil { 1280 return 0, i 1281 } 1282 return conv, i 1283 } 1284 } 1285 1286 conv, err := strconv.Atoi(data) 1287 if err != nil { 1288 return 0, i 1289 } 1290 return conv, i 1291 } 1292 1293 // TODO: Write tests for this 1294 // Make sure we reflect changes to this in the JS port in /public/global.js 1295 func Paginate(currentPage, lastPage, maxPages int) (out []int) { 1296 diff := lastPage - currentPage 1297 pre := 3 1298 if diff < 3 { 1299 pre = maxPages - diff 1300 } 1301 1302 page := currentPage - pre 1303 if page < 0 { 1304 page = 0 1305 } 1306 for len(out) < maxPages && page < lastPage { 1307 page++ 1308 out = append(out, page) 1309 } 1310 return out 1311 } 1312 1313 // TODO: Write tests for this 1314 // Make sure we reflect changes to this in the JS port in /public/global.js 1315 func PageOffset(count, page, perPage int) (int, int, int) { 1316 var offset int 1317 lastPage := LastPage(count, perPage) 1318 if page > 1 { 1319 offset = (perPage * page) - perPage 1320 } else if page == -1 { 1321 page = lastPage 1322 offset = (perPage * page) - perPage 1323 } else { 1324 page = 1 1325 } 1326 1327 // ? - This has been commented out as it created a bug in the user manager where the first user on a page wouldn't be accessible 1328 // We don't want the offset to overflow the slices, if everything's in memory 1329 /*if offset >= (count - 1) { 1330 offset = 0 1331 }*/ 1332 return offset, page, lastPage 1333 } 1334 1335 // TODO: Write tests for this 1336 // Make sure we reflect changes to this in the JS port in /public/global.js 1337 func LastPage(count, perPage int) int { 1338 return (count / perPage) + 1 1339 }