github.com/coyove/sdss@v0.0.0-20231129015646-c2ec58cca6a2/contrib/ngram/token_test.go (about)

     1  package ngram
     2  
     3  import (
     4  	"encoding/base64"
     5  	"fmt"
     6  	"testing"
     7  )
     8  
     9  func TestNGram(t *testing.T) {
    10  	fmt.Println(SplitMore("C4H7NO4 c++"))
    11  	fmt.Println(isCodeString("AB"), isCodeString("Unsaved"), isCodeString(base64.URLEncoding.EncodeToString([]byte("base64"))))
    12  
    13  	q := `		女朋友要求戒指 5-7w 预算😋❤️🥺过分么。
    14  r00t7		child napıyor children通勤 50 分钟,费用 7.2 元有必要买一辆小电驴吗 12憂鬱台灣烏龜
    15  15996301234 şile
    16  quần quật 18 ếng 1 ngày  là khỏi
    17  𝘮𝘶𝘳𝘪𝘦𝘯𝘥𝘰 𝞉 600 جزء من احتفلتي مع اصدقائي`
    18  
    19  	fmt.Println(Split(q + `Hablamos...? :>
    20  ✨Gatoteta✨
    21  amo mi lunar :>`))
    22  
    23  	fmt.Println(trigram(q))
    24  	fmt.Println("===")
    25  	fmt.Println(Split(`enable_diamonds Random selfies #randomppic #lits"match.;newbie" set @abc.def #tag事实🤔的s`))
    26  }