github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/data/nerdstash_v1-tokenizer/specials.txt (about) 1 3 2 4 3 9 4 0 5 1 6 2 7 5 8 6 9 7 10 8 11 " 12 13 14 15 ─ 16 *** 17 ⁂ 18 ---- 19 20 Type 21 Tags 22 Title 23 Style 24 Genre 25 Rating 26 Author 27 <|pad|> 28 29 <|fill|> 30 Summary 31 <|mask|> 32 Glossary 33 <|mtend|> 34 Characters 35 Knowledge 36 <|fillend|> 37 <|maskend|> 38 <|rubyend|> 39 <|mtvocab|> 40 <|masklong|> 41 42 <|spmspace|> 43 <|reserved6|> 44 <|reserved3|> 45 <|reserved2|> 46 <|rubystart|> 47 <|reserved0|> 48 <|reserved7|> 49 <|reserved4|> 50 <|rubycover|> 51 <|endoftext|> 52 <|reserved5|> 53 <|reserved8|> 54 <|maskshort|> 55 <|reserved9|> 56 <|reserved1|> 57 <|mtsenglish|> 58 <|reserved10|> 59 <|maskmedium|> 60 <|masksingle|> 61 <|mtvocabend|> 62 <|mtvenglish|> 63 <|mtsentence|> 64 <|startoftext|> 65 <|mtvjapanese|> 66 <|mtsjapanese|> 67 68 <|mtsentenceend|> 69 <|maskparagraph|> 70 <|spmspace|><|spmspace|> 71 <|spmspace|><|spmspace|><|spmspace|><|spmspace|> 72 <|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|> 73 <|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|> 74 <|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|>