github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/data/mistral-tokenizer/duplicates.json (about) 1 [ 2 {"old_id": 35, "new_id": 28705, "repr": " "}, 3 {"old_id": 104, "new_id": 28706, "repr": "e"}, 4 {"old_id": 119, "new_id": 28707, "repr": "t"}, 5 {"old_id": 100, "new_id": 28708, "repr": "a"}, 6 {"old_id": 114, "new_id": 28709, "repr": "o"}, 7 {"old_id": 108, "new_id": 28710, "repr": "i"}, 8 {"old_id": 113, "new_id": 28711, "repr": "n"}, 9 {"old_id": 117, "new_id": 28712, "repr": "r"}, 10 {"old_id": 118, "new_id": 28713, "repr": "s"}, 11 {"old_id": 111, "new_id": 28714, "repr": "l"}, 12 {"old_id": 103, "new_id": 28715, "repr": "d"}, 13 {"old_id": 107, "new_id": 28716, "repr": "h"}, 14 {"old_id": 102, "new_id": 28717, "repr": "c"}, 15 {"old_id": 120, "new_id": 28718, "repr": "u"}, 16 {"old_id": 112, "new_id": 28719, "repr": "m"}, 17 {"old_id": 115, "new_id": 28720, "repr": "p"}, 18 {"old_id": 106, "new_id": 28721, "repr": "g"}, 19 {"old_id": 105, "new_id": 28722, "repr": "f"}, 20 {"old_id": 49, "new_id": 28723, "repr": "."}, 21 {"old_id": 124, "new_id": 28724, "repr": "y"}, 22 {"old_id": 47, "new_id": 28725, "repr": ","}, 23 {"old_id": 101, "new_id": 28726, "repr": "b"}, 24 {"old_id": 122, "new_id": 28727, "repr": "w"}, 25 {"old_id": 121, "new_id": 28728, "repr": "v"}, 26 {"old_id": 110, "new_id": 28729, "repr": "k"}, 27 {"old_id": 98, "new_id": 28730, "repr": "_"}, 28 {"old_id": 44, "new_id": 28731, "repr": ")"}, 29 {"old_id": 43, "new_id": 28732, "repr": "("}, 30 {"old_id": 48, "new_id": 28733, "repr": "-"}, 31 {"old_id": 51, "new_id": 28734, "repr": "0"}, 32 {"old_id": 86, "new_id": 28735, "repr": "S"}, 33 {"old_id": 45, "new_id": 28736, "repr": "*"}, 34 {"old_id": 76, "new_id": 28737, "repr": "I"}, 35 {"old_id": 87, "new_id": 28738, "repr": "T"}, 36 {"old_id": 37, "new_id": 28739, "repr": "\""}, 37 {"old_id": 52, "new_id": 28740, "repr": "1"}, 38 {"old_id": 68, "new_id": 28741, "repr": "A"}, 39 {"old_id": 42, "new_id": 28742, "repr": "'"}, 40 {"old_id": 70, "new_id": 28743, "repr": "C"}, 41 {"old_id": 123, "new_id": 28744, "repr": "x"}, 42 {"old_id": 62, "new_id": 28745, "repr": ";"}, 43 {"old_id": 64, "new_id": 28746, "repr": "="}, 44 {"old_id": 61, "new_id": 28747, "repr": ":"}, 45 {"old_id": 50, "new_id": 28748, "repr": "/"}, 46 {"old_id": 72, "new_id": 28749, "repr": "E"}, 47 {"old_id": 53, "new_id": 28750, "repr": "2"}, 48 {"old_id": 126, "new_id": 28751, "repr": "{"}, 49 {"old_id": 128, "new_id": 28752, "repr": "}"}, 50 {"old_id": 83, "new_id": 28753, "repr": "P"}, 51 {"old_id": 85, "new_id": 28754, "repr": "R"}, 52 {"old_id": 80, "new_id": 28755, "repr": "M"}, 53 {"old_id": 95, "new_id": 28756, "repr": "\\"}, 54 {"old_id": 71, "new_id": 28757, "repr": "D"}, 55 {"old_id": 79, "new_id": 28758, "repr": "L"}, 56 {"old_id": 81, "new_id": 28759, "repr": "N"}, 57 {"old_id": 69, "new_id": 28760, "repr": "B"}, 58 {"old_id": 82, "new_id": 28762, "repr": "O"}, 59 {"old_id": 125, "new_id": 28764, "repr": "z"}, 60 {"old_id": 73, "new_id": 28765, "repr": "F"}, 61 {"old_id": 127, "new_id": 28766, "repr": "|"}, 62 {"old_id": 65, "new_id": 28767, "repr": ">"}, 63 {"old_id": 109, "new_id": 28768, "repr": "j"}, 64 {"old_id": 75, "new_id": 28769, "repr": "H"}, 65 {"old_id": 54, "new_id": 28770, "repr": "3"}, 66 {"old_id": 38, "new_id": 28771, "repr": "#"}, 67 {"old_id": 60, "new_id": 28774, "repr": "9"}, 68 {"old_id": 116, "new_id": 28775, "repr": "q"}, 69 {"old_id": 39, "new_id": 28776, "repr": "$"}, 70 {"old_id": 74, "new_id": 28777, "repr": "G"}, 71 {"old_id": 88, "new_id": 28779, "repr": "U"}, 72 {"old_id": 90, "new_id": 28780, "repr": "W"}, 73 {"old_id": 55, "new_id": 28781, "repr": "4"}, 74 {"old_id": 56, "new_id": 28782, "repr": "5"}, 75 {"old_id": 59, "new_id": 28783, "repr": "8"}, 76 {"old_id": 57, "new_id": 28784, "repr": "6"}, 77 {"old_id": 58, "new_id": 28787, "repr": "7"}, 78 {"old_id": 63, "new_id": 28789, "repr": "<"}, 79 {"old_id": 89, "new_id": 28790, "repr": "V"}, 80 {"old_id": 94, "new_id": 28792, "repr": "["}, 81 {"old_id": 96, "new_id": 28793, "repr": "]"}, 82 {"old_id": 78, "new_id": 28796, "repr": "K"}, 83 {"old_id": 77, "new_id": 28798, "repr": "J"}, 84 {"old_id": 41, "new_id": 28800, "repr": "&"}, 85 {"old_id": 16, "new_id": 28801, "repr": "\r"}, 86 {"old_id": 92, "new_id": 28802, "repr": "Y"}, 87 {"old_id": 66, "new_id": 28804, "repr": "?"}, 88 {"old_id": 46, "new_id": 28806, "repr": "+"}, 89 {"old_id": 36, "new_id": 28808, "repr": "!"}, 90 {"old_id": 91, "new_id": 28814, "repr": "X"}, 91 {"old_id": 97, "new_id": 28815, "repr": "^"}, 92 {"old_id": 67, "new_id": 28818, "repr": "@"}, 93 {"old_id": 40, "new_id": 28823, "repr": "%"}, 94 {"old_id": 84, "new_id": 28824, "repr": "Q"}, 95 {"old_id": 93, "new_id": 28828, "repr": "Z"}, 96 {"old_id": 99, "new_id": 28832, "repr": "`"}, 97 {"old_id": 129, "new_id": 28845, "repr": "~"}, 98 {"old_id": 4, "new_id": 29534, "repr": "\u0001"}, 99 {"old_id": 15, "new_id": 29683, "repr": "\u000c"}, 100 {"old_id": 30, "new_id": 30246, "repr": "\u001b"}, 101 {"old_id": 21, "new_id": 30298, "repr": "\u0012"}, 102 {"old_id": 9, "new_id": 30314, "repr": "\u0006"}, 103 {"old_id": 19, "new_id": 30388, "repr": "\u0010"}, 104 {"old_id": 22, "new_id": 30453, "repr": "\u0013"}, 105 {"old_id": 17, "new_id": 30517, "repr": "\u000e"}, 106 {"old_id": 8, "new_id": 30550, "repr": "\u0005"}, 107 {"old_id": 5, "new_id": 30551, "repr": "\u0002"}, 108 {"old_id": 27, "new_id": 30555, "repr": "\u0018"}, 109 {"old_id": 20, "new_id": 30557, "repr": "\u0011"}, 110 {"old_id": 14, "new_id": 30638, "repr": "\u000b"}, 111 {"old_id": 6, "new_id": 30662, "repr": "\u0003"}, 112 {"old_id": 24, "new_id": 30675, "repr": "\u0015"}, 113 {"old_id": 18, "new_id": 30698, "repr": "\u000f"}, 114 {"old_id": 23, "new_id": 30721, "repr": "\u0014"}, 115 {"old_id": 7, "new_id": 30724, "repr": "\u0004"}, 116 {"old_id": 29, "new_id": 30759, "repr": "\u001a"}, 117 {"old_id": 26, "new_id": 30841, "repr": "\u0017"}, 118 {"old_id": 25, "new_id": 30935, "repr": "\u0016"}, 119 {"old_id": 10, "new_id": 30963, "repr": "\u0007"}, 120 {"old_id": 28, "new_id": 30969, "repr": "\u0019"}, 121 {"old_id": 130, "new_id": 30982, "repr": "\u007f"}, 122 {"old_id": 11, "new_id": 31129, "repr": "\b"}, 123 {"old_id": 31, "new_id": 31134, "repr": "\u001c"}, 124 {"old_id": 33, "new_id": 31150, "repr": "\u001e"}, 125 {"old_id": 34, "new_id": 31217, "repr": "\u001f"}, 126 {"old_id": 32, "new_id": 31236, "repr": "\u001d"} 127 ]