github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/data/mistral-tokenizer/duplicates.json (about)

     1  [
     2    {"old_id": 35, "new_id": 28705, "repr": " "},
     3    {"old_id": 104, "new_id": 28706, "repr": "e"},
     4    {"old_id": 119, "new_id": 28707, "repr": "t"},
     5    {"old_id": 100, "new_id": 28708, "repr": "a"},
     6    {"old_id": 114, "new_id": 28709, "repr": "o"},
     7    {"old_id": 108, "new_id": 28710, "repr": "i"},
     8    {"old_id": 113, "new_id": 28711, "repr": "n"},
     9    {"old_id": 117, "new_id": 28712, "repr": "r"},
    10    {"old_id": 118, "new_id": 28713, "repr": "s"},
    11    {"old_id": 111, "new_id": 28714, "repr": "l"},
    12    {"old_id": 103, "new_id": 28715, "repr": "d"},
    13    {"old_id": 107, "new_id": 28716, "repr": "h"},
    14    {"old_id": 102, "new_id": 28717, "repr": "c"},
    15    {"old_id": 120, "new_id": 28718, "repr": "u"},
    16    {"old_id": 112, "new_id": 28719, "repr": "m"},
    17    {"old_id": 115, "new_id": 28720, "repr": "p"},
    18    {"old_id": 106, "new_id": 28721, "repr": "g"},
    19    {"old_id": 105, "new_id": 28722, "repr": "f"},
    20    {"old_id": 49, "new_id": 28723, "repr": "."},
    21    {"old_id": 124, "new_id": 28724, "repr": "y"},
    22    {"old_id": 47, "new_id": 28725, "repr": ","},
    23    {"old_id": 101, "new_id": 28726, "repr": "b"},
    24    {"old_id": 122, "new_id": 28727, "repr": "w"},
    25    {"old_id": 121, "new_id": 28728, "repr": "v"},
    26    {"old_id": 110, "new_id": 28729, "repr": "k"},
    27    {"old_id": 98, "new_id": 28730, "repr": "_"},
    28    {"old_id": 44, "new_id": 28731, "repr": ")"},
    29    {"old_id": 43, "new_id": 28732, "repr": "("},
    30    {"old_id": 48, "new_id": 28733, "repr": "-"},
    31    {"old_id": 51, "new_id": 28734, "repr": "0"},
    32    {"old_id": 86, "new_id": 28735, "repr": "S"},
    33    {"old_id": 45, "new_id": 28736, "repr": "*"},
    34    {"old_id": 76, "new_id": 28737, "repr": "I"},
    35    {"old_id": 87, "new_id": 28738, "repr": "T"},
    36    {"old_id": 37, "new_id": 28739, "repr": "\""},
    37    {"old_id": 52, "new_id": 28740, "repr": "1"},
    38    {"old_id": 68, "new_id": 28741, "repr": "A"},
    39    {"old_id": 42, "new_id": 28742, "repr": "'"},
    40    {"old_id": 70, "new_id": 28743, "repr": "C"},
    41    {"old_id": 123, "new_id": 28744, "repr": "x"},
    42    {"old_id": 62, "new_id": 28745, "repr": ";"},
    43    {"old_id": 64, "new_id": 28746, "repr": "="},
    44    {"old_id": 61, "new_id": 28747, "repr": ":"},
    45    {"old_id": 50, "new_id": 28748, "repr": "/"},
    46    {"old_id": 72, "new_id": 28749, "repr": "E"},
    47    {"old_id": 53, "new_id": 28750, "repr": "2"},
    48    {"old_id": 126, "new_id": 28751, "repr": "{"},
    49    {"old_id": 128, "new_id": 28752, "repr": "}"},
    50    {"old_id": 83, "new_id": 28753, "repr": "P"},
    51    {"old_id": 85, "new_id": 28754, "repr": "R"},
    52    {"old_id": 80, "new_id": 28755, "repr": "M"},
    53    {"old_id": 95, "new_id": 28756, "repr": "\\"},
    54    {"old_id": 71, "new_id": 28757, "repr": "D"},
    55    {"old_id": 79, "new_id": 28758, "repr": "L"},
    56    {"old_id": 81, "new_id": 28759, "repr": "N"},
    57    {"old_id": 69, "new_id": 28760, "repr": "B"},
    58    {"old_id": 82, "new_id": 28762, "repr": "O"},
    59    {"old_id": 125, "new_id": 28764, "repr": "z"},
    60    {"old_id": 73, "new_id": 28765, "repr": "F"},
    61    {"old_id": 127, "new_id": 28766, "repr": "|"},
    62    {"old_id": 65, "new_id": 28767, "repr": ">"},
    63    {"old_id": 109, "new_id": 28768, "repr": "j"},
    64    {"old_id": 75, "new_id": 28769, "repr": "H"},
    65    {"old_id": 54, "new_id": 28770, "repr": "3"},
    66    {"old_id": 38, "new_id": 28771, "repr": "#"},
    67    {"old_id": 60, "new_id": 28774, "repr": "9"},
    68    {"old_id": 116, "new_id": 28775, "repr": "q"},
    69    {"old_id": 39, "new_id": 28776, "repr": "$"},
    70    {"old_id": 74, "new_id": 28777, "repr": "G"},
    71    {"old_id": 88, "new_id": 28779, "repr": "U"},
    72    {"old_id": 90, "new_id": 28780, "repr": "W"},
    73    {"old_id": 55, "new_id": 28781, "repr": "4"},
    74    {"old_id": 56, "new_id": 28782, "repr": "5"},
    75    {"old_id": 59, "new_id": 28783, "repr": "8"},
    76    {"old_id": 57, "new_id": 28784, "repr": "6"},
    77    {"old_id": 58, "new_id": 28787, "repr": "7"},
    78    {"old_id": 63, "new_id": 28789, "repr": "<"},
    79    {"old_id": 89, "new_id": 28790, "repr": "V"},
    80    {"old_id": 94, "new_id": 28792, "repr": "["},
    81    {"old_id": 96, "new_id": 28793, "repr": "]"},
    82    {"old_id": 78, "new_id": 28796, "repr": "K"},
    83    {"old_id": 77, "new_id": 28798, "repr": "J"},
    84    {"old_id": 41, "new_id": 28800, "repr": "&"},
    85    {"old_id": 16, "new_id": 28801, "repr": "\r"},
    86    {"old_id": 92, "new_id": 28802, "repr": "Y"},
    87    {"old_id": 66, "new_id": 28804, "repr": "?"},
    88    {"old_id": 46, "new_id": 28806, "repr": "+"},
    89    {"old_id": 36, "new_id": 28808, "repr": "!"},
    90    {"old_id": 91, "new_id": 28814, "repr": "X"},
    91    {"old_id": 97, "new_id": 28815, "repr": "^"},
    92    {"old_id": 67, "new_id": 28818, "repr": "@"},
    93    {"old_id": 40, "new_id": 28823, "repr": "%"},
    94    {"old_id": 84, "new_id": 28824, "repr": "Q"},
    95    {"old_id": 93, "new_id": 28828, "repr": "Z"},
    96    {"old_id": 99, "new_id": 28832, "repr": "`"},
    97    {"old_id": 129, "new_id": 28845, "repr": "~"},
    98    {"old_id": 4, "new_id": 29534, "repr": "\u0001"},
    99    {"old_id": 15, "new_id": 29683, "repr": "\u000c"},
   100    {"old_id": 30, "new_id": 30246, "repr": "\u001b"},
   101    {"old_id": 21, "new_id": 30298, "repr": "\u0012"},
   102    {"old_id": 9, "new_id": 30314, "repr": "\u0006"},
   103    {"old_id": 19, "new_id": 30388, "repr": "\u0010"},
   104    {"old_id": 22, "new_id": 30453, "repr": "\u0013"},
   105    {"old_id": 17, "new_id": 30517, "repr": "\u000e"},
   106    {"old_id": 8, "new_id": 30550, "repr": "\u0005"},
   107    {"old_id": 5, "new_id": 30551, "repr": "\u0002"},
   108    {"old_id": 27, "new_id": 30555, "repr": "\u0018"},
   109    {"old_id": 20, "new_id": 30557, "repr": "\u0011"},
   110    {"old_id": 14, "new_id": 30638, "repr": "\u000b"},
   111    {"old_id": 6, "new_id": 30662, "repr": "\u0003"},
   112    {"old_id": 24, "new_id": 30675, "repr": "\u0015"},
   113    {"old_id": 18, "new_id": 30698, "repr": "\u000f"},
   114    {"old_id": 23, "new_id": 30721, "repr": "\u0014"},
   115    {"old_id": 7, "new_id": 30724, "repr": "\u0004"},
   116    {"old_id": 29, "new_id": 30759, "repr": "\u001a"},
   117    {"old_id": 26, "new_id": 30841, "repr": "\u0017"},
   118    {"old_id": 25, "new_id": 30935, "repr": "\u0016"},
   119    {"old_id": 10, "new_id": 30963, "repr": "\u0007"},
   120    {"old_id": 28, "new_id": 30969, "repr": "\u0019"},
   121    {"old_id": 130, "new_id": 30982, "repr": "\u007f"},
   122    {"old_id": 11, "new_id": 31129, "repr": "\b"},
   123    {"old_id": 31, "new_id": 31134, "repr": "\u001c"},
   124    {"old_id": 33, "new_id": 31150, "repr": "\u001e"},
   125    {"old_id": 34, "new_id": 31217, "repr": "\u001f"},
   126    {"old_id": 32, "new_id": 31236, "repr": "\u001d"}
   127  ]