github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/data/llama-tokenizer/duplicates.json (about) 1 [ 2 {"old_id": 35, "new_id": 29871, "repr": " "}, 3 {"old_id": 104, "new_id": 29872, "repr": "e"}, 4 {"old_id": 119, "new_id": 29873, "repr": "t"}, 5 {"old_id": 100, "new_id": 29874, "repr": "a"}, 6 {"old_id": 108, "new_id": 29875, "repr": "i"}, 7 {"old_id": 113, "new_id": 29876, "repr": "n"}, 8 {"old_id": 114, "new_id": 29877, "repr": "o"}, 9 {"old_id": 117, "new_id": 29878, "repr": "r"}, 10 {"old_id": 118, "new_id": 29879, "repr": "s"}, 11 {"old_id": 111, "new_id": 29880, "repr": "l"}, 12 {"old_id": 103, "new_id": 29881, "repr": "d"}, 13 {"old_id": 107, "new_id": 29882, "repr": "h"}, 14 {"old_id": 102, "new_id": 29883, "repr": "c"}, 15 {"old_id": 120, "new_id": 29884, "repr": "u"}, 16 {"old_id": 112, "new_id": 29885, "repr": "m"}, 17 {"old_id": 115, "new_id": 29886, "repr": "p"}, 18 {"old_id": 106, "new_id": 29887, "repr": "g"}, 19 {"old_id": 105, "new_id": 29888, "repr": "f"}, 20 {"old_id": 49, "new_id": 29889, "repr": "."}, 21 {"old_id": 101, "new_id": 29890, "repr": "b"}, 22 {"old_id": 124, "new_id": 29891, "repr": "y"}, 23 {"old_id": 47, "new_id": 29892, "repr": ","}, 24 {"old_id": 122, "new_id": 29893, "repr": "w"}, 25 {"old_id": 121, "new_id": 29894, "repr": "v"}, 26 {"old_id": 110, "new_id": 29895, "repr": "k"}, 27 {"old_id": 52, "new_id": 29896, "repr": "1"}, 28 {"old_id": 44, "new_id": 29897, "repr": ")"}, 29 {"old_id": 43, "new_id": 29898, "repr": "("}, 30 {"old_id": 48, "new_id": 29899, "repr": "-"}, 31 {"old_id": 51, "new_id": 29900, "repr": "0"}, 32 {"old_id": 61, "new_id": 29901, "repr": ":"}, 33 {"old_id": 76, "new_id": 29902, "repr": "I"}, 34 {"old_id": 86, "new_id": 29903, "repr": "S"}, 35 {"old_id": 95, "new_id": 29905, "repr": "\\"}, 36 {"old_id": 53, "new_id": 29906, "repr": "2"}, 37 {"old_id": 70, "new_id": 29907, "repr": "C"}, 38 {"old_id": 37, "new_id": 29908, "repr": "\""}, 39 {"old_id": 68, "new_id": 29909, "repr": "A"}, 40 {"old_id": 87, "new_id": 29911, "repr": "T"}, 41 {"old_id": 126, "new_id": 29912, "repr": "{"}, 42 {"old_id": 128, "new_id": 29913, "repr": "}"}, 43 {"old_id": 50, "new_id": 29914, "repr": "/"}, 44 {"old_id": 42, "new_id": 29915, "repr": "'"}, 45 {"old_id": 123, "new_id": 29916, "repr": "x"}, 46 {"old_id": 98, "new_id": 29918, "repr": "_"}, 47 {"old_id": 125, "new_id": 29920, "repr": "z"}, 48 {"old_id": 64, "new_id": 29922, "repr": "="}, 49 {"old_id": 72, "new_id": 29923, "repr": "E"}, 50 {"old_id": 80, "new_id": 29924, "repr": "M"}, 51 {"old_id": 83, "new_id": 29925, "repr": "P"}, 52 {"old_id": 109, "new_id": 29926, "repr": "j"}, 53 {"old_id": 71, "new_id": 29928, "repr": "D"}, 54 {"old_id": 60, "new_id": 29929, "repr": "9"}, 55 {"old_id": 45, "new_id": 29930, "repr": "*"}, 56 {"old_id": 79, "new_id": 29931, "repr": "L"}, 57 {"old_id": 69, "new_id": 29933, "repr": "B"}, 58 {"old_id": 85, "new_id": 29934, "repr": "R"}, 59 {"old_id": 62, "new_id": 29936, "repr": ";"}, 60 {"old_id": 38, "new_id": 29937, "repr": "#"}, 61 {"old_id": 39, "new_id": 29938, "repr": "$"}, 62 {"old_id": 116, "new_id": 29939, "repr": "q"}, 63 {"old_id": 81, "new_id": 29940, "repr": "N"}, 64 {"old_id": 54, "new_id": 29941, "repr": "3"}, 65 {"old_id": 73, "new_id": 29943, "repr": "F"}, 66 {"old_id": 56, "new_id": 29945, "repr": "5"}, 67 {"old_id": 55, "new_id": 29946, "repr": "4"}, 68 {"old_id": 59, "new_id": 29947, "repr": "8"}, 69 {"old_id": 82, "new_id": 29949, "repr": "O"}, 70 {"old_id": 75, "new_id": 29950, "repr": "H"}, 71 {"old_id": 99, "new_id": 29952, "repr": "`"}, 72 {"old_id": 57, "new_id": 29953, "repr": "6"}, 73 {"old_id": 74, "new_id": 29954, "repr": "G"}, 74 {"old_id": 58, "new_id": 29955, "repr": "7"}, 75 {"old_id": 90, "new_id": 29956, "repr": "W"}, 76 {"old_id": 65, "new_id": 29958, "repr": ">"}, 77 {"old_id": 94, "new_id": 29961, "repr": "["}, 78 {"old_id": 96, "new_id": 29962, "repr": "]"}, 79 {"old_id": 89, "new_id": 29963, "repr": "V"}, 80 {"old_id": 88, "new_id": 29965, "repr": "U"}, 81 {"old_id": 63, "new_id": 29966, "repr": "<"}, 82 {"old_id": 77, "new_id": 29967, "repr": "J"}, 83 {"old_id": 78, "new_id": 29968, "repr": "K"}, 84 {"old_id": 66, "new_id": 29973, "repr": "?"}, 85 {"old_id": 46, "new_id": 29974, "repr": "+"}, 86 {"old_id": 92, "new_id": 29979, "repr": "Y"}, 87 {"old_id": 84, "new_id": 29984, "repr": "Q"}, 88 {"old_id": 97, "new_id": 29985, "repr": "^"}, 89 {"old_id": 41, "new_id": 29987, "repr": "&"}, 90 {"old_id": 127, "new_id": 29989, "repr": "|"}, 91 {"old_id": 91, "new_id": 29990, "repr": "X"}, 92 {"old_id": 36, "new_id": 29991, "repr": "!"}, 93 {"old_id": 67, "new_id": 29992, "repr": "@"}, 94 {"old_id": 40, "new_id": 29995, "repr": "%"}, 95 {"old_id": 93, "new_id": 29999, "repr": "Z"}, 96 {"old_id": 16, "new_id": 30004, "repr": "\r"}, 97 {"old_id": 129, "new_id": 30022, "repr": "~"} 98 ]