{
  "add_bos_token": false,
  "add_eos_token": false,
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "legacy": false,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": null,
  "padding_side": "right",
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}