github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/data/llama-tokenizer/tokenizer_config.json (about)

     1  {
     2    "add_bos_token": false,
     3    "add_eos_token": false,
     4    "bos_token": {
     5      "__type": "AddedToken",
     6      "content": "<s>",
     7      "lstrip": false,
     8      "normalized": false,
     9      "rstrip": false,
    10      "single_word": false
    11    },
    12    "clean_up_tokenization_spaces": false,
    13    "eos_token": {
    14      "__type": "AddedToken",
    15      "content": "</s>",
    16      "lstrip": false,
    17      "normalized": false,
    18      "rstrip": false,
    19      "single_word": false
    20    },
    21    "legacy": false,
    22    "model_max_length": 1000000000000000019884624838656,
    23    "pad_token": null,
    24    "padding_side": "right",
    25    "sp_model_kwargs": {},
    26    "tokenizer_class": "LlamaTokenizer",
    27    "unk_token": {
    28      "__type": "AddedToken",
    29      "content": "<unk>",
    30      "lstrip": false,
    31      "normalized": false,
    32      "rstrip": false,
    33      "single_word": false
    34    }
    35  }