github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/data/llama3-tokenizer/config.json

{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.0.dev0",
  "use_cache": true,
  "vocab_size": 128256
}
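
For context, here is a minimal sketch of how a Go program might decode a handful of these fields with the standard encoding/json package. The llamaConfig struct, its field selection, and the relative file path are illustrative assumptions, not part of gpt_bpe's API; keys absent from the struct are simply ignored during unmarshaling.

    // Minimal sketch: reading a subset of this config with encoding/json.
    // The struct and path below are illustrative, not gpt_bpe's own types.
    package main

    import (
        "encoding/json"
        "fmt"
        "os"
    )

    // llamaConfig mirrors a few of the keys in the JSON above.
    type llamaConfig struct {
        Architectures         []string `json:"architectures"`
        BosTokenID            int      `json:"bos_token_id"`
        EosTokenID            int      `json:"eos_token_id"`
        HiddenSize            int      `json:"hidden_size"`
        MaxPositionEmbeddings int      `json:"max_position_embeddings"`
        ModelType             string   `json:"model_type"`
        NumAttentionHeads     int      `json:"num_attention_heads"`
        NumKeyValueHeads      int      `json:"num_key_value_heads"`
        RopeTheta             float64  `json:"rope_theta"`
        VocabSize             int      `json:"vocab_size"`
    }

    func main() {
        raw, err := os.ReadFile("resources/data/llama3-tokenizer/config.json")
        if err != nil {
            panic(err)
        }
        var cfg llamaConfig
        if err := json.Unmarshal(raw, &cfg); err != nil {
            panic(err)
        }
        // e.g. "llama: vocab=128256, bos=128000, eos=128001, ctx=8192"
        fmt.Printf("%s: vocab=%d, bos=%d, eos=%d, ctx=%d\n",
            cfg.ModelType, cfg.VocabSize, cfg.BosTokenID, cfg.EosTokenID,
            cfg.MaxPositionEmbeddings)
    }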