github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/resources/data/llama3-tokenizer/config.json

{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.0.dev0",
  "use_cache": true,
  "vocab_size": 128256
}
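
The sketch below shows one way such a config could be read from Go; it is a minimal illustration only, assuming a hand-rolled LlamaConfig struct and a relative file path, and it is not the loader that gpt_bpe itself uses.

// loadconfig.go -- minimal sketch for reading the config.json above.
// The struct, its field subset, and the path are assumptions for illustration.
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// LlamaConfig mirrors a subset of the fields in config.json.
type LlamaConfig struct {
	ModelType   string `json:"model_type"`
	BosTokenID  int    `json:"bos_token_id"`
	EosTokenID  int    `json:"eos_token_id"`
	VocabSize   int    `json:"vocab_size"`
	HiddenSize  int    `json:"hidden_size"`
	MaxPosition int    `json:"max_position_embeddings"`
}

func main() {
	// Illustrative path; point it at the config.json shown above.
	data, err := os.ReadFile("resources/data/llama3-tokenizer/config.json")
	if err != nil {
		panic(err)
	}
	var cfg LlamaConfig
	if err := json.Unmarshal(data, &cfg); err != nil {
		panic(err)
	}
	// E.g. prints: llama: vocab=128256 bos=128000 eos=128001 ctx=8192
	fmt.Printf("%s: vocab=%d bos=%d eos=%d ctx=%d\n",
		cfg.ModelType, cfg.VocabSize, cfg.BosTokenID, cfg.EosTokenID, cfg.MaxPosition)
}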