def get_head_size(self) -> int:
    """Return the size of a single attention head.

    An explicit, non-None ``head_dim`` on ``self.hf_config`` is returned
    unchanged. When the attribute is missing or set to None, the value is
    derived as ``hidden_size // num_attention_heads``.

    Returns:
        The per-head dimension.
    """
    # getattr with a None default (instead of hasattr) so that a config
    # which declares ``head_dim`` but leaves it as None falls through to
    # the derived value rather than returning None from an ``-> int``
    # method.
    head_dim = getattr(self.hf_config, "head_dim", None)
    if head_dim is not None:
        return head_dim
    # FIXME(woosuk): This may not be true for all models.
    return self.hf_config.hidden_size // self.hf_config.num_attention_heads