Update llama_gptq.py

ypwhs · Apr 1, 2023
commit a7c93fd8b35b91b566f34de1ad09f0ed768128a4
diff --git a/predictors/llama_gptq.py b/predictors/llama_gptq.py
@@ -22,6 +22,7 @@ def __init__(self, model_name, checkpoint_path='llama7b-2m-4bit-128g.pt', wbits=
         print(f'Loading model from {checkpoint_path} ...')
         model: LlamaForCausalLM = load_quant(model_name, checkpoint_path, wbits, groupsize)
         model.eval()
+        model.to(self.device)
         self.model = model
         end = time.perf_counter()
         print(f'Successfully loaded model {model_name}, time cost: {end - start:.2f}s')