
Comparing changes

Choose two branches to see what's changed or to start a new pull request.

Open a pull request

Create a new pull request by comparing changes across two branches.
base repository: open-compass/opencompass
base: main
head repository: open-compass/opencompass
compare: revert-1679-add_lora_for_vllm
Can't automatically merge, but you can still create the pull request.
  • 1 commit
  • 1 file changed
  • 1 contributor

Commits on Nov 12, 2024

  1. Revert "add single lora adapter support for vLLM inference. (#1679)"

    This reverts commit 3ec178f.
    bittersweet1999 authored Nov 12, 2024

    Verified: this commit was created on GitHub.com and signed with GitHub's verified signature.
    Full commit SHA: a0e607a
Showing 1 changed file with 2 additions and 7 deletions.
  1. +2 −7 opencompass/models/vllm.py

opencompass/models/vllm.py (2 additions & 7 deletions)
@@ -7,7 +7,6 @@
 
 try:
     from vllm import LLM, SamplingParams
-    from vllm.lora.request import LoRARequest
 except ImportError:
     LLM, SamplingParams = None, None
 
@@ -26,7 +25,6 @@ def __init__(
         meta_template: Optional[Dict] = None,
         mode: str = 'none',
         use_fastchat_template: bool = False,
-        lora_path: str = None,
         stop_words: List[str] = [],
     ):
         super().__init__(path=path,
@@ -40,7 +38,7 @@ def __init__(
         self.tokenizer = self.model.get_tokenizer()
         self.generation_kwargs = generation_kwargs
         self.generation_kwargs.pop('do_sample', None)
-        self.lora_path = lora_path
+
         assert mode in ['none', 'mid']
         self.mode = mode
         self.use_fastchat_template = use_fastchat_template
@@ -98,10 +96,7 @@ def generate(self,
         _stop = list(set(self.stop_words + stopping_criteria))
         generation_kwargs.update({'stop': _stop})
         sampling_kwargs = SamplingParams(**generation_kwargs)
-        if not self.lora_path:
-            outputs = self.model.generate(inputs, sampling_kwargs)
-        else:
-            outputs = self.model.generate(inputs, sampling_kwargs, lora_request=LoRARequest("sql_adapter", 1, self.lora_path))
+        outputs = self.model.generate(inputs, sampling_kwargs)
 
         prompt_list, output_strs = [], []
         for output in outputs:
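
For reference, the removed code path attached a single LoRA adapter at generation time through vLLM's LoRARequest. Below is a minimal, self-contained sketch of that pattern, not the OpenCompass implementation: the model name, prompt, and adapter path are placeholders, and it assumes a vLLM build with LoRA support. Note that the vLLM engine must be constructed with enable_lora=True for lora_request to take effect.

# Minimal sketch of the single-adapter pattern removed by this revert.
# The model name and adapter path below are placeholders, not values
# from the OpenCompass code.
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# vLLM applies a lora_request only when the engine is built with
# enable_lora=True.
llm = LLM(model='meta-llama/Llama-2-7b-hf', enable_lora=True)
sampling_params = SamplingParams(temperature=0.0, max_tokens=128)

lora_path = '/path/to/lora_adapter'  # placeholder adapter checkpoint
prompts = ['Write a SQL query that counts rows in a table.']

if lora_path:
    # LoRARequest(name, int_id, path): the reverted code hard-coded the
    # name 'sql_adapter' and id 1, which is why only a single adapter
    # was supported.
    outputs = llm.generate(
        prompts,
        sampling_params,
        lora_request=LoRARequest('sql_adapter', 1, lora_path))
else:
    outputs = llm.generate(prompts, sampling_params)

for output in outputs:
    print(output.outputs[0].text)

After this revert, the wrapper always calls self.model.generate(inputs, sampling_kwargs) with no lora_request.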