Spaces: Running on CPU Upgrade
Commit · 6a53027
1 Parent(s): 0b82992
refactor: simplify LLM initialization by removing gpu_memory_utilization parameter
Browse files
- generate_summaries_uv.py +1 -3

generate_summaries_uv.py CHANGED
@@ -111,9 +111,7 @@ def generate_summaries(
 
     # Initialize model and tokenizer from local path
     logger.info(f"Initializing vLLM model from local path: {local_model_path}")
-    llm = LLM(
-        model=local_model_path, gpu_memory_utilization=0.98, enable_chunked_prefill=True
-    )
+    llm = LLM(model=local_model_path, enable_chunked_prefill=True)
     tokenizer = AutoTokenizer.from_pretrained(local_model_path)
     sampling_params = SamplingParams(
         temperature=temperature,