davanstrien HF Staff committed on
Commit
6a53027
·
1 Parent(s): 0b82992

refactor: simplify LLM initialization by removing gpu_memory_utilization parameter

Browse files
Files changed (1) hide show
  1. generate_summaries_uv.py +1 -3
generate_summaries_uv.py CHANGED
@@ -111,9 +111,7 @@ def generate_summaries(
111
 
112
  # Initialize model and tokenizer from local path
113
  logger.info(f"Initializing vLLM model from local path: {local_model_path}")
114
- llm = LLM(
115
- model=local_model_path, gpu_memory_utilization=0.98, enable_chunked_prefill=True
116
- )
117
  tokenizer = AutoTokenizer.from_pretrained(local_model_path)
118
  sampling_params = SamplingParams(
119
  temperature=temperature,
 
111
 
112
  # Initialize model and tokenizer from local path
113
  logger.info(f"Initializing vLLM model from local path: {local_model_path}")
114
+ llm = LLM(model=local_model_path, enable_chunked_prefill=True)
 
 
115
  tokenizer = AutoTokenizer.from_pretrained(local_model_path)
116
  sampling_params = SamplingParams(
117
  temperature=temperature,