MrOvkill committed on
Commit
5b5a6aa
·
verified ·
1 Parent(s): 4405341

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +5 -1
handler.py CHANGED
@@ -1,11 +1,15 @@
1
  from typing import Dict, List, Any
2
  from llama_cpp import Llama
 
3
 
4
  MAX_TOKENS=8192
5
 
6
  class EndpointHandler():
7
  def __init__(self, data):
8
- self.model = Llama.from_pretrained("lmstudio-ai/gemma-2b-it-GGUF", filename="gemma-2b.q8_0.gguf", n_ctx=8192, cache_dir="./", n_gpu_layers=99)
 
 
 
9
 
10
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
11
  inputs = data.pop("inputs", "")
 
1
  from typing import Dict, List, Any
2
  from llama_cpp import Llama
3
+ import torch
4
 
5
  MAX_TOKENS=8192
6
 
7
  class EndpointHandler():
8
  def __init__(self, data):
9
+ n_gpu_layers = GPU_LAYERS
10
+ if not torch.cuda.is_available():
11
+ n_gpu_layers = 0
12
+ self.model = Llama.from_pretrained("lmstudio-ai/gemma-2b-it-GGUF", filename="gemma-2b-it-q4_k_m.gguf", n_ctx=8192, cache_dir="./", n_gpu_layers=n_gpu_layers)
13
 
14
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
15
  inputs = data.pop("inputs", "")