Update handler.py
handler.py CHANGED (+5 -1)
@@ -1,11 +1,15 @@
 from typing import Dict, List, Any
 from llama_cpp import Llama
+import torch
 
 MAX_TOKENS=8192
 
 class EndpointHandler():
     def __init__(self, data):
-
+        n_gpu_layers = GPU_LAYERS
+        if not torch.cuda.is_available():
+            n_gpu_layers = 0
+        self.model = Llama.from_pretrained("lmstudio-ai/gemma-2b-it-GGUF", filename="gemma-2b-it-q4_k_m.gguf", n_ctx=8192, cache_dir="./", n_gpu_layers=n_gpu_layers)
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         inputs = data.pop("inputs", "")
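In context, the change makes GPU offloading conditional: the handler requests GPU_LAYERS offloaded layers from llama.cpp, but falls back to CPU-only inference (n_gpu_layers=0) when torch reports no CUDA device, before loading the Gemma 2B GGUF model with Llama.from_pretrained. A minimal sketch of the updated handler is below, with two assumptions flagged in comments: GPU_LAYERS is not defined anywhere in this hunk, so the value used here (-1, i.e. offload all layers) is a guess at a definition that presumably lives elsewhere in handler.py, and the body of __call__ beyond reading "inputs" is not shown in the diff.

from typing import Dict, List, Any

import torch
from llama_cpp import Llama

MAX_TOKENS = 8192
# Assumption: GPU_LAYERS is defined elsewhere in handler.py (not visible in this hunk).
# -1 asks llama.cpp to offload every layer to the GPU.
GPU_LAYERS = -1


class EndpointHandler():
    def __init__(self, data):
        # Offload layers only when CUDA is actually available; otherwise run fully on CPU.
        n_gpu_layers = GPU_LAYERS
        if not torch.cuda.is_available():
            n_gpu_layers = 0
        # Download the GGUF file from the Hugging Face Hub into ./ and load it
        # with an 8192-token context window.
        self.model = Llama.from_pretrained(
            "lmstudio-ai/gemma-2b-it-GGUF",
            filename="gemma-2b-it-q4_k_m.gguf",
            n_ctx=8192,
            cache_dir="./",
            n_gpu_layers=n_gpu_layers,
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        inputs = data.pop("inputs", "")
        # Assumption: the rest of __call__ (not part of this hunk) runs the model on
        # `inputs` and returns the generated text.
        ...

Gating on torch.cuda.is_available() lets the same handler.py run unchanged on both CPU-only and GPU inference endpoints, since llama.cpp treats n_gpu_layers=0 as "keep everything on the CPU".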