import os
from typing import Any, Dict, List

from llama_cpp import Llama
class EndpointHandler:
    def __init__(self, path: str = "", vision_model: str = "obsidian3b"):
        # `path` is the model repository directory the inference runtime
        # passes in; the quantized Gemma GGUF file is expected there.
        self.model = Llama(model_path=os.path.join(path, "gemma-2b.q8_0.gguf"))
        # Reserved for the multimodal path sketched at the bottom of
        # __call__; no vision model is loaded yet.
        self.vision_model = vision_model
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data:
                inputs (`str`): prompt text to complete.
                image (`PIL.Image.Image`, optional): reserved for the
                    vision path sketched below; currently ignored.
        Return:
            A `list` of `dict`s that will be serialized and returned.
        """
        # get inputs
        inputs = data.pop("inputs", "")
        # image = data.pop("image", None)
        # Sample a completion; max_tokens=256 is an assumed cap, since
        # llama-cpp-python otherwise defaults to a very short completion.
        res = self.model(inputs, temperature=0.33, top_p=0.85, top_k=42, max_tokens=256)
        # Wrap the text in a list of dicts so the payload matches the
        # declared return type and serializes cleanly.
        return [{"generated_text": res["choices"][0]["text"]}]
        # --- Planned multimodal path (not wired up yet) ---
        # Option A: processor-based generation:
        #   inputs = self.processor(inputs, image, return_tensors="pt")
        #   res = self.model.generate(**inputs, do_sample=False, max_new_tokens=4096)
        #   return self.processor.decode(res[0], skip_special_tokens=True)
        #
        # Option B: caption the image with the Obsidian 3b vision model,
        # then run classification on the combined captions:
        #   if image:
        #       image_features = self.vision.encode_image(image)
        #       image_embedding = self.vision.extract_feature(image_features)
        #       image_caption = self.vision.generate_caption(image_embedding)
        #       combined_captions = [inputs, image_caption]
        #       prediction = self.pipeline(combined_captions, temperature=0.33, num_beams=5, stop=[], do_sample=True)
        #       return prediction
        #   else:
        #       prediction = self.pipeline(inputs, temperature=0.33, num_beams=5, stop=[], do_sample=True)
        #       return prediction
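
# A minimal local smoke test for the handler, assuming the GGUF file sits
# in the current directory; the prompt here is illustrative only.
if __name__ == "__main__":
    handler = EndpointHandler(path=".")
    print(handler({"inputs": "Q: What is the capital of France?\nA:"}))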