MrOvkill committed on
Commit caa9e65 · 1 Parent(s): 2465d46

Initial commit. LLAMA

Files changed (3)
  1. gemma-2b.q8_0.gguf +3 -0
  2. handler.py +47 -0
  3. requirements.txt +1 -0
gemma-2b.q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ec68b50d23469882716782da8b680402246356c3f984e9a3b9bcc5bc15273140
+ size 2669351840
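
The .gguf entry above is a Git LFS pointer: the actual weights blob (about 2.7 GB) resolves through LFS, and the oid is the SHA-256 of the file's contents per the LFS spec. A minimal sketch for verifying a downloaded copy against that digest (the local file name is assumed to match the pointer; this script is illustrative, not part of the commit):

import hashlib

EXPECTED_OID = "ec68b50d23469882716782da8b680402246356c3f984e9a3b9bcc5bc15273140"

digest = hashlib.sha256()
with open("gemma-2b.q8_0.gguf", "rb") as f:
    # Read in 1 MiB chunks so the 2.7 GB file never sits in memory at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED_OID, "checksum mismatch"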
handler.py ADDED
@@ -0,0 +1,47 @@
+ from typing import Any, Dict
+ from llama_cpp import Llama
+
+ class EndpointHandler:
+     def __init__(self, path: str = "", vision_model: str = "obsidian3b"):
+         self.model = Llama("gemma-2b.q8_0.gguf")  # quantized Gemma 2B, expected next to this handler
+
+     def __call__(self, data: Dict[str, Any]) -> str:
+         """
+         data args:
+             inputs (:obj:`str`): the prompt to complete
+             image (:obj:`Image`): currently unused; vision support is stubbed out below
+         Return:
+             A :obj:`str`: the generated text, which will be serialized and returned
+         """
+         # get inputs
+         inputs = data.pop("inputs", "")
+         #image = data.pop("image", None)
+
+         res = self.model(inputs, temperature=0.33, top_p=0.85, top_k=42)
+
+         return res["choices"][0]["text"]
+
+         #inputs = self.processor(inputs, image, return_tensors="pt")
+         #res = self.model.generate(**inputs, do_sample=False, max_new_tokens=4096)
+         #return self.processor.decode(res[0], skip_special_tokens=True)
+
+         #if image:
+             # perform image classification using Obsidian 3b vision
+             #image_features = self.vision.encode_image(image)
+             #image_embedding = self.vision.extract_feature(image_features)
+             #image_caption = self.vision.generate_caption(image_embedding)
+
+             # combine text and image captions
+             #combined_captions = [inputs, image_caption]
+
+             # run text classification on combined captions
+             #prediction = self.pipeline(combined_captions, temperature=0.33, num_beams=5, stop=[], do_sample=True)
+
+             #return prediction
+
+
+         #else:
+             # run text classification on plain text input
+             # prediction = self.pipeline(inputs, temperature=0.33, num_beams=5, stop=[], do_sample=True)
+
+             # return prediction
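
For reference, a minimal local smoke test for the handler above, a sketch rather than part of the commit; it assumes handler.py and gemma-2b.q8_0.gguf sit in the working directory, and the prompt is illustrative:

from handler import EndpointHandler

# Instantiate the handler; `path` would normally be the mounted repo directory.
handler = EndpointHandler(path=".")

# The endpoint passes a JSON-decoded dict; only the "inputs" key is read.
completion = handler({"inputs": "The capital of France is"})
print(completion)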
requirements.txt ADDED
@@ -0,0 +1 @@
+ llama-cpp-python
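
Once deployed, the endpoint receives POST requests whose JSON body supplies the "inputs" field read by handler.py. A hedged client-side sketch (the URL and token are placeholders, and requests is a client dependency, not part of requirements.txt):

import requests

ENDPOINT_URL = "https://YOUR-ENDPOINT.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_xxx"  # placeholder access token

response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
    json={"inputs": "Write one sentence about quantized models."},
)
print(response.json())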