MrOvkill committed on
Commit caa9e65 · 1 Parent(s): 2465d46

Initial commit. LLAMA

Files changed (3)
  1. gemma-2b.q8_0.gguf +3 -0
  2. handler.py +47 -0
  3. requirements.txt +1 -0
gemma-2b.q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ec68b50d23469882716782da8b680402246356c3f984e9a3b9bcc5bc15273140
+ size 2669351840
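
The .gguf entry above is a Git LFS pointer: the actual weights blob (about 2.7 GB) resolves through LFS, and the oid is the SHA-256 of the file's contents per the LFS spec. A minimal sketch for verifying a downloaded copy against that digest (the local file name is assumed to match the pointer; this script is illustrative, not part of the commit):

import hashlib

EXPECTED_OID = "ec68b50d23469882716782da8b680402246356c3f984e9a3b9bcc5bc15273140"

digest = hashlib.sha256()
with open("gemma-2b.q8_0.gguf", "rb") as f:
    # Read in 1 MiB chunks so the 2.7 GB file never sits in memory at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED_OID, "checksum mismatch"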
handler.py ADDED
@@ -0,0 +1,47 @@
+ from typing import Any, Dict
+ from llama_cpp import Llama
+
+ class EndpointHandler:
+     def __init__(self, path: str = "", vision_model: str = "obsidian3b"):
+         self.model = Llama("gemma-2b.q8_0.gguf")  # quantized Gemma 2B, expected next to this handler
+
+     def __call__(self, data: Dict[str, Any]) -> str:
+         """
+         data args:
+             inputs (:obj:`str`): the prompt to complete
+             image (:obj:`Image`): currently unused; vision support is stubbed out below
+         Return:
+             A :obj:`str`: the generated text, which will be serialized and returned
+         """
+         # get inputs
+         inputs = data.pop("inputs", "")
+         #image = data.pop("image", None)
+
+         res = self.model(inputs, temperature=0.33, top_p=0.85, top_k=42)
+
+         return res["choices"][0]["text"]
+
+         #inputs = self.processor(inputs, image, return_tensors="pt")
+         #res = self.model.generate(**inputs, do_sample=False, max_new_tokens=4096)
+         #return self.processor.decode(res[0], skip_special_tokens=True)
+
+         #if image:
+             # perform image classification using Obsidian 3b vision
+             #image_features = self.vision.encode_image(image)
+             #image_embedding = self.vision.extract_feature(image_features)
+             #image_caption = self.vision.generate_caption(image_embedding)
+
+             # combine text and image captions
+             #combined_captions = [inputs, image_caption]
+
+             # run text classification on combined captions
+             #prediction = self.pipeline(combined_captions, temperature=0.33, num_beams=5, stop=[], do_sample=True)
+
+             #return prediction
+
+
+         #else:
+             # run text classification on plain text input
+             # prediction = self.pipeline(inputs, temperature=0.33, num_beams=5, stop=[], do_sample=True)
+
+             # return prediction
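
For reference, a minimal local smoke test for the handler above, a sketch rather than part of the commit; it assumes handler.py and gemma-2b.q8_0.gguf sit in the working directory, and the prompt is illustrative:

from handler import EndpointHandler

# Instantiate the handler; `path` would normally be the mounted repo directory.
handler = EndpointHandler(path=".")

# The endpoint passes a JSON-decoded dict; only the "inputs" key is read.
completion = handler({"inputs": "The capital of France is"})
print(completion)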
requirements.txt ADDED
@@ -0,0 +1 @@
+ llama-cpp-python
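
Once deployed, the endpoint receives POST requests whose JSON body supplies the "inputs" field read by handler.py. A hedged client-side sketch (the URL and token are placeholders, and requests is a client dependency, not part of requirements.txt):

import requests

ENDPOINT_URL = "https://YOUR-ENDPOINT.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_xxx"  # placeholder access token

response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
    json={"inputs": "Write one sentence about quantized models."},
)
print(response.json())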