Yehor committed on
Commit
b7cbc3b
·
verified ·
1 Parent(s): e390a97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -17
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import spaces
2
  import gradio as gr
3
 
4
- from unsloth import FastLanguageModel
 
5
 
6
  max_seq_length = 2048
7
  dtype = (
@@ -9,21 +10,12 @@ dtype = (
9
  )
10
  load_in_4bit = True
11
 
12
- model = None
13
-
14
- @spaces.GPU
15
- def load_model():
16
- if model is None:
17
- model, tokenizer = FastLanguageModel.from_pretrained(
18
- model_name="ua-l/gemma-2-9b-legal-uk",
19
- max_seq_length=max_seq_length,
20
- dtype=dtype,
21
- load_in_4bit=load_in_4bit,
22
- )
23
-
24
- load_model()
25
 
26
- FastLanguageModel.for_inference(model)
27
 
28
 
29
  @spaces.GPU
@@ -35,7 +27,7 @@ def predict(question):
35
  ### Answer:
36
  '''], return_tensors = "pt").to("cuda")
37
 
38
- outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
39
 
40
  results = tokenizer.batch_decode(outputs, skip_special_tokens=True)
41
 
@@ -43,7 +35,7 @@ def predict(question):
43
 
44
  inputs = gr.Textbox(lines=2, label="Enter a question", value="Як отримати виплати ВПО?")
45
 
46
- outputs = gr.JSON(label="Answer")
47
 
48
  demo = gr.Interface(fn=predict, inputs=inputs, outputs=outputs)
49
  demo.launch()
 
1
  import spaces
2
  import gradio as gr
3
 
4
+ from peft import AutoPeftModelForCausalLM
5
+ from transformers import AutoTokenizer
6
 
7
  max_seq_length = 2048
8
  dtype = (
 
10
  )
11
  load_in_4bit = True
12
 
13
+ model = AutoPeftModelForCausalLM.from_pretrained(
14
+ "ua-l/gemma-2-9b-legal-uk",
15
+ load_in_4bit = load_in_4bit,
16
+ )
17
+ tokenizer = AutoTokenizer.from_pretrained("ua-l/gemma-2-9b-legal-uk")
 
 
 
 
 
 
 
 
18
 
 
19
 
20
 
21
  @spaces.GPU
 
27
  ### Answer:
28
  '''], return_tensors = "pt").to("cuda")
29
 
30
+ outputs = model.generate(**inputs, max_new_tokens = 128)
31
 
32
  results = tokenizer.batch_decode(outputs, skip_special_tokens=True)
33
 
 
35
 
36
  inputs = gr.Textbox(lines=2, label="Enter a question", value="Як отримати виплати ВПО?")
37
 
38
+ outputs = gr.Textbox(label="Answer")
39
 
40
  demo = gr.Interface(fn=predict, inputs=inputs, outputs=outputs)
41
  demo.launch()