diabolic6045 committed on
Commit b13a765 · verified · 1 Parent(s): e387e82

Update app.py

Files changed (1)
  1. app.py +16 -27
app.py CHANGED
@@ -1,13 +1,3 @@
- import gradio as gr
- import os
- import torch
- from transformers import AutoTokenizer, AutoModelForVision2Seq, AutoImageProcessor
- from PIL import Image
- import requests
- import spaces
- from huggingface_hub import login
- login(os.environ["HF_KEY"])
-
  # Load the model and tokenizer
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model = AutoModelForVision2Seq.from_pretrained("stabilityai/japanese-stable-vlm", trust_remote_code=True, device_map='auto')
@@ -42,7 +32,6 @@ def build_prompt(task="caption", input=None, sep="\n\n### "):
      return p

  # Define the function to generate text from the image and prompt
- @spaces.GPU(duration=120)
  def generate_text(image, task, input_text=None):
      prompt = build_prompt(task=task, input=input_text)
      inputs = processor(images=image, return_tensors="pt")
@@ -60,21 +49,21 @@ def generate_text(image, task, input_text=None):
      return generated_text

  # Define the Gradio interface
- image_input = gr.Image(label="Upload an image")
- task_input = gr.Radio(choices=["caption", "tag", "vqa"], value="caption", label="Select a task")
- text_input = gr.Textbox(label="Enter text (for tag or vqa tasks)")
-
- output = gr.Textbox(label="Generated text")
-
- interface = gr.Interface(
-     fn=generate_text,
-     inputs=[image_input, task_input, text_input],
-     outputs=output,
-     examples=[
-         ["examples/example_image.jpg", "caption", None],
-         ["examples/example_image.jpg", "tag", "河津桜、青空"],
-         ["examples/example_image.jpg", "vqa", "OCRはできますか?"],
-     ],
- )
-
- interface.launch()
+ with gr.Blocks() as demo:
+     chatbot = gr.Chatbot([], elem_id="chatbot", show_copy_button=True)
+     with gr.Box():
+         with gr.Row():
+             image_input = gr.Image(label="Upload an image")
+             task_input = gr.Radio(choices=["caption", "tag", "vqa"], value="caption", label="Select a task")
+             text_input = gr.Textbox(label="Enter text (for tag or vqa tasks)")
+             submit_btn = gr.Button("Submit")
+     inputs = [image_input, task_input, text_input]
+     outputs = chatbot
+     submit_btn.click(generate_text, inputs, outputs, api_name="generate_text")
+
+     # Event listeners
+     chatbot.change(lambda x: print(f"Chatbot changed: {x}"), chatbot, chatbot)
+     chatbot.select(lambda x: print(f"Chatbot selected: {x.value}, {x.selected}"), None, chatbot)
+     chatbot.like(lambda x: print(f"Liked/Disliked: {x.index}, {x.value}, {x.liked}"), None, chatbot)
+
+ demo.launch()
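
Two details of the new version are worth noting. The first hunk deletes every import from the top of app.py even though the surviving code still references gr, torch, and the transformers classes, and the Blocks UI now routes generate_text's plain-string return value into a gr.Chatbot, whose value in the Gradio 3.x generation implied by gr.Box is a list of (user, bot) message pairs. The sketch below is a hypothetical addition to app.py, not part of this commit; the import list and the generate_for_chatbot wrapper are assumptions based only on the lines visible in this diff.

# Hypothetical sketch, not part of this commit: restores the imports the visible
# code still references and adapts generate_text's string output to the
# (user, bot) pair list that gr.Chatbot expects as its value.
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForVision2Seq, AutoImageProcessor

def generate_for_chatbot(image, task, input_text=None):
    # generate_text is defined earlier in app.py and returns a plain string
    reply = generate_text(image, task, input_text)
    user_turn = input_text if input_text else task
    return [[user_turn, reply]]

# Wiring it into the Blocks UI would then look like:
# submit_btn.click(generate_for_chatbot, inputs, chatbot, api_name="generate_text")

Whether the wrapper is actually needed depends on the Gradio version pinned by the Space; with the paired Chatbot format, passing a bare string straight from generate_text would not render as a chat turn.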