joonhyun.jeong committed
Commit 668bc57 · 1 Parent(s): 6bb2c3b

open proxydet

Files changed (1)
  1. app.py +30 -25
app.py CHANGED
@@ -1,9 +1,12 @@
 import torch
 import cv2
+import os
 import gradio as gr
 import numpy as np
 from transformers import OwlViTProcessor, OwlViTForObjectDetection
 
+def setup():
+    os.system("python3 -m pip install 'git+https://github.com/facebookresearch/detectron2.git'")
 
 # Use GPU if available
 if torch.cuda.is_available():
@@ -48,29 +51,31 @@ def query_image(img, text_queries, score_threshold):
     )
     return img
 
+if __name__ == "__main__":
+    setup()
 
-description = """
-Gradio demo for <a href="https://huggingface.co/docs/transformers/main/en/model_doc/owlvit">OWL-ViT</a>,
-introduced in <a href="https://arxiv.org/abs/2205.06230">Simple Open-Vocabulary Object Detection
-with Vision Transformers</a>.
-\n\nYou can use OWL-ViT to query images with text descriptions of any object.
-To use it, simply upload an image and enter comma separated text descriptions of objects you want to query the image for. You
-can also use the score threshold slider to set a threshold to filter out low probability predictions.
-\n\nOWL-ViT is trained on text templates,
-hence you can get better predictions by querying the image with text templates used in training the original model: *"photo of a star-spangled banner"*,
-*"image of a shoe"*. Refer to the <a href="https://arxiv.org/abs/2103.00020">CLIP</a> paper to see the full list of text templates used to augment the training data.
-\n\n<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb">Colab demo</a>
-"""
-demo = gr.Interface(
-    query_image,
-    inputs=[gr.Image(), "text", gr.Slider(0, 1, value=0.1)],
-    outputs="image",
-    title="Zero-Shot Object Detection with OWL-ViT",
-    description=description,
-    examples=[
-        ["assets/astronaut.png", "human face, rocket, star-spangled banner, nasa badge", 0.11],
-        ["assets/coffee.png", "coffee mug, spoon, plate", 0.1],
-        ["assets/butterflies.jpeg", "orange butterfly", 0.3],
-    ],
-)
-demo.launch()
+    description = """
+    Gradio demo for <a href="https://huggingface.co/docs/transformers/main/en/model_doc/owlvit">OWL-ViT</a>,
+    introduced in <a href="https://arxiv.org/abs/2205.06230">Simple Open-Vocabulary Object Detection
+    with Vision Transformers</a>.
+    \n\nYou can use OWL-ViT to query images with text descriptions of any object.
+    To use it, simply upload an image and enter comma separated text descriptions of objects you want to query the image for. You
+    can also use the score threshold slider to set a threshold to filter out low probability predictions.
+    \n\nOWL-ViT is trained on text templates,
+    hence you can get better predictions by querying the image with text templates used in training the original model: *"photo of a star-spangled banner"*,
+    *"image of a shoe"*. Refer to the <a href="https://arxiv.org/abs/2103.00020">CLIP</a> paper to see the full list of text templates used to augment the training data.
+    \n\n<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb">Colab demo</a>
+    """
+    demo = gr.Interface(
+        query_image,
+        inputs=[gr.Image(), "text", gr.Slider(0, 1, value=0.1)],
+        outputs="image",
+        title="Zero-Shot Object Detection with OWL-ViT",
+        description=description,
+        examples=[
+            ["assets/astronaut.png", "human face, rocket, star-spangled banner, nasa badge", 0.11],
+            ["assets/coffee.png", "coffee mug, spoon, plate", 0.1],
+            ["assets/butterflies.jpeg", "orange butterfly", 0.3],
+        ],
+    )
+    demo.launch()
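
A side note on the new setup() helper: os.system() only returns the child's exit status, so a failed detectron2 build would pass silently until the import fails later. A minimal, more defensive sketch (not part of this commit; ensure_detectron2 is a hypothetical name) that skips the install when the package is already importable and raises on a non-zero pip exit:

    import importlib.util
    import subprocess
    import sys

    def ensure_detectron2():
        # Hypothetical variant of this commit's setup(): install detectron2
        # at runtime only when it is not already importable.
        if importlib.util.find_spec("detectron2") is None:
            # check_call raises CalledProcessError on failure, unlike
            # os.system, which silently returns the exit status.
            subprocess.check_call([
                sys.executable, "-m", "pip", "install",
                "git+https://github.com/facebookresearch/detectron2.git",
            ])

Either way, calling the installer from the if __name__ == "__main__" block, as this commit does with setup(), keeps the install from running when app.py is merely imported.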
 
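For context, the body of query_image falls outside both hunks. A minimal sketch of the text-conditioned OWL-ViT query it presumably wraps, assuming a recent transformers release (with processor.post_process_object_detection) and the standard google/owlvit-base-patch32 checkpoint; the checkpoint name and image path here are illustrative, not taken from this commit:

    import torch
    from PIL import Image
    from transformers import OwlViTProcessor, OwlViTForObjectDetection

    processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
    model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")

    image = Image.open("assets/coffee.png").convert("RGB")
    text_queries = ["coffee mug", "spoon", "plate"]  # the demo's comma-separated box

    # One list of queries per image; the processor tokenizes the text and
    # resizes/normalizes the image.
    inputs = processor(text=[text_queries], images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Rescale boxes to the original image size and drop low-scoring predictions,
    # mirroring the demo's score-threshold slider.
    target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
    results = processor.post_process_object_detection(
        outputs, threshold=0.1, target_sizes=target_sizes
    )[0]
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        print(text_queries[int(label)], round(score.item(), 3), box.tolist())

The predicted labels index into the query list, which is how the Gradio wrapper can draw each detected phrase next to its box.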