hadiyya committed · verified
Commit fccb324 · 1 parent: 98d6c17

Define stream_object_detection

Files changed (1): app.py (+75, -3)
app.py CHANGED
@@ -1,3 +1,77 @@
+import spaces
+import cv2
+from PIL import Image
+import torch
+import time
+import numpy as np
+import uuid
+
+from draw_boxes import draw_bounding_boxes
+from transformers import AutoImageProcessor, AutoModelForObjectDetection  # Added import
+
+SUBSAMPLE = 2
+
+# Initialize image processor and model
+image_processor = AutoImageProcessor.from_pretrained("PekingU/rtdetr_r101vd_coco_o365")
+model = AutoModelForObjectDetection.from_pretrained("PekingU/rtdetr_r101vd_coco_o365").to("cuda")
+
+@spaces.GPU
+def stream_object_detection(video, conf_threshold):
+    cap = cv2.VideoCapture(video)
+
+    video_codec = cv2.VideoWriter_fourcc(*"mp4v")  # type: ignore
+    fps = int(cap.get(cv2.CAP_PROP_FPS))
+
+    desired_fps = fps // SUBSAMPLE
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) // 2
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) // 2
+
+    iterating, frame = cap.read()
+
+    n_frames = 0
+
+
+    output_video_name = f"output_{uuid.uuid4()}.mp4"
+
+    # Output Video
+    output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))  # type: ignore
+    batch = []
+
+    while iterating:
+        frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        if n_frames % SUBSAMPLE == 0:
+            batch.append(frame)
+        if len(batch) == 2 * desired_fps:
+            inputs = image_processor(images=batch, return_tensors="pt").to("cuda")
+
+            with torch.no_grad():
+                outputs = model(**inputs)
+
+            boxes = image_processor.post_process_object_detection(
+                outputs,
+                target_sizes=torch.tensor([(height, width)] * len(batch)),
+                threshold=conf_threshold)
+
+            for i, (array, box) in enumerate(zip(batch, boxes)):
+                pil_image = draw_bounding_boxes(Image.fromarray(array), box, model, conf_threshold)
+                frame = np.array(pil_image)
+                # Convert RGB to BGR
+                frame = frame[:, :, ::-1].copy()
+                output_video.write(frame)
+
+            batch = []
+            output_video.release()
+            yield output_video_name
+            output_video_name = f"output_{uuid.uuid4()}.mp4"
+            output_video = cv2.VideoWriter(output_video_name, video_codec, desired_fps, (width, height))  # type: ignore
+
+        iterating, frame = cap.read()
+        n_frames += 1
+
+    cap.release()
+    output_video.release()
+
 import gradio as gr
 
 with gr.Blocks() as app:
@@ -20,11 +94,9 @@ with gr.Blocks() as app:
         with gr.Column():
             output_video = gr.Video(label="Processed Video", streaming=True, autoplay=True)
 
-    video.upload(
+    video.change(
        fn=stream_object_detection,
        inputs=[video, conf_threshold],
        outputs=[output_video],
    )
 
-
-# This is from: https://www.gradio.app/guides/object-detection-from-video
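
The added code imports `draw_bounding_boxes` from a local `draw_boxes` module that is not part of this commit. As a rough sketch only: assuming the helper's second argument is one entry of `image_processor.post_process_object_detection`'s output (a dict with `"scores"`, `"labels"`, and `"boxes"` tensors) and that `model` is passed just for its `config.id2label` mapping, it might look something like this:

```python
# Hypothetical sketch of the local draw_boxes module (not included in this commit).
# Signature inferred from the call site:
#   draw_bounding_boxes(Image.fromarray(array), box, model, conf_threshold)
from PIL import Image, ImageDraw


def draw_bounding_boxes(image: Image.Image, detections: dict, model, threshold: float) -> Image.Image:
    """Draw a labeled rectangle for each detection scoring at or above threshold."""
    draw = ImageDraw.Draw(image)
    for score, label, box in zip(detections["scores"], detections["labels"], detections["boxes"]):
        if score.item() < threshold:
            continue
        x_min, y_min, x_max, y_max = box.tolist()
        class_name = model.config.id2label[label.item()]  # e.g. "person", "car"
        draw.rectangle((x_min, y_min, x_max, y_max), outline="red", width=3)
        draw.text((x_min, max(y_min - 12, 0)), f"{class_name}: {score.item():.2f}", fill="red")
    return image
```

Two design points worth noting in the diff itself: `stream_object_detection` is a generator, releasing the `VideoWriter` after every two seconds' worth of subsampled frames and yielding the finished .mp4 chunk so that `gr.Video(streaming=True)` can begin playback before the whole video is processed; and swapping the trigger from `video.upload` to `video.change` re-runs detection whenever the input's value changes, not only on a direct user upload.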
102