app.py more robust and faster encode_image() with opencv
Browse files- app.py +66 -17
- pyproject.toml +2 -1
app.py
CHANGED
@@ -8,6 +8,7 @@ import time
|
|
8 |
from io import BytesIO
|
9 |
import logging
|
10 |
import traceback
|
|
|
11 |
|
12 |
import gradio as gr
|
13 |
import numpy as np
|
@@ -42,26 +43,74 @@ def encode_audio(data: np.ndarray) -> dict:
|
|
42 |
logger.error(f"Error encoding audio: {e}")
|
43 |
raise
|
44 |
|
45 |
-
def encode_image(data: np.ndarray) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
if not isinstance(data, np.ndarray):
|
47 |
-
raise TypeError("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
try:
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
pil_image = Image.fromarray(data)
|
53 |
-
pil_image.save(output_bytes, "JPEG")
|
54 |
-
bytes_data = output_bytes.getvalue()
|
55 |
-
base64_str = str(base64.b64encode(bytes_data), "utf-8")
|
56 |
-
return {"mime_type": "image/jpeg", "data": base64_str}
|
57 |
except Exception as e:
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
|
66 |
def check_twilio_availability_sync() -> bool:
|
67 |
"""Checks Twilio TURN server availability (synchronous version)."""
|
|
|
8 |
from io import BytesIO
|
9 |
import logging
|
10 |
import traceback
|
11 |
+
import cv2
|
12 |
|
13 |
import gradio as gr
|
14 |
import numpy as np
|
|
|
43 |
logger.error(f"Error encoding audio: {e}")
|
44 |
raise
|
45 |
|
46 |
+
def encode_image(data: np.ndarray, quality: int = 85) -> dict:
    """Encode an image array as a Base64 JPEG payload using OpenCV.

    ``uint8`` input is encoded as-is. Any other integer or floating-point
    dtype is min-max rescaled to the 0-255 range before encoding.

    Args:
        data: NumPy array of shape (height, width, 3) with an integer or
            floating-point dtype.
        quality: JPEG quality; coerced to ``int`` and clamped to 0-100.

    Returns:
        A dictionary with keys "mime_type" (always "image/jpeg") and "data"
        (the Base64-encoded JPEG bytes as a UTF-8 string).

    Raises:
        TypeError: If input is not a NumPy array or has an unsupported dtype.
        ValueError: If input shape is incorrect or contains NaN/Inf.
        Exception: If JPEG encoding fails.
    """
    # Input validation (type, shape, dimensions).
    if not isinstance(data, np.ndarray):
        raise TypeError("Input must be a NumPy array.")
    if data.ndim != 3 or data.shape[2] != 3:
        raise ValueError("Input array must have shape (n, n, 3).")
    if 0 in data.shape:
        raise ValueError("Input array cannot have a dimension of size 0.")

    is_float = np.issubdtype(data.dtype, np.floating)
    if not is_float and not np.issubdtype(data.dtype, np.integer):
        raise TypeError("Input array must have a floating-point or integer data type.")

    # Only floating-point arrays can hold NaN/Inf, so skip the scan for integers.
    if is_float and not np.isfinite(data).all():
        raise ValueError("Input array contains NaN or Inf")

    # Data type handling and scaling.
    if data.dtype == np.uint8:
        # Already in the 0-255 range: never rescale, so a uniform frame
        # (e.g. solid grey) keeps its real value instead of becoming white.
        scaled_data = data
    elif is_float:
        low = data.min()
        if low == data.max():
            # A constant float image has no range to stretch; map it to
            # black when the value is <= 0 and to white otherwise.
            fill_value = 0 if low <= 0 else 255
            scaled_data = np.full(data.shape, fill_value, dtype=np.uint8)
        else:
            scaled_data = cv2.normalize(data, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
    else:
        # Other integer types: convert to float64 for safe normalization.
        scaled_data = cv2.normalize(
            data.astype(np.float64), None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U
        )

    scaled_data = np.ascontiguousarray(scaled_data)

    # Keep the quality inside the range the JPEG encoder accepts.
    jpeg_quality = max(0, min(100, int(quality)))

    # NOTE(review): OpenCV treats 3-channel input as BGR when encoding.
    # Confirm callers do not pass RGB frames, otherwise red and blue are
    # swapped in the resulting JPEG.
    try:
        retval, buf = cv2.imencode(
            ".jpg", scaled_data, [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality]
        )
    except Exception as e:
        # Chain the cause so the underlying OpenCV traceback is preserved.
        raise Exception(f"JPEG encoding failed: {e}") from e
    if not retval:
        raise Exception("JPEG encoding failed: cv2.imencode failed")

    # Base64 encoding.
    jpeg_bytes = np.asarray(buf).tobytes()
    base64_str = base64.b64encode(jpeg_bytes).decode("utf-8")

    return {"mime_type": "image/jpeg", "data": base64_str}
|
114 |
|
115 |
def check_twilio_availability_sync() -> bool:
|
116 |
"""Checks Twilio TURN server availability (synchronous version)."""
|
pyproject.toml
CHANGED
@@ -7,5 +7,6 @@ requires-python = ">=3.13"
|
|
7 |
dependencies = [
|
8 |
"gradio_webrtc==0.0.28",
|
9 |
"google-genai==0.3.0",
|
10 |
-
"twilio"
|
|
|
11 |
]
|
|
|
7 |
dependencies = [
|
8 |
"gradio_webrtc==0.0.28",
|
9 |
"google-genai==0.3.0",
|
10 |
+
"twilio",
|
11 |
+
"opencv-python"
|
12 |
]
|