Spaces:

freddyaboulton
/

gemini-audio-video-chat

Running

App Files Files Community

ahundt committed on Feb 21

Commit

fb9d6a0

1 Parent(s): 7bc94a1

app.py more robust and faster encode_image() with opencv

Browse files

Files changed (2) hide show

app.py +66 -17
pyproject.toml +2 -1

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import time
 from io import BytesIO
 import logging
 import traceback
 import gradio as gr
 import numpy as np
@@ -42,26 +43,74 @@ def encode_audio(data: np.ndarray) -> dict:
         logger.error(f"Error encoding audio: {e}")
         raise
def encode_image(data: np.ndarray) -> dict:
    """Encode an image array as a Base64 JPEG payload using Pillow.

    Args:
        data: Image array. ``uint8`` input is encoded as-is; any other dtype
            is clipped to 0-255 and cast to ``uint8`` first.

    Returns:
        A dict with "mime_type" ("image/jpeg") and "data" (Base64 text).

    Raises:
        TypeError: If ``data`` is not a NumPy array.
        Exception: Any error raised while building or saving the image is
            logged with its traceback and re-raised unchanged.
    """
    if not isinstance(data, np.ndarray):
        raise TypeError("encode_image expected a numpy.ndarray")
    try:
        # Pillow cannot build a multi-channel image from float32 data, so the
        # previous unconditional float32 cast made every colour frame fail.
        # NOTE(review): assumes non-uint8 input is already on a 0-255 scale
        # -- TODO confirm against the caller.
        if data.dtype != np.uint8:
            data = np.clip(data, 0, 255).astype(np.uint8)
        with BytesIO() as output_bytes:
            Image.fromarray(data).save(output_bytes, "JPEG")
            bytes_data = output_bytes.getvalue()
        base64_str = base64.b64encode(bytes_data).decode("utf-8")
        return {"mime_type": "image/jpeg", "data": base64_str}
    except Exception as e:
        # A single handler is enough: a later `except ValueError` could never
        # run, and logger.exception already records the traceback.
        logger.exception(f"Error encoding image: {e}")
        raise
 def check_twilio_availability_sync() -> bool:
     """Checks Twilio TURN server availability (synchronous version)."""

 from io import BytesIO
 import logging
 import traceback
+import cv2
 import gradio as gr
 import numpy as np
         logger.error(f"Error encoding audio: {e}")
         raise
def encode_image(data: np.ndarray, quality: int = 85) -> dict:
    """Encode an image array as a Base64 JPEG payload using OpenCV.

    ``uint8`` input is encoded unchanged. Floating-point and other integer
    input is min-max rescaled to 0-255 first, so its absolute brightness is
    not preserved.

    NOTE(review): ``cv2.imencode`` interprets 3-channel arrays as BGR. If
    callers pass RGB frames (TODO confirm), red and blue end up swapped in
    the resulting JPEG.

    Args:
        data: Image array of shape (height, width, 3) with a floating-point
            or integer dtype.
        quality: JPEG quality; values outside 0-100 are clamped.

    Returns:
        A dict with "mime_type" ("image/jpeg") and "data" (Base64 text).

    Raises:
        TypeError: If ``data`` is not a NumPy array, or its dtype is neither
            floating-point nor integer.
        ValueError: If the shape is not 3-D with 3 channels, a dimension is
            empty, or the array contains NaN/Inf.
        RuntimeError: If JPEG encoding fails.
    """
    # Input validation (type, shape, emptiness).
    if not isinstance(data, np.ndarray):
        raise TypeError("Input must be a NumPy array.")
    if data.ndim != 3 or data.shape[2] != 3:
        raise ValueError("Input array must have shape (n, n, 3).")
    if 0 in data.shape:
        raise ValueError("Input array cannot have a dimension of size 0.")
    # Only inexact dtypes can hold NaN/Inf, so integer frames skip the scan.
    if np.issubdtype(data.dtype, np.inexact) and not np.isfinite(data).all():
        raise ValueError("Input array contains NaN or Inf")

    # Convert to uint8 pixel data.
    if data.dtype == np.uint8:
        # Already in pixel range: pass through untouched. Rewriting uniform
        # frames to 0/255 would turn e.g. a solid grey frame white.
        scaled_data = data
    elif np.issubdtype(data.dtype, np.floating):
        lowest = data.min()
        if lowest == data.max():
            # Min-max scaling is undefined for a constant image.
            fill = 0 if lowest <= 0 else 255
            scaled_data = np.full(data.shape, fill, dtype=np.uint8)
        else:
            scaled_data = cv2.normalize(
                data, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U
            )
    elif np.issubdtype(data.dtype, np.integer):
        # Widen to float64 so the rescale cannot overflow the source dtype.
        scaled_data = cv2.normalize(
            data.astype(np.float64), None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U
        )
    else:
        raise TypeError("Input array must have a floating-point or integer data type.")
    # OpenCV needs a C-contiguous buffer (views/slices may not be).
    scaled_data = np.ascontiguousarray(scaled_data)

    # JPEG encoding; OpenCV requires plain ints in the parameter list.
    jpeg_quality = min(max(int(quality), 0), 100)
    try:
        success, buf = cv2.imencode(
            ".jpg", scaled_data, [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality]
        )
    except Exception as e:
        raise RuntimeError(f"JPEG encoding failed: {e}") from e
    if not success:
        raise RuntimeError("JPEG encoding failed: cv2.imencode failed")

    # Base64 encoding.
    base64_str = base64.b64encode(buf.tobytes()).decode("utf-8")
    return {"mime_type": "image/jpeg", "data": base64_str}
 def check_twilio_availability_sync() -> bool:
     """Checks Twilio TURN server availability (synchronous version)."""

pyproject.toml CHANGED Viewed

@@ -7,5 +7,6 @@ requires-python = ">=3.13"
 dependencies = [
     "gradio_webrtc==0.0.28",
     "google-genai==0.3.0",
-    "twilio"
 ]

 dependencies = [
     "gradio_webrtc==0.0.28",
     "google-genai==0.3.0",
+    "twilio",
+    "opencv-python"
 ]