OussamaElfila21 committed (verified) · Commit 5d92054 · 1 Parent(s): c300ae3

Upload 35 files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+utils/build/temp.win-amd64-cpython-311/Release/processing.obj filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,36 @@
+# Use a specific Python version
+FROM python:3.11.10-slim
+
+# Set the working directory inside the container
+WORKDIR /usr/src/app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    gcc \
+    python3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip
+RUN pip install --upgrade pip
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+
+# Copy the application code into the container
+COPY . .
+
+# Build Cython extensions
+RUN cd utils && python setup.py build_ext --inplace
+
+# Create necessary directories if they don't exist
+RUN mkdir -p data/models
+
+# Expose the port for the FastAPI application
+EXPOSE 8000
+
+# Run the application with Gunicorn and Uvicorn workers
+CMD ["gunicorn", "app:app", "--workers", "4", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000"]
api_documentation.html ADDED
@@ -0,0 +1,612 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Real-Time Image Processing API Documentation</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/swagger-ui/4.18.3/swagger-ui.min.css">
9
+ <style>
10
+ body {
11
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
12
+ margin: 0;
13
+ padding: 0;
14
+ color: #333;
15
+ }
16
+ .container {
17
+ max-width: 1200px;
18
+ margin: 0 auto;
19
+ padding: 20px;
20
+ }
21
+ header {
22
+ background-color: #252f3f;
23
+ color: white;
24
+ padding: 20px;
25
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
26
+ }
27
+ header h1 {
28
+ margin: 0;
29
+ font-size: 2em;
30
+ }
31
+ header p {
32
+ margin-top: 10px;
33
+ opacity: 0.8;
34
+ }
35
+ .endpoint-cards {
36
+ display: flex;
37
+ flex-wrap: wrap;
38
+ gap: 20px;
39
+ margin-top: 30px;
40
+ }
41
+ .endpoint-card {
42
+ background-color: white;
43
+ border-radius: 8px;
44
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
45
+ padding: 20px;
46
+ flex: 1 1 300px;
47
+ transition: transform 0.2s, box-shadow 0.2s;
48
+ }
49
+ .endpoint-card:hover {
50
+ transform: translateY(-5px);
51
+ box-shadow: 0 5px 15px rgba(0,0,0,0.15);
52
+ }
53
+ .http-endpoint {
54
+ border-left: 5px solid #38a169;
55
+ }
56
+ .ws-endpoint {
57
+ border-left: 5px solid #3182ce;
58
+ }
59
+ .method {
60
+ display: inline-block;
61
+ padding: 5px 10px;
62
+ border-radius: 4px;
63
+ font-weight: bold;
64
+ margin-right: 10px;
65
+ }
66
+ .post {
67
+ background-color: #38a169;
68
+ color: white;
69
+ }
70
+ .ws {
71
+ background-color: #3182ce;
72
+ color: white;
73
+ }
74
+ .endpoint-title {
75
+ font-size: 1.2em;
76
+ margin-bottom: 15px;
77
+ display: flex;
78
+ align-items: center;
79
+ }
80
+ .section {
81
+ margin-top: 40px;
82
+ margin-bottom: 30px;
83
+ }
84
+ h2 {
85
+ border-bottom: 1px solid #eee;
86
+ padding-bottom: 10px;
87
+ color: #252f3f;
88
+ }
89
+ .code {
90
+ background-color: #f7fafc;
91
+ border-radius: 4px;
92
+ padding: 15px;
93
+ font-family: monospace;
94
+ overflow-x: auto;
95
+ margin: 15px 0;
96
+ }
97
+ .parameter-table, .response-table {
98
+ width: 100%;
99
+ border-collapse: collapse;
100
+ margin: 15px 0;
101
+ }
102
+ .parameter-table th, .parameter-table td,
103
+ .response-table th, .response-table td {
104
+ text-align: left;
105
+ padding: 10px;
106
+ border-bottom: 1px solid #eee;
107
+ }
108
+ .parameter-table th, .response-table th {
109
+ background-color: #f7fafc;
110
+ }
111
+ .try-it {
112
+ margin-top: 30px;
113
+ padding-top: 30px;
114
+ border-top: 1px solid #eee;
115
+ }
116
+ .swagger-ui .wrapper { max-width: 100%; }
117
+ #swagger-ui {
118
+ margin-top: 30px;
119
+ border: 1px solid #eee;
120
+ border-radius: 8px;
121
+ overflow: hidden;
122
+ }
123
+ .info-box {
124
+ background-color: #ebf8ff;
125
+ border-left: 5px solid #3182ce;
126
+ padding: 15px;
127
+ margin: 20px 0;
128
+ border-radius: 4px;
129
+ }
130
+ .response-example {
131
+ background-color: #f0fff4;
132
+ border-left: 5px solid #38a169;
133
+ padding: 15px;
134
+ border-radius: 4px;
135
+ margin-top: 20px;
136
+ }
137
+
138
+ /* Updated tabs styles */
139
+ .tabs {
140
+ display: flex;
141
+ margin-top: 30px;
142
+ border-bottom: 1px solid #e2e8f0;
143
+ align-items: center;
144
+ }
145
+ .tab {
146
+ padding: 10px 20px;
147
+ cursor: pointer;
148
+ border-bottom: 2px solid transparent;
149
+ transition: all 0.2s ease;
150
+ }
151
+ .tab.active {
152
+ border-bottom: 2px solid #3182ce;
153
+ color: #3182ce;
154
+ font-weight: bold;
155
+ }
156
+
157
+ /* New try-button styles */
158
+ .try-button {
159
+ margin-left: auto;
160
+ display: flex;
161
+ align-items: center;
162
+ padding: 10px 15px;
163
+ font-size: 14px;
164
+ font-weight: bold;
165
+ border-radius: 4px;
166
+ background-color: #4299e1;
167
+ color: white;
168
+ text-decoration: none;
169
+ border: none;
170
+ transition: background-color 0.2s;
171
+ }
172
+ .try-button:hover {
173
+ background-color: #3182ce;
174
+ }
175
+ .try-button svg {
176
+ margin-left: 8px;
177
+ height: 16px;
178
+ width: 16px;
179
+ }
180
+
181
+ .tab-content {
182
+ display: none;
183
+ padding: 20px 0;
184
+ }
185
+ .tab-content.active {
186
+ display: block;
187
+ }
188
+ button {
189
+ background-color: #4299e1;
190
+ color: white;
191
+ border: none;
192
+ padding: 10px 15px;
193
+ border-radius: 4px;
194
+ cursor: pointer;
195
+ font-weight: bold;
196
+ transition: background-color 0.2s;
197
+ }
198
+ button:hover {
199
+ background-color: #3182ce;
200
+ }
201
+ </style>
202
+ </head>
203
+ <body>
204
+ <header>
205
+ <div class="container">
206
+ <h1>Advanced Driver Vision Assistance with Near Collision Estimation System</h1>
207
+ <p>API for object detection, depth estimation, and distance prediction using computer vision models</p>
208
+ </div>
209
+ </header>
210
+
211
+ <div class="container">
212
+ <section class="section">
213
+ <h2>Overview</h2>
214
+ <p>This API provides access to advanced computer vision models for real-time image processing. It leverages:</p>
215
+ <ul>
216
+ <li><strong>DETR (DEtection TRansformer)</strong> - For accurate object detection</li>
217
+ <li><strong>GLPN (Global-Local Path Networks)</strong> - For depth estimation</li>
218
+ <li><strong>LSTM Model</strong> - For Z-location prediction</li>
219
+ </ul>
220
+ <p>The API supports both HTTP and WebSocket protocols:</p>
221
+ <div class="endpoint-cards">
222
+ <div class="endpoint-card http-endpoint">
223
+ <div class="endpoint-title">
224
+ <span class="method post">POST</span>
225
+ <span>/api/predict</span>
226
+ </div>
227
+ <p>Process a single image via HTTP request</p>
228
+ </div>
229
+ <div class="endpoint-card ws-endpoint">
230
+ <div class="endpoint-title">
231
+ <span class="method ws">WS</span>
232
+ <span>/ws/predict</span>
233
+ </div>
234
+ <p>Stream images for real-time processing via WebSocket</p>
235
+ </div>
236
+ </div>
237
+ </section>
238
+
239
+ <div class="tabs">
240
+ <div class="tab active" onclick="switchTab('http-api')">HTTP API</div>
241
+ <div class="tab" onclick="switchTab('websocket-api')">WebSocket API</div>
242
+ <div class="tab">
243
+ <a href="./try_page" class="ml-2 inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md shadow-sm text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 transition-all duration-200">
244
+ Try it
245
+ <svg xmlns="http://www.w3.org/2000/svg" class="ml-1 h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
246
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14 5l7 7m0 0l-7 7m7-7H3" />
247
+ </svg>
248
+ </a>
249
+ </div>
250
+ </div>
251
+
252
+ <div id="http-api" class="tab-content active">
253
+ <section class="section">
254
+ <h2>HTTP API Reference</h2>
255
+ <h3>POST /api/predict</h3>
256
+ <p>Process a single image for object detection, depth estimation, and distance prediction.</p>
257
+
258
+ <h4>Request</h4>
259
+ <p><strong>Content-Type:</strong> multipart/form-data</p>
260
+
261
+ <table class="parameter-table">
262
+ <tr>
263
+ <th>Parameter</th>
264
+ <th>Type</th>
265
+ <th>Required</th>
266
+ <th>Description</th>
267
+ </tr>
268
+ <tr>
269
+ <td>file</td>
270
+ <td>File</td>
271
+ <td>Yes</td>
272
+ <td>The image file to process (JPEG, PNG)</td>
273
+ </tr>
274
+ </table>
275
+
276
+ <h4>Request Example</h4>
277
+ <div class="code">
278
+ # Python example using requests
279
+ import requests
280
+
281
+ url = "http://localhost:8000/api/predict"
282
+ files = {"file": open("image.jpg", "rb")}
283
+
284
+ response = requests.post(url, files=files)
285
+ data = response.json()
286
+ print(data)
287
+ </div>
288
+
289
+ <h4>Response</h4>
290
+ <p>Returns a JSON object containing:</p>
291
+ <table class="response-table">
292
+ <tr>
293
+ <th>Field</th>
294
+ <th>Type</th>
295
+ <th>Description</th>
296
+ </tr>
297
+ <tr>
298
+ <td>objects</td>
299
+ <td>Array</td>
300
+ <td>Array of detected objects with their properties</td>
301
+ </tr>
302
+ <tr>
303
+ <td>objects[].class</td>
304
+ <td>String</td>
305
+ <td>Class of the detected object (e.g., 'car', 'person')</td>
306
+ </tr>
307
+ <tr>
308
+ <td>objects[].distance_estimated</td>
309
+ <td>Number</td>
310
+ <td>Estimated distance of the object</td>
311
+ </tr>
312
+ <tr>
313
+ <td>objects[].features</td>
314
+ <td>Object</td>
315
+ <td>Features used for prediction (bounding box, depth information)</td>
316
+ </tr>
317
+ <tr>
318
+ <td>frame_id</td>
319
+ <td>Number</td>
320
+ <td>ID of the processed frame (0 for HTTP requests)</td>
321
+ </tr>
322
+ <tr>
323
+ <td>timings</td>
324
+ <td>Object</td>
325
+ <td>Processing time metrics for each step</td>
326
+ </tr>
327
+ </table>
328
+
329
+ <div class="response-example">
330
+ <h4>Response Example</h4>
331
+ <pre class="code">
332
+ {
333
+ "objects": [
334
+ {
335
+ "class": "car",
336
+ "distance_estimated": 15.42,
337
+ "features": {
338
+ "xmin": 120.5,
339
+ "ymin": 230.8,
340
+ "xmax": 350.2,
341
+ "ymax": 480.3,
342
+ "mean_depth": 0.75,
343
+ "depth_mean_trim": 0.72,
344
+ "depth_median": 0.71,
345
+ "width": 229.7,
346
+ "height": 249.5
347
+ }
348
+ },
349
+ {
350
+ "class": "person",
351
+ "distance_estimated": 8.76,
352
+ "features": {
353
+ "xmin": 450.1,
354
+ "ymin": 200.4,
355
+ "xmax": 510.8,
356
+ "ymax": 380.2,
357
+ "mean_depth": 0.58,
358
+ "depth_mean_trim": 0.56,
359
+ "depth_median": 0.55,
360
+ "width": 60.7,
361
+ "height": 179.8
362
+ }
363
+ }
364
+ ],
365
+ "frame_id": 0,
366
+ "timings": {
367
+ "decode_time": 0.015,
368
+ "models_time": 0.452,
369
+ "process_time": 0.063,
370
+ "json_time": 0.021,
371
+ "total_time": 0.551
372
+ }
373
+ }
374
+ </pre>
375
+ </div>
376
+
377
+ <h4>HTTP Status Codes</h4>
378
+ <table class="response-table">
379
+ <tr>
380
+ <th>Status Code</th>
381
+ <th>Description</th>
382
+ </tr>
383
+ <tr>
384
+ <td>200</td>
385
+ <td>OK - Request was successful</td>
386
+ </tr>
387
+ <tr>
388
+ <td>400</td>
389
+ <td>Bad Request - Empty file or invalid format</td>
390
+ </tr>
391
+ <tr>
392
+ <td>500</td>
393
+ <td>Internal Server Error - Processing error</td>
394
+ </tr>
395
+ </table>
396
+ </section>
397
+ </div>
398
+
399
+ <div id="websocket-api" class="tab-content">
400
+ <section class="section">
401
+ <h2>WebSocket API Reference</h2>
402
+ <h3>WebSocket /ws/predict</h3>
403
+ <p>
404
+ Stream images for real-time processing and get instant results.
405
+ Ideal for video feeds and applications requiring continuous processing.
406
+ </p>
407
+
408
+ <div class="info-box">
409
+ <p><strong>Note:</strong> WebSocket offers better performance for real-time applications. Use this endpoint for processing video feeds or when you need to process multiple images in rapid succession.</p>
410
+ </div>
411
+
412
+ <h4>Connection</h4>
413
+ <div class="code">
414
+ # JavaScript example
415
+ const socket = new WebSocket('ws://localhost:8000/ws/predict');
416
+
417
+ socket.onopen = function(e) {
418
+ console.log('Connection established');
419
+ };
420
+
421
+ socket.onmessage = function(event) {
422
+ const response = JSON.parse(event.data);
423
+ console.log('Received:', response);
424
+ };
425
+
426
+ socket.onclose = function(event) {
427
+ console.log('Connection closed');
428
+ };
429
+ </div>
430
+
431
+ <h4>Sending Images</h4>
432
+ <p>Send binary image data directly over the WebSocket connection:</p>
433
+ <div class="code">
434
+ // JavaScript example: Sending an image from canvas or file
435
+ function sendImageFromCanvas(canvas) {
436
+ canvas.toBlob(function(blob) {
437
+ const reader = new FileReader();
438
+ reader.onload = function() {
439
+ socket.send(reader.result);
440
+ };
441
+ reader.readAsArrayBuffer(blob);
442
+ }, 'image/jpeg');
443
+ }
444
+
445
+ // Or from input file
446
+ fileInput.onchange = function() {
447
+ const file = this.files[0];
448
+ const reader = new FileReader();
449
+ reader.onload = function() {
450
+ socket.send(reader.result);
451
+ };
452
+ reader.readAsArrayBuffer(file);
453
+ };
454
+ </div>
455
+
456
+ <h4>Response Format</h4>
457
+ <p>The WebSocket API returns the same JSON structure as the HTTP API, with incrementing frame_id values.</p>
458
+ <div class="response-example">
459
+ <h4>Response Example</h4>
460
+ <pre class="code">
461
+ {
462
+ "objects": [
463
+ {
464
+ "class": "car",
465
+ "distance_estimated": 14.86,
466
+ "features": {
467
+ "xmin": 125.3,
468
+ "ymin": 235.1,
469
+ "xmax": 355.7,
470
+ "ymax": 485.9,
471
+ "mean_depth": 0.77,
472
+ "depth_mean_trim": 0.74,
473
+ "depth_median": 0.73,
474
+ "width": 230.4,
475
+ "height": 250.8
476
+ }
477
+ }
478
+ ],
479
+ "frame_id": 42,
480
+ "timings": {
481
+ "decode_time": 0.014,
482
+ "models_time": 0.445,
483
+ "process_time": 0.061,
484
+ "json_time": 0.020,
485
+ "total_time": 0.540
486
+ }
487
+ }
488
+ </pre>
489
+ </div>
490
+ </section>
491
+ </div>
492
+
493
+ <div id="try-it" class="tab-content">
494
+ <section class="section">
495
+ <h2>Try The API</h2>
496
+ <p>You can test the API directly using the interactive Swagger UI below:</p>
497
+
498
+ <div id="swagger-ui"></div>
499
+
500
+ <h3>Simple WebSocket Client</h3>
501
+ <p>Upload an image to test the WebSocket endpoint:</p>
502
+
503
+ <div>
504
+ <input type="file" id="wsFileInput" accept="image/*">
505
+ <button id="connectWsButton">Connect to WebSocket</button>
506
+ <button id="disconnectWsButton" disabled>Disconnect</button>
507
+ </div>
508
+
509
+ <div>
510
+ <p><strong>Status: </strong><span id="wsStatus">Disconnected</span></p>
511
+ <p><strong>Last Response: </strong></p>
512
+ <pre id="wsResponse" class="code" style="max-height: 300px; overflow-y: auto;"></pre>
513
+ </div>
514
+ </section>
515
+ </div>
516
+ </div>
517
+
518
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/swagger-ui/4.18.3/swagger-ui-bundle.js"></script>
519
+ <script>
520
+ // Handle tab switching
521
+ function switchTab(tabId) {
522
+ document.querySelectorAll('.tab').forEach(tab => tab.classList.remove('active'));
523
+ document.querySelectorAll('.tab-content').forEach(content => content.classList.remove('active'));
524
+
525
+ document.querySelector(`.tab[onclick="switchTab('${tabId}')"]`).classList.add('active');
526
+ document.getElementById(tabId).classList.add('active');
527
+ }
528
+
529
+ // Initialize Swagger UI
530
+ window.onload = function() {
531
+ SwaggerUIBundle({
532
+ url: "/api/openapi.json",
533
+ dom_id: '#swagger-ui',
534
+ presets: [
535
+ SwaggerUIBundle.presets.apis,
536
+ SwaggerUIBundle.SwaggerUIStandalonePreset
537
+ ],
538
+ layout: "BaseLayout",
539
+ deepLinking: true
540
+ });
541
+
542
+ // WebSocket client setup
543
+ let socket = null;
544
+
545
+ const connectButton = document.getElementById('connectWsButton');
546
+ const disconnectButton = document.getElementById('disconnectWsButton');
547
+ const fileInput = document.getElementById('wsFileInput');
548
+ const statusElement = document.getElementById('wsStatus');
549
+ const responseElement = document.getElementById('wsResponse');
550
+
551
+ connectButton.addEventListener('click', () => {
552
+ if (socket) {
553
+ socket.close();
554
+ }
555
+
556
+ const wsUrl = window.location.protocol === 'https:'
557
+ ? `wss://${window.location.host}/ws/predict`
558
+ : `ws://${window.location.host}/ws/predict`;
559
+
560
+ socket = new WebSocket(wsUrl);
561
+
562
+ socket.onopen = function() {
563
+ statusElement.textContent = 'Connected';
564
+ statusElement.style.color = 'green';
565
+ connectButton.disabled = true;
566
+ disconnectButton.disabled = false;
567
+ };
568
+
569
+ socket.onmessage = function(event) {
570
+ const response = JSON.parse(event.data);
571
+ responseElement.textContent = JSON.stringify(response, null, 2);
572
+ };
573
+
574
+ socket.onclose = function() {
575
+ statusElement.textContent = 'Disconnected';
576
+ statusElement.style.color = 'red';
577
+ connectButton.disabled = false;
578
+ disconnectButton.disabled = true;
579
+ socket = null;
580
+ };
581
+
582
+ socket.onerror = function(error) {
583
+ statusElement.textContent = 'Error: ' + error.message;
584
+ statusElement.style.color = 'red';
585
+ };
586
+ });
587
+
588
+ disconnectButton.addEventListener('click', () => {
589
+ if (socket) {
590
+ socket.close();
591
+ }
592
+ });
593
+
594
+ fileInput.addEventListener('change', () => {
595
+ if (!socket || socket.readyState !== WebSocket.OPEN) {
596
+ alert('Please connect to WebSocket first');
597
+ return;
598
+ }
599
+
600
+ const file = fileInput.files[0];
601
+ if (!file) return;
602
+
603
+ const reader = new FileReader();
604
+ reader.onload = function() {
605
+ socket.send(reader.result);
606
+ };
607
+ reader.readAsArrayBuffer(file);
608
+ });
609
+ };
610
+ </script>
611
+ </body>
612
+ </html>
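Editor's note: the WebSocket examples in the documentation above are JavaScript only. Because the requirements file already includes the websockets package, the same send-bytes / receive-JSON protocol can be exercised from Python as well; a minimal sketch, assuming the server from this commit is running locally on port 8000 and an image.jpg is available:

# Minimal Python client for the /ws/predict protocol documented above.
import asyncio
import json
import websockets

async def stream_image(path, url="ws://localhost:8000/ws/predict"):
    async with websockets.connect(url) as ws:
        with open(path, "rb") as f:
            await ws.send(f.read())            # send raw image bytes, as the docs describe
        result = json.loads(await ws.recv())   # server replies with a JSON text frame
        for obj in result["objects"]:
            print(obj["class"], obj["distance_estimated"])
        print("frame", result["frame_id"], "took", result["timings"]["total_time"], "s")

asyncio.run(stream_image("image.jpg"))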
app.py ADDED
@@ -0,0 +1,379 @@
+import joblib
+import uvicorn
+import xgboost as xgb
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, File, UploadFile, HTTPException
+from fastapi.responses import HTMLResponse, JSONResponse
+import asyncio
+import json
+import pickle
+import warnings
+import os
+import io
+
+import timeit
+from PIL import Image
+import numpy as np
+import cv2
+
+# OpenAPI / Swagger documentation helpers
+from fastapi.openapi.docs import get_swagger_ui_html
+from fastapi.openapi.utils import get_openapi
+
+from models.detr_model import DETR
+from models.glpn_model import GLPDepth
+from models.lstm_model import LSTM_Model
+from models.predict_z_location_single_row_lstm import predict_z_location_single_row_lstm
+from utils.processing import PROCESSING
+from config import CONFIG
+
+warnings.filterwarnings("ignore")
+
+# Initialize FastAPI app
+app = FastAPI(
+    title="Real-Time WebSocket Image Processing API",
+    description="API for object detection and depth estimation using WebSocket for real-time image processing.",
+)
+
+try:
+    # Load models and utilities
+    device = CONFIG['device']
+    print("Loading models...")
+
+    detr = DETR()  # Object detection model (DETR)
+    print("DETR model loaded.")
+
+    glpn = GLPDepth()  # Depth estimation model (GLPN)
+    print("GLPDepth model loaded.")
+
+    zlocE_LSTM = LSTM_Model()  # LSTM model for Z-location prediction
+    print("LSTM model loaded.")
+
+    lstm_scaler = pickle.load(open(CONFIG['lstm_scaler_path'], 'rb'))  # Pre-trained scaler for the LSTM
+    print("LSTM Scaler loaded.")
+
+    processing = PROCESSING()  # Utility class for post-processing
+    print("Processing utilities loaded.")
+
+except Exception as e:
+    print(f"An unexpected error occurred while loading models. Details: {e}")
+
+
+# Serve HTML documentation for the API
+@app.get("/", response_class=HTMLResponse)
+async def get_docs():
+    """
+    Serve HTML documentation for the WebSocket-based image processing API.
+    The HTML file must be available in the same directory.
+    Returns a 404 error if the documentation file is not found.
+    """
+    html_path = os.path.join(os.path.dirname(__file__), "api_documentation.html")
+    if not os.path.exists(html_path):
+        return HTMLResponse(content="api_documentation.html file not found", status_code=404)
+    with open(html_path, "r") as f:
+        return HTMLResponse(f.read())
+
+
+@app.get("/try_page", response_class=HTMLResponse)
+async def get_try_page():
+    """
+    Serve the interactive "try it" page for the image processing API.
+    The HTML file must be available in the same directory.
+    Returns a 404 error if the page is not found.
+    """
+    html_path = os.path.join(os.path.dirname(__file__), "try_page.html")
+    if not os.path.exists(html_path):
+        return HTMLResponse(content="try_page.html file not found", status_code=404)
+    with open(html_path, "r") as f:
+        return HTMLResponse(f.read())
+
+
+# Function to decode the image received via WebSocket
+async def decode_image(image_bytes):
+    """
+    Decodes image bytes into a PIL Image and returns the image along with its shape.
+
+    Args:
+        image_bytes (bytes): The image data received from the client.
+
+    Returns:
+        tuple: A tuple containing:
+            - pil_image (PIL.Image): The decoded image.
+            - img_shape (tuple): Shape of the image as (height, width).
+            - decode_time (float): Time taken to decode the image in seconds.
+
+    Raises:
+        ValueError: If image decoding fails.
+    """
+    start = timeit.default_timer()
+    nparr = np.frombuffer(image_bytes, np.uint8)
+    frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    if frame is None:
+        raise ValueError("Failed to decode image")
+    color_converted = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+    pil_image = Image.fromarray(color_converted)
+    img_shape = color_converted.shape[0:2]  # (height, width)
+    end = timeit.default_timer()
+    return pil_image, img_shape, end - start
+
+
+# Function to run the DETR model for object detection
+async def run_detr_model(pil_image):
+    """
+    Runs the DETR (DEtection TRansformer) model to perform object detection on the input image.
+
+    Args:
+        pil_image (PIL.Image): The image to be processed by the DETR model.
+
+    Returns:
+        tuple: A tuple containing:
+            - detr_result (tuple): The DETR model output consisting of detection scores and boxes.
+            - detr_time (float): Time taken to run the DETR model in seconds.
+    """
+    start = timeit.default_timer()
+    detr_result = await asyncio.to_thread(detr.detect, pil_image)
+    end = timeit.default_timer()
+    return detr_result, end - start
+
+
+# Function to run the GLPN model for depth estimation
+async def run_glpn_model(pil_image, img_shape):
+    """
+    Runs the GLPN (Global-Local Path Networks) model to estimate the depth of the objects in the image.
+
+    Args:
+        pil_image (PIL.Image): The image to be processed by the GLPN model.
+        img_shape (tuple): The shape of the image as (height, width).
+
+    Returns:
+        tuple: A tuple containing:
+            - depth_map (numpy.ndarray): The depth map for the input image.
+            - glpn_time (float): Time taken to run the GLPN model in seconds.
+    """
+    start = timeit.default_timer()
+    depth_map = await asyncio.to_thread(glpn.predict, pil_image, img_shape)
+    end = timeit.default_timer()
+    return depth_map, end - start
+
+
+# Function to process the detections with the depth map
+async def process_detections(scores, boxes, depth_map):
+    """
+    Processes the DETR model detections and integrates depth information from the GLPN model.
+
+    Args:
+        scores (numpy.ndarray): The detection scores for the detected objects.
+        boxes (numpy.ndarray): The bounding boxes for the detected objects.
+        depth_map (numpy.ndarray): The depth map generated by the GLPN model.
+
+    Returns:
+        tuple: A tuple containing:
+            - pdata (DataFrame): Processed detection data including depth and bounding box information.
+            - process_time (float): Time taken for processing detections in seconds.
+    """
+    start = timeit.default_timer()
+    pdata = processing.process_detections(scores, boxes, depth_map, detr)
+    end = timeit.default_timer()
+    return pdata, end - start
+
+
+# Function to generate JSON output for LSTM predictions
+async def generate_json_output(data):
+    """
+    Predict the Z-location for each object in the data and prepare the JSON output.
+
+    Parameters:
+    - data: DataFrame with bounding box coordinates, depth information, and class type.
+      Uses the module-level LSTM model (zlocE_LSTM) and scaler (lstm_scaler) for prediction.
+
+    Returns:
+    - JSON structure with object class, estimated distance, and relevant features, plus the elapsed time.
+    """
+    output_json = []
+    start = timeit.default_timer()
+
+    # Iterate over each row in the data
+    for i, row in data.iterrows():
+        # Predict distance for each object using the single-row prediction function
+        distance = predict_z_location_single_row_lstm(row, zlocE_LSTM, lstm_scaler)
+
+        # Create object info dictionary
+        object_info = {
+            "class": row["class"],  # Object class (e.g., 'car', 'truck')
+            "distance_estimated": float(distance),  # Convert distance to a plain float
+            "features": {
+                "xmin": float(row["xmin"]),  # Bounding box xmin
+                "ymin": float(row["ymin"]),  # Bounding box ymin
+                "xmax": float(row["xmax"]),  # Bounding box xmax
+                "ymax": float(row["ymax"]),  # Bounding box ymax
+                "mean_depth": float(row["depth_mean"]),  # Depth mean
+                "depth_mean_trim": float(row["depth_mean_trim"]),  # Trimmed depth mean
+                "depth_median": float(row["depth_median"]),  # Depth median
+                "width": float(row["width"]),  # Object width
+                "height": float(row["height"])  # Object height
+            }
+        }
+
+        # Append each object info to the output JSON list
+        output_json.append(object_info)
+
+    end = timeit.default_timer()
+
+    # Return the final JSON output structure, and time
+    return {"objects": output_json}, end - start
+
+
+# Function to process a single frame (image) in the WebSocket stream
+async def process_frame(frame_id, image_bytes):
+    """
+    Processes a single frame (image) from the WebSocket stream. The process includes:
+    - Decoding the image.
+    - Running the DETR and GLPN models concurrently.
+    - Processing detections and generating the final output JSON.
+
+    Args:
+        frame_id (int): The identifier for the frame being processed.
+        image_bytes (bytes): The image data received from the WebSocket.
+
+    Returns:
+        dict: A dictionary containing the output JSON and timing information for each processing step.
+    """
+    timings = {}
+    try:
+        # Step 1: Decode the image
+        pil_image, img_shape, decode_time = await decode_image(image_bytes)
+        timings["decode_time"] = decode_time
+
+        # Step 2: Run DETR and GLPN models in parallel
+        (detr_result, detr_time), (depth_map, glpn_time) = await asyncio.gather(
+            run_detr_model(pil_image),
+            run_glpn_model(pil_image, img_shape)
+        )
+        models_time = max(detr_time, glpn_time)  # Take the longer of the two model times
+        timings["models_time"] = models_time
+
+        # Step 3: Process detections with the depth map
+        scores, boxes = detr_result
+        pdata, process_time = await process_detections(scores, boxes, depth_map)
+        timings["process_time"] = process_time
+
+        # Step 4: Generate output JSON
+        output_json, json_time = await generate_json_output(pdata)
+        timings["json_time"] = json_time
+
+        timings["total_time"] = decode_time + models_time + process_time + json_time
+
+        # Add frame_id and timings to the JSON output
+        output_json["frame_id"] = frame_id
+        output_json["timings"] = timings
+
+        return output_json
+
+    except Exception as e:
+        return {
+            "error": str(e),
+            "frame_id": frame_id,
+            "timings": timings
+        }
+
+
+@app.post("/api/predict", summary="Process a single image for object detection and depth estimation")
+async def process_image(file: UploadFile = File(...)):
+    """
+    Process a single image for object detection and depth estimation.
+
+    The endpoint performs:
+    - Object detection using the DETR model
+    - Depth estimation using the GLPN model
+    - Z-location prediction using the LSTM model
+
+    Parameters:
+    - file: Image file to process (JPEG, PNG)
+
+    Returns:
+    - JSON response with detected objects, estimated distances, and timing information
+    """
+    try:
+        # Read image content
+        image_bytes = await file.read()
+        if not image_bytes:
+            raise HTTPException(status_code=400, detail="Empty file")
+
+        # Use the same processing pipeline as the WebSocket endpoint
+        result = await process_frame(0, image_bytes)
+
+        # Check if there's an error
+        if "error" in result:
+            raise HTTPException(status_code=500, detail=result["error"])
+
+        return JSONResponse(content=result)
+
+    except HTTPException:
+        # Re-raise HTTP errors (e.g., 400 for an empty file) without converting them to 500
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# Add custom OpenAPI documentation
+@app.get("/api/docs", include_in_schema=False)
+async def custom_swagger_ui_html():
+    return get_swagger_ui_html(
+        openapi_url="/api/openapi.json",
+        title="Real-Time Image Processing API Documentation",
+        swagger_js_url="https://cdnjs.cloudflare.com/ajax/libs/swagger-ui/4.18.3/swagger-ui-bundle.js",
+        swagger_css_url="https://cdnjs.cloudflare.com/ajax/libs/swagger-ui/4.18.3/swagger-ui.css",
+    )
+
+
+@app.get("/api/openapi.json", include_in_schema=False)
+async def get_open_api_endpoint():
+    return get_openapi(
+        title="Real-Time Image Processing API",
+        version="1.0.0",
+        description="API for object detection, depth estimation, and z-location prediction using computer vision models",
+        routes=app.routes,
+    )
+
+
+@app.websocket("/ws/predict")
+async def websocket_endpoint(websocket: WebSocket):
+    """
+    WebSocket endpoint for real-time image processing. Clients can send image frames to be processed
+    and receive JSON output containing object detection, depth estimation, and predictions in real time.
+
+    - Handles the reception of image data over the WebSocket.
+    - Calls the image processing pipeline and returns the result.
+
+    Args:
+        websocket (WebSocket): The WebSocket connection to the client.
+    """
+    await websocket.accept()
+    frame_id = 0
+
+    try:
+        while True:
+            frame_id += 1
+
+            # Receive image bytes from the client
+            image_bytes = await websocket.receive_bytes()
+
+            # Process the frame asynchronously
+            processing_task = asyncio.create_task(process_frame(frame_id, image_bytes))
+            result = await processing_task
+
+            # Send the result back to the client
+            await websocket.send_text(json.dumps(result))
+
+    except WebSocketDisconnect:
+        print(f"Client disconnected after processing {frame_id} frames.")
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+    finally:
+        await websocket.close()
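Editor's note: the HTTP route above can be exercised without a running server through FastAPI's TestClient; a rough smoke-test sketch, assuming the model weights under data/models/ are present so that importing app succeeds and an image.jpg is available:

# Local smoke test for /api/predict using FastAPI's in-process test client.
from fastapi.testclient import TestClient
from app import app

client = TestClient(app)

with open("image.jpg", "rb") as f:
    response = client.post("/api/predict", files={"file": ("image.jpg", f, "image/jpeg")})

assert response.status_code == 200
payload = response.json()
print("objects:", len(payload["objects"]), "total_time:", payload["timings"]["total_time"])
for obj in payload["objects"]:
    print(f'{obj["class"]}: {obj["distance_estimated"]:.2f}')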
config.py ADDED
@@ -0,0 +1,12 @@
+import torch
+
+CONFIG = {
+    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
+    'detr_model_path': 'facebook/detr-resnet-101',
+    'glpn_model_path': 'vinvino02/glpn-kitti',
+    'lstm_model_path': 'data/models/pretrained_lstm.pth',
+    'lstm_scaler_path': 'data/models/lstm_scaler.pkl',
+    'xgboost_path': 'data/models/xgboost_model.json',
+    'xgboost_scaler_path': 'data/models/scaler.joblib',
+    'numerical_cols_path': 'data/models/numerical_columns.joblib'
+}
data/models/lstm_scaler.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e21e30154403927e4696795a086191bacf12a03aecb904992faa2dc9fb17343
+size 809
data/models/pretrained_lstm.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9a7728cd4db9ebb4c8c0cb2fef644ae7d159359864002aa94148f9ea127005c
+size 31160407
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (167 Bytes).
models/__pycache__/detr_model.cpython-311.pyc ADDED
Binary file (7.7 kB).
models/__pycache__/distance_prediction.cpython-311.pyc ADDED
Binary file (2.92 kB).
models/__pycache__/glpn_model.cpython-311.pyc ADDED
Binary file (2.95 kB).
models/__pycache__/lstm_model.cpython-311.pyc ADDED
Binary file (4.22 kB).
models/__pycache__/predict_z_location_single_row_lstm.cpython-311.pyc ADDED
Binary file (2.89 kB).
models/detr_model.py ADDED
@@ -0,0 +1,117 @@
+"""
+Created on Sat Apr 9 04:08:02 2022
+@author: Admin_with ODD Team
+
+Edited by our team: Sat Oct 5 10:00 2024
+
+references: https://github.com/vinvino02/GLPDepth
+"""
+
+import io
+import torch
+import base64
+from config import CONFIG
+from torchvision import transforms
+from matplotlib import pyplot as plt
+from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+from transformers import DetrForObjectDetection, DetrImageProcessor
+
+class DETR:
+    def __init__(self):
+
+        self.CLASSES = [
+            'N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+            'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
+            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
+            'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack',
+            'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
+            'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
+            'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass',
+            'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
+            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
+            'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
+            'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
+            'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
+            'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
+            'toothbrush'
+        ]
+
+        self.COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098],
+                       [0.929, 0.694, 0.125], [0, 0, 1], [0.466, 0.674, 0.188],
+                       [0.301, 0.745, 0.933]]
+
+        self.transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+        ])
+
+        self.model = DetrForObjectDetection.from_pretrained(CONFIG['detr_model_path'], revision="no_timm")
+        self.model.to(CONFIG['device'])
+        self.model.eval()
+
+    def box_cxcywh_to_xyxy(self, x):
+        x_c, y_c, w, h = x.unbind(1)
+        b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
+             (x_c + 0.5 * w), (y_c + 0.5 * h)]
+        return torch.stack(b, dim=1).to(CONFIG['device'])
+
+    def rescale_bboxes(self, out_bbox, size):
+        img_w, img_h = size
+        b = self.box_cxcywh_to_xyxy(out_bbox)
+        b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32).to(CONFIG['device'])
+        return b
+
+    def detect(self, im):
+        img = self.transform(im).unsqueeze(0).to(CONFIG['device'])
+        assert img.shape[-2] <= 1600 and img.shape[-1] <= 1600, 'Image too large'
+        outputs = self.model(img)
+        probas = outputs['logits'].softmax(-1)[0, :, :-1]
+        keep = probas.max(-1).values > 0.7
+        bboxes_scaled = self.rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
+        return probas[keep], bboxes_scaled
+
+    def visualize(self, im, probas, bboxes):
+        """
+        Visualizes the detected bounding boxes and class probabilities on the image.
+
+        Parameters:
+            im (PIL.Image): The original input image.
+            probas (Tensor): Class probabilities for detected objects.
+            bboxes (Tensor): Bounding boxes for detected objects.
+        """
+        # Plot the image with matplotlib
+        fig, ax = plt.subplots(figsize=(10, 6))
+        ax.imshow(im)
+
+        # Iterate over detections and draw bounding boxes and labels
+        for p, (xmin, ymin, xmax, ymax), color in zip(probas, bboxes, self.COLORS * 100):
+            # Detach tensors and convert to float
+            xmin, ymin, xmax, ymax = map(lambda x: x.detach().cpu().numpy().item(), (xmin, ymin, xmax, ymax))
+
+            ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
+                                       fill=False, color=color, linewidth=3))
+            cl = p.argmax()
+            text = f'{self.CLASSES[cl]}: {p[cl].detach().cpu().numpy():0.2f}'  # Detach the probability as well
+            ax.text(xmin, ymin, text, fontsize=15, bbox=dict(facecolor='yellow', alpha=0.5))
+
+        ax.axis('off')
+
+        # Render the plot to PNG bytes
+        canvas = FigureCanvas(fig)
+        buf = io.BytesIO()
+        canvas.print_png(buf)
+        buf.seek(0)
+
+        # Base64 encode the image
+        img_bytes = buf.getvalue()
+        img_base64 = base64.b64encode(img_bytes).decode('utf-8')
+
+        # Close the figure to release memory
+        plt.close(fig)
+
+        return img_base64
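Editor's note: a short usage sketch for the wrapper above. It assumes the facebook/detr-resnet-101 weights referenced in config.py can be downloaded (or are already cached) and that a test image is available locally:

# Standalone check of DETR.detect on a single image.
from PIL import Image
from models.detr_model import DETR

detr = DETR()
image = Image.open("image.jpg").convert("RGB")
probas, boxes = detr.detect(image)  # keeps detections with confidence > 0.7, boxes scaled to pixels
for p, box in zip(probas, boxes):
    label = detr.CLASSES[p.argmax().item()]
    xmin, ymin, xmax, ymax = [float(v) for v in box]
    print(f"{label}: {p.max().item():.2f} at ({xmin:.0f}, {ymin:.0f}, {xmax:.0f}, {ymax:.0f})")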
models/glpn_model.py ADDED
@@ -0,0 +1,51 @@
+"""
+Created on Sat Apr 9 04:08:02 2022
+@author: Admin_with ODD Team
+
+Edited by our team: Sat Oct 5 10:00 2024
+
+references: https://github.com/vinvino02/GLPDepth
+"""
+
+import torch
+from transformers import GLPNForDepthEstimation, GLPNFeatureExtractor
+from PIL import Image
+from config import CONFIG
+
+
+# GLPDepth Model Class
+class GLPDepth:
+    def __init__(self):
+        self.feature_extractor = GLPNFeatureExtractor.from_pretrained(CONFIG['glpn_model_path'])
+        self.model = GLPNForDepthEstimation.from_pretrained(CONFIG['glpn_model_path'])
+        self.model.to(CONFIG['device'])
+        self.model.eval()
+
+    def predict(self, img: Image.Image, img_shape: tuple):
+        """Predict the depth map of the input image.
+
+        Args:
+            img (PIL.Image): Input image for depth estimation.
+            img_shape (tuple): Original image size (height, width).
+
+        Returns:
+            np.ndarray: The predicted depth map as a numpy array.
+        """
+        with torch.no_grad():
+            # Preprocess the image and move it to the appropriate device
+            pixel_values = self.feature_extractor(img, return_tensors="pt").pixel_values.to(CONFIG['device'])
+            # Get model output
+            outputs = self.model(pixel_values)
+            predicted_depth = outputs.predicted_depth
+
+            # Resize the depth prediction to the original image size
+            prediction = torch.nn.functional.interpolate(
+                predicted_depth.unsqueeze(1),
+                size=img_shape[:2],  # Interpolate to original image size
+                mode="bicubic",
+                align_corners=False,
+            )
+            prediction = prediction.squeeze().cpu().numpy()  # Convert to numpy array (shape: (H, W))
+
+        return prediction
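Editor's note: the depth wrapper can be sanity-checked the same way. Note that predict expects the original size as (height, width), the reverse of PIL's (width, height); a minimal sketch assuming a local test image:

# Quick shape check for GLPDepth.predict.
from PIL import Image
from models.glpn_model import GLPDepth

glpn = GLPDepth()
image = Image.open("image.jpg").convert("RGB")
height_width = image.size[::-1]          # PIL gives (width, height); the model wants (height, width)
depth_map = glpn.predict(image, height_width)
print(depth_map.shape, float(depth_map.min()), float(depth_map.max()))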
models/lstm_model.py ADDED
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+"""
+Z-Location Estimator Model for Deployment
+Created on Mon May 23 04:55:50 2022
+@author: ODD_team
+Edited by our team: Sat Oct 4 11:00 PM 2024
+@based on LSTM model
+"""
+
+import torch
+import torch.nn as nn
+from config import CONFIG
+
+device = CONFIG['device']
+
+# Define the LSTM-based Z-location estimator model
+class Zloc_Estimator(nn.Module):
+    def __init__(self, input_dim, hidden_dim, layer_dim):
+        super(Zloc_Estimator, self).__init__()
+
+        # LSTM layer
+        self.rnn = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True, bidirectional=False)
+
+        # Fully connected layers
+        layersize = [306, 154, 76]
+        layerlist = []
+        n_in = hidden_dim
+        for i in layersize:
+            layerlist.append(nn.Linear(n_in, i))
+            layerlist.append(nn.ReLU())
+            n_in = i
+        layerlist.append(nn.Linear(layersize[-1], 1))  # Final output layer
+
+        self.fc = nn.Sequential(*layerlist)
+
+    def forward(self, x):
+        out, hn = self.rnn(x)
+        output = self.fc(out[:, -1])  # Use the last time step's output for prediction
+        return output
+
+# Deployment-ready class for handling the model
+class LSTM_Model:
+    def __init__(self):
+        """
+        Initializes the LSTM model for deployment with predefined parameters
+        and loads the pre-trained weights from CONFIG['lstm_model_path'].
+        """
+        self.input_dim = 15
+        self.hidden_dim = 612
+        self.layer_dim = 3
+
+        # Initialize the Z-location estimator model
+        self.model = Zloc_Estimator(self.input_dim, self.hidden_dim, self.layer_dim)
+
+        # Load the state dictionary, using map_location so CPU-only machines also work
+        state_dict = torch.load(CONFIG['lstm_model_path'], map_location=device)
+
+        # Load the model with the state dictionary
+        self.model.load_state_dict(state_dict, strict=False)
+        self.model.to(device)  # Move the model to the configured device
+        self.model.eval()  # Set the model to evaluation mode
+
+    def predict(self, data):
+        """
+        Predicts the z-location based on input data.
+
+        :param data: Input tensor of shape (batch_size, input_dim)
+        :return: Predicted z-location as a tensor
+        """
+        with torch.no_grad():  # Disable gradient computation for deployment
+            data = data.to(device)  # Move data to the appropriate device
+            data = data.reshape(-1, 1, self.input_dim)  # Reshape to (batch_size, sequence_length, input_dim)
+            zloc = self.model(data)
+        return zloc.cpu()  # Return the output on CPU for further processing
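Editor's note: a minimal shape check for the deployment wrapper: one 15-feature row in (9 numeric features plus the 6-way class encoding built downstream), one Z value out. The zero vector is only a placeholder; real inputs arrive already scaled.

# Shape check only -- a zero row is not a meaningful detection.
import torch
from models.lstm_model import LSTM_Model

zloc_estimator = LSTM_Model()        # loads data/models/pretrained_lstm.pth onto CONFIG['device']
features = torch.zeros(1, 15)        # placeholder for one scaled feature row
prediction = zloc_estimator.predict(features)
print(prediction.shape)              # torch.Size([1, 1])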
models/predict_z_location_single_row_lstm.py ADDED
@@ -0,0 +1,48 @@
+import torch
+import numpy as np
+
+
+def predict_z_location_single_row_lstm(row, ZlocE, scaler):
+    """
+    Preprocess bounding box coordinates, depth information, and class type
+    to predict the Z-location using the LSTM model for a single row.
+
+    Parameters:
+    - row: A single row of a DataFrame with bounding box coordinates, depth info, and class type.
+    - ZlocE: Pre-loaded LSTM model for Z-location prediction.
+    - scaler: Scaler for normalizing input data.
+
+    Returns:
+    - z_loc_prediction: Predicted Z-location for the given row.
+    """
+    # One-hot encoding of the class type
+    class_type = row['class']
+
+    if class_type == 'bicycle':
+        class_tensor = torch.tensor([[0, 1, 0, 0, 0, 0]], dtype=torch.float32)
+    elif class_type == 'car':
+        class_tensor = torch.tensor([[0, 0, 1, 0, 0, 0]], dtype=torch.float32)
+    elif class_type == 'person':
+        class_tensor = torch.tensor([[0, 0, 0, 1, 0, 0]], dtype=torch.float32)
+    elif class_type == 'train':
+        class_tensor = torch.tensor([[0, 0, 0, 0, 1, 0]], dtype=torch.float32)
+    elif class_type == 'truck':
+        class_tensor = torch.tensor([[0, 0, 0, 0, 0, 1]], dtype=torch.float32)
+    else:
+        class_tensor = torch.tensor([[1, 0, 0, 0, 0, 0]], dtype=torch.float32)
+
+    # Prepare input data (bounding box + depth info)
+    input_data = np.array([row[['xmin', 'ymin', 'xmax', 'ymax', 'width', 'height', 'depth_mean', 'depth_median',
+                                'depth_mean_trim']].values], dtype=np.float32)
+    input_data = torch.from_numpy(input_data)
+
+    # Concatenate the class information
+    input_data = torch.cat([input_data, class_tensor], dim=-1)
+
+    # Scale the input data
+    scaled_input = torch.tensor(scaler.transform(input_data), dtype=torch.float32).unsqueeze(0).unsqueeze(0)
+
+    # Use the LSTM model to predict the Z-location
+    z_loc_prediction = ZlocE.predict(scaled_input).detach().numpy()[0]
+
+    return z_loc_prediction
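Editor's note: the field names expected by this function match the columns produced by the processing step (and echoed in the API's features block), so a hand-built row is enough to exercise it; a sketch, assuming the scaler and LSTM weights under data/models/ are present:

# Calling the single-row predictor with a hand-built detection row.
import pickle
import pandas as pd
from config import CONFIG
from models.lstm_model import LSTM_Model
from models.predict_z_location_single_row_lstm import predict_z_location_single_row_lstm

row = pd.Series({
    "class": "car",
    "xmin": 120.5, "ymin": 230.8, "xmax": 350.2, "ymax": 480.3,
    "width": 229.7, "height": 249.5,
    "depth_mean": 0.75, "depth_median": 0.71, "depth_mean_trim": 0.72,
})

zlocE = LSTM_Model()
with open(CONFIG["lstm_scaler_path"], "rb") as f:
    scaler = pickle.load(f)

print(float(predict_z_location_single_row_lstm(row, zlocE, scaler)))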
requirements.txt ADDED
@@ -0,0 +1,20 @@
+torch>=2.0.0
+torchvision>=0.15.0
+transformers~=4.32.1
+matplotlib>=3.7.0
+pandas>=2.0.0
+opencv-python>=4.8.0
+uvicorn>=0.24.0
+fastapi>=0.104.1
+gunicorn==21.2.0
+setuptools>=68.0.0
+scipy>=1.11.0
+scikit-learn>=1.3.0
+websockets>=11.0.0
+shapely>=2.0.0
+cython>=3.0.0
+numpy>=1.24.0
+pillow~=9.4.0
+joblib~=1.2.0
+xgboost==3.0.0
try_page.html ADDED
@@ -0,0 +1,778 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>HTTP Image Upload Demo</title>
7
+ <style>
8
+ /* Reset and base styles */
9
+ * {
10
+ box-sizing: border-box;
11
+ margin: 0;
12
+ padding: 0;
13
+ }
14
+
15
+ body {
16
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
17
+ color: #333;
18
+ line-height: 1.6;
19
+ }
20
+
21
+ /* Demo section styling */
22
+ .execution-section {
23
+ max-width: 1200px;
24
+ margin: 0 auto;
25
+ padding: 2rem;
26
+ background-color: #f8f9fa;
27
+ border-radius: 8px;
28
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);
29
+ }
30
+
31
+ .section-title {
32
+ font-size: 2rem;
33
+ color: #384B70;
34
+ margin-bottom: 1rem;
35
+ padding-bottom: 0.5rem;
36
+ border-bottom: 2px solid #507687;
37
+ }
38
+
39
+ .demo-container {
40
+ display: flex;
41
+ flex-wrap: wrap;
42
+ gap: 2rem;
43
+ margin-top: 1.5rem;
44
+ }
45
+
46
+ .upload-container, .response-container {
47
+ flex: 1;
48
+ min-width: 300px;
49
+ padding: 1.5rem;
50
+ background-color: white;
51
+ border-radius: 8px;
52
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05);
53
+ }
54
+
55
+ .container-title {
56
+ font-size: 1.5rem;
57
+ margin-bottom: 1rem;
58
+ color: #384B70;
59
+ }
60
+
61
+ /* Upload area styling */
62
+ .file-input-container {
63
+ border: 2px dashed #ccc;
64
+ border-radius: 5px;
65
+ padding: 2rem;
66
+ text-align: center;
67
+ margin-bottom: 1rem;
68
+ transition: all 0.3s ease;
69
+ }
70
+
71
+ .file-input-container:hover {
72
+ border-color: #507687;
73
+ background-color: #f8f9fa;
74
+ }
75
+
76
+ #fileInput {
77
+ display: none;
78
+ }
79
+
80
+ .file-label {
81
+ cursor: pointer;
82
+ display: flex;
83
+ flex-direction: column;
84
+ align-items: center;
85
+ gap: 0.5rem;
86
+ }
87
+
88
+ .file-icon {
89
+ font-size: 2.5rem;
90
+ color: #507687;
91
+ width: 64px;
92
+ height: 64px;
93
+ }
94
+ .file-placeholder {
95
+ max-width: 100%;
96
+ height: auto;
97
+ margin-top: 1rem;
98
+ border-radius: 4px;
99
+ display: none;
100
+ }
101
+
102
+ #sendButton {
103
+ background-color: #384B70;
104
+ color: white;
105
+ border: none;
106
+ border-radius: 4px;
107
+ padding: 0.75rem 1.5rem;
108
+ font-size: 1rem;
109
+ cursor: pointer;
110
+ transition: background-color 0.3s;
111
+ width: 100%;
112
+ margin-top: 1rem;
113
+ }
114
+
115
+ #sendButton:disabled {
116
+ background-color: #cccccc;
117
+ cursor: not-allowed;
118
+ }
119
+
120
+ #sendButton:hover:not(:disabled) {
121
+ background-color: #507687;
122
+ }
123
+
124
+ /* Response area styling */
125
+ .response-output {
126
+ height: 300px;
127
+ overflow-y: auto;
128
+ background-color: #f8f9fa;
129
+ border: 1px solid #ddd;
130
+ border-radius: 4px;
131
+ padding: 1rem;
132
+ font-family: monospace;
133
+ white-space: pre-wrap;
134
+ }
135
+
136
+ /* Tabs styling */
137
+ .tabs {
138
+ display: flex;
139
+ border-bottom: 1px solid #ddd;
140
+ margin-bottom: 1rem;
141
+ }
142
+
143
+ .tab-button {
144
+ padding: 0.5rem 1rem;
145
+ background-color: #f1f1f1;
146
+ border: none;
147
+ cursor: pointer;
148
+ transition: background-color 0.3s;
149
+ font-size: 1rem;
150
+ }
151
+
152
+ .tab-button.active {
153
+ background-color: #384B70;
154
+ color: white;
155
+ }
156
+
157
+ .tab-content {
158
+ display: none;
159
+ height: 300px;
160
+ }
161
+
162
+ .tab-content.active {
163
+ display: block;
164
+ }
165
+
166
+ /* Visualization area styling */
167
+ #visualizationContainer {
168
+ position: relative;
169
+ height: 100%;
170
+ overflow: auto;
171
+ background-color: #f8f9fa;
172
+ border: 1px solid #ddd;
173
+ border-radius: 4px;
174
+ }
175
+
176
+ .detection-canvas {
177
+ display: block;
178
+ margin: 0 auto;
179
+ }
180
+
181
+ /* Utilities */
182
+ #loading {
183
+ display: none;
184
+ margin-top: 1rem;
185
+ color: #384B70;
186
+ font-weight: bold;
187
+ text-align: center;
188
+ }
189
+
190
+ #message {
191
+ margin-top: 1rem;
192
+ padding: 0.75rem;
193
+ border-radius: 4px;
194
+ text-align: center;
195
+ display: none;
196
+ }
197
+
198
+ .error {
199
+ background-color: #ffebee;
200
+ color: #d32f2f;
201
+ }
202
+
203
+ .success {
204
+ background-color: #e8f5e9;
205
+ color: #388e3c;
206
+ }
207
+
208
+ .info {
209
+ font-size: 0.9rem;
210
+ color: #666;
211
+ margin-top: 0.5rem;
212
+ }
213
+
214
+ .stats {
215
+ margin-top: 1rem;
216
+ font-size: 0.9rem;
217
+ color: #666;
218
+ }
219
+
220
+ /* Debug output */
221
+ #debugOutput {
222
+ margin-top: 0.5rem;
223
+ font-size: 0.8rem;
224
+ color: #999;
225
+ border-top: 1px dashed #ddd;
226
+ padding-top: 0.5rem;
227
+ display: none;
228
+ }
229
+ </style>
230
+ </head>
231
+ <body>
232
+ <!-- Interactive Demo Section -->
233
+ <section class="execution-section">
234
+ <h2 class="section-title">Try It Yourself</h2>
235
+ <p>Upload an image and see the object detection and depth estimation results in real-time.</p>
236
+
237
+ <div class="demo-container">
238
+ <!-- Upload Container -->
239
+ <div class="upload-container">
240
+ <h3 class="container-title">Upload Image</h3>
241
+
242
+ <div class="file-input-container">
243
+ <label for="fileInput" class="file-label">
244
+ <img src="https://upload.wikimedia.org/wikipedia/commons/a/a1/Icons8_flat_folder.svg" class="file-icon"/>
245
+ <span>Click to select image</span>
246
+ <p class="info">PNG or JPEG, max 2MB</p>
247
+ </label>
248
+ <input type="file" accept="image/*" id="fileInput" />
249
+ <img id="imagePreview" class="file-placeholder" alt="Image preview" />
250
+ </div>
251
+
252
+ <button id="sendButton" disabled>Process Image</button>
253
+ <div id="loading">Processing your image...</div>
254
+ <div id="message"></div>
255
+
256
+ <div class="stats">
257
+ <div id="imageSize"></div>
258
+ <div id="processingTime"></div>
259
+ </div>
260
+
261
+ <div id="debugOutput"></div>
262
+ </div>
263
+
264
+ <!-- Response Container with Tabs -->
265
+ <div class="response-container">
266
+ <h3 class="container-title">Response</h3>
267
+
268
+ <div class="tabs">
269
+ <button class="tab-button active" data-tab="raw">Raw Output</button>
270
+ <button class="tab-button" data-tab="visual">Visual Output</button>
271
+ </div>
272
+
273
+ <!-- Raw Output Tab -->
274
+ <div id="rawTab" class="tab-content active">
275
+ <pre class="response-output" id="responseOutput">// Response will appear here after processing</pre>
276
+ </div>
277
+
278
+ <!-- Visual Output Tab -->
279
+ <div id="visualTab" class="tab-content">
280
+ <div id="visualizationContainer">
281
+ <canvas id="detectionCanvas" class="detection-canvas"></canvas>
282
+ </div>
283
+ </div>
284
+ </div>
285
+ </div>
286
+ </section>
287
+
288
+ <script>
289
+ // DOM Elements
290
+ const fileInput = document.getElementById('fileInput');
291
+ const imagePreview = document.getElementById('imagePreview');
292
+ const sendButton = document.getElementById('sendButton');
293
+ const loading = document.getElementById('loading');
294
+ const message = document.getElementById('message');
295
+ const responseOutput = document.getElementById('responseOutput');
296
+ const imageSizeInfo = document.getElementById('imageSize');
297
+ const processingTimeInfo = document.getElementById('processingTime');
298
+ const tabButtons = document.querySelectorAll('.tab-button');
299
+ const tabContents = document.querySelectorAll('.tab-content');
300
+ const detectionCanvas = document.getElementById('detectionCanvas');
301
+ const ctx = detectionCanvas.getContext('2d');
302
+ const debugOutput = document.getElementById('debugOutput');
303
+
304
+ // Enable debug mode (set to false in production)
305
+ const DEBUG = true;
306
+
307
+ // API endpoint URL
308
+ const API_URL = '/api/predict';
309
+
310
+ let imageFile = null;
311
+ let startTime = null;
312
+ let originalImage = null;
313
+ let processingWidth = 0;
314
+ let processingHeight = 0;
315
+ let responseData = null;
316
+
317
+ // Tab switching functionality
318
+ tabButtons.forEach(button => {
319
+ button.addEventListener('click', () => {
320
+ const tabName = button.getAttribute('data-tab');
321
+
322
+ // Update button states
323
+ tabButtons.forEach(btn => btn.classList.remove('active'));
324
+ button.classList.add('active');
325
+
326
+ // Update tab content visibility
327
+ tabContents.forEach(content => content.classList.remove('active'));
328
+ document.getElementById(tabName + 'Tab').classList.add('active');
329
+
330
+ // If switching to visual tab and we have data, ensure visualization is rendered
331
+ if (tabName === 'visual' && responseData && originalImage) {
332
+ visualizeResults(originalImage, responseData);
333
+ }
334
+ });
335
+ });
336
+
337
+ // Handle file input change
338
+ fileInput.addEventListener('change', (event) => {
339
+ const file = event.target.files[0];
340
+
341
+ // Clear previous selections
342
+ imageFile = null;
343
+ imagePreview.style.display = 'none';
344
+ sendButton.disabled = true;
345
+ originalImage = null;
346
+ responseData = null;
347
+
348
+ // Validate file
349
+ if (!file) return;
350
+
351
+ if (file.size > 2 * 1024 * 1024) {
352
+ showMessage('File size exceeds 2MB limit.', 'error');
353
+ return;
354
+ }
355
+
356
+ if (!['image/png', 'image/jpeg'].includes(file.type)) {
357
+ showMessage('Only PNG and JPEG formats are supported.', 'error');
358
+ return;
359
+ }
360
+
361
+ // Store file for upload
362
+ imageFile = file;
363
+
364
+ // Show image preview
365
+ const reader = new FileReader();
366
+ reader.onload = (e) => {
367
+ const image = new Image();
368
+ image.src = e.target.result;
369
+
370
+ image.onload = () => {
371
+ // Store original image for visualization
372
+ originalImage = image;
373
+
374
+ // Set preview
375
+ imagePreview.src = e.target.result;
376
+ imagePreview.style.display = 'block';
377
+
378
+ // Update image info
379
+ imageSizeInfo.textContent = `Original size: ${image.width}x${image.height} pixels`;
380
+
381
+ // Calculate processing dimensions (for visualization)
382
+ calculateProcessingDimensions(image.width, image.height);
383
+
384
+ // Enable send button
385
+ sendButton.disabled = false;
386
+ showMessage('Image ready to process.', 'info');
387
+ };
388
+ };
389
+ reader.readAsDataURL(file);
390
+ });
391
+
392
+ // Calculate dimensions for processing visualization
393
+ function calculateProcessingDimensions(width, height) {
394
+ const maxWidth = 640;
395
+ const maxHeight = 320;
396
+
397
+ // Calculate dimensions
398
+ if (width > height) {
399
+ if (width > maxWidth) {
400
+ height = Math.round((height * maxWidth) / width);
401
+ width = maxWidth;
402
+ }
403
+ } else {
404
+ if (height > maxHeight) {
405
+ width = Math.round((width * maxHeight) / height);
406
+ height = maxHeight;
407
+ }
408
+ }
409
+
410
+ // Store processing dimensions for visualization
411
+ processingWidth = width;
412
+ processingHeight = height;
413
+ }
414
+
415
+ // Handle send button click
416
+ sendButton.addEventListener('click', async () => {
417
+ if (!imageFile) {
418
+ showMessage('No image selected.', 'error');
419
+ return;
420
+ }
421
+
422
+ // Clear previous response
423
+ responseOutput.textContent = "// Processing...";
424
+ clearCanvas();
425
+ responseData = null;
426
+ debugOutput.style.display = 'none';
427
+
428
+ // Show loading state
429
+ loading.style.display = 'block';
430
+ message.style.display = 'none';
431
+
432
+ // Reset processing time
433
+ processingTimeInfo.textContent = '';
434
+
435
+ // Record start time
436
+ startTime = performance.now();
437
+
438
+ // Create form data for HTTP request
439
+ const formData = new FormData();
440
+ formData.append('file', imageFile);
441
+
442
+ try {
443
+ // Send HTTP request
444
+ const response = await fetch(API_URL, {
445
+ method: 'POST',
446
+ body: formData
447
+ });
448
+
449
+ // Handle response
450
+ if (!response.ok) {
451
+ const errorText = await response.text();
452
+ throw new Error(`HTTP error ${response.status}: ${errorText}`);
453
+ }
454
+
455
+ // Parse JSON response
456
+ const data = await response.json();
457
+ responseData = data;
458
+
459
+ // Calculate processing time
460
+ const endTime = performance.now();
461
+ const timeTaken = endTime - startTime;
462
+
463
+ // Format and display raw response
464
+ responseOutput.textContent = JSON.stringify(data, null, 2);
465
+ processingTimeInfo.textContent = `Processing time: ${timeTaken.toFixed(2)} ms`;
466
+
467
+ // Visualize the results
468
+ if (originalImage) {
469
+ visualizeResults(originalImage, data);
470
+ }
471
+
472
+ // Show success message
473
+ showMessage('Image processed successfully!', 'success');
474
+ } catch (error) {
475
+ console.error('Error processing image:', error);
476
+ showMessage(`Error: ${error.message}`, 'error');
477
+ responseOutput.textContent = `// Error: ${error.message}`;
478
+
479
+ if (DEBUG) {
480
+ debugOutput.style.display = 'block';
481
+ debugOutput.textContent = `Error: ${error.message}\n${error.stack || ''}`;
482
+ }
483
+ } finally {
484
+ loading.style.display = 'none';
485
+ }
486
+ });
487
+
488
+ // Visualize detection results
489
+ function visualizeResults(image, data) {
490
+ try {
491
+ // Set canvas dimensions
492
+ detectionCanvas.width = processingWidth;
493
+ detectionCanvas.height = processingHeight;
494
+
495
+ // Draw the original image
496
+ ctx.drawImage(image, 0, 0, processingWidth, processingHeight);
497
+
498
+ // Set styles for bounding boxes
499
+ ctx.lineWidth = 3;
500
+ ctx.font = 'bold 14px Arial';
501
+
502
+ // Find detections (checking all common formats)
503
+ let detections = [];
504
+ let detectionSource = '';
505
+
506
+ if (data.detections && Array.isArray(data.detections)) {
507
+ detections = data.detections;
508
+ detectionSource = 'detections';
509
+ } else if (data.predictions && Array.isArray(data.predictions)) {
510
+ detections = data.predictions;
511
+ detectionSource = 'predictions';
512
+ } else if (data.objects && Array.isArray(data.objects)) {
513
+ detections = data.objects;
514
+ detectionSource = 'objects';
515
+ } else if (data.results && Array.isArray(data.results)) {
516
+ detections = data.results;
517
+ detectionSource = 'results';
518
+ } else {
519
+ // Try to look one level deeper if no detections found
520
+ for (const key in data) {
521
+ if (typeof data[key] === 'object' && data[key] !== null) {
522
+ if (Array.isArray(data[key])) {
523
+ detections = data[key];
524
+ detectionSource = key;
525
+ break;
526
+ } else {
527
+ // Look one more level down
528
+ for (const subKey in data[key]) {
529
+ if (Array.isArray(data[key][subKey])) {
530
+ detections = data[key][subKey];
531
+ detectionSource = `${key}.${subKey}`;
532
+ break;
533
+ }
534
+ }
535
+ }
536
+ }
537
+ }
538
+ }
539
+
540
+ // Process each detection
541
+ detections.forEach((detection, index) => {
542
+ // Try to extract bounding box information
543
+ let bbox = null;
544
+ let label = null;
545
+ let confidence = null;
546
+ let distance = null;
547
+
548
+ // Extract label/class
549
+ if (detection.class !== undefined) {
550
+ label = detection.class;
551
+ } else {
552
+ // Fallback to other common property names
553
+ for (const key of ['label', 'name', 'category', 'className']) {
554
+ if (detection[key] !== undefined) {
555
+ label = detection[key];
556
+ break;
557
+ }
558
+ }
559
+ }
560
+
561
+ // Default label if none found
562
+ if (!label) label = `Object ${index + 1}`;
563
+
564
+ // Extract confidence score if available
565
+ for (const key of ['confidence', 'score', 'probability', 'conf']) {
566
+ if (detection[key] !== undefined) {
567
+ confidence = detection[key];
568
+ break;
569
+ }
570
+ }
571
+
572
+ // Extract distance - specifically look for distance_estimated first
573
+ if (detection.distance_estimated !== undefined) {
574
+ distance = detection.distance_estimated;
575
+ } else {
576
+ // Fallback to other common distance properties
577
+ for (const key of ['distance', 'depth', 'z', 'dist', 'range']) {
578
+ if (detection[key] !== undefined) {
579
+ distance = detection[key];
580
+ break;
581
+ }
582
+ }
583
+ }
584
+
585
+ // Look for bounding box in features
586
+ if (detection.features &&
587
+ detection.features.xmin !== undefined &&
588
+ detection.features.ymin !== undefined &&
589
+ detection.features.xmax !== undefined &&
590
+ detection.features.ymax !== undefined) {
591
+
592
+ bbox = {
593
+ xmin: detection.features.xmin,
594
+ ymin: detection.features.ymin,
595
+ xmax: detection.features.xmax,
596
+ ymax: detection.features.ymax
597
+ };
598
+ } else {
599
+ // Recursively search for bbox-like properties
600
+ function findBBox(obj, path = '') {
601
+ if (!obj || typeof obj !== 'object') return null;
602
+
603
+ // Check if this object looks like a bbox
604
+ if ((obj.x !== undefined && obj.y !== undefined &&
605
+ (obj.width !== undefined || obj.w !== undefined ||
606
+ obj.height !== undefined || obj.h !== undefined)) ||
607
+ (obj.xmin !== undefined && obj.ymin !== undefined &&
608
+ obj.xmax !== undefined && obj.ymax !== undefined)) {
609
+ return obj;
610
+ }
611
+
612
+ // Check if it's an array of 4 numbers (potential bbox)
613
+ if (Array.isArray(obj) && obj.length === 4 &&
614
+ obj.every(item => typeof item === 'number')) {
615
+ return obj;
616
+ }
617
+
618
+ // Check common bbox property names
619
+ for (const key of ['bbox', 'box', 'bounding_box', 'boundingBox']) {
620
+ if (obj[key] !== undefined) {
621
+ return obj[key];
622
+ }
623
+ }
624
+
625
+ // Search nested properties
626
+ for (const key in obj) {
627
+ const result = findBBox(obj[key], path ? `${path}.${key}` : key);
628
+ if (result) return result;
629
+ }
630
+
631
+ return null;
632
+ }
633
+
634
+ // Find bbox using recursive search as fallback
635
+ bbox = findBBox(detection);
636
+ }
637
+
638
+ // If we found a bounding box, draw it
639
+ if (bbox) {
640
+ // Parse different bbox formats
641
+ let x, y, width, height;
642
+
643
+ if (Array.isArray(bbox)) {
644
+ // Try to determine array format
645
+ if (bbox.length === 4) {
646
+ if (bbox[0] >= 0 && bbox[1] >= 0 && bbox[2] <= 1 && bbox[3] <= 1) {
647
+ // Likely normalized [x1, y1, x2, y2]
648
+ x = bbox[0] * processingWidth;
649
+ y = bbox[1] * processingHeight;
650
+ width = (bbox[2] - bbox[0]) * processingWidth;
651
+ height = (bbox[3] - bbox[1]) * processingHeight;
652
+ } else if (bbox[2] > bbox[0] && bbox[3] > bbox[1]) {
653
+ // Likely [x1, y1, x2, y2]
654
+ x = bbox[0];
655
+ y = bbox[1];
656
+ width = bbox[2] - bbox[0];
657
+ height = bbox[3] - bbox[1];
658
+ } else {
659
+ // Assume [x, y, width, height]
660
+ x = bbox[0];
661
+ y = bbox[1];
662
+ width = bbox[2];
663
+ height = bbox[3];
664
+ }
665
+ }
666
+ } else {
667
+ // Object format with named properties
668
+ if (bbox.x !== undefined && bbox.y !== undefined) {
669
+ x = bbox.x;
670
+ y = bbox.y;
671
+ width = bbox.width || bbox.w || 0;
672
+ height = bbox.height || bbox.h || 0;
673
+ } else if (bbox.xmin !== undefined && bbox.ymin !== undefined) {
674
+ x = bbox.xmin;
675
+ y = bbox.ymin;
676
+ width = (bbox.xmax || 0) - bbox.xmin;
677
+ height = (bbox.ymax || 0) - bbox.ymin;
678
+ }
679
+ }
680
+
681
+ // Validate coordinates
682
+ if (x === undefined || y === undefined || width === undefined || height === undefined) {
683
+ return;
684
+ }
685
+
686
+ // Check if we need to scale normalized coordinates (0-1)
687
+ if (x >= 0 && x <= 1 && y >= 0 && y <= 1 && width >= 0 && width <= 1 && height >= 0 && height <= 1) {
688
+ x = x * processingWidth;
689
+ y = y * processingHeight;
690
+ width = width * processingWidth;
691
+ height = height * processingHeight;
692
+ }
693
+
694
+ // Generate a color based on the class name
695
+ const hue = stringToHue(label);
696
+ ctx.strokeStyle = `hsl(${hue}, 100%, 40%)`;
697
+ ctx.fillStyle = `hsla(${hue}, 100%, 40%, 0.3)`;
698
+
699
+ // Draw bounding box
700
+ ctx.beginPath();
701
+ ctx.rect(x, y, width, height);
702
+ ctx.stroke();
703
+ ctx.fill();
704
+
705
+ // Format confidence value
706
+ let confidenceText = "";
707
+ if (confidence !== null && confidence !== undefined) {
708
+ // Convert to percentage if it's a probability (0-1)
709
+ if (confidence <= 1) {
710
+ confidence = (confidence * 100).toFixed(0);
711
+ } else {
712
+ confidence = confidence.toFixed(0);
713
+ }
714
+ confidenceText = ` ${confidence}%`;
715
+ }
716
+
717
+ // Format distance value
718
+ let distanceText = "";
719
+ if (distance !== null && distance !== undefined) {
720
+ distanceText = ` : ${distance.toFixed(2)} m`;
721
+ }
722
+
723
+ // Create label text
724
+ const labelText = `${label}${confidenceText}${distanceText}`;
725
+
726
+ // Measure text width
727
+ const textWidth = ctx.measureText(labelText).width + 10;
728
+
729
+ // Draw label background
730
+ ctx.fillStyle = `hsl(${hue}, 100%, 40%)`;
731
+ ctx.fillRect(x, y - 20, textWidth, 20);
732
+
733
+ // Draw label text
734
+ ctx.fillStyle = "white";
735
+ ctx.fillText(labelText, x + 5, y - 5);
736
+ }
737
+ });
738
+
739
+ } catch (error) {
740
+ console.error('Error visualizing results:', error);
741
+ debugOutput.style.display = 'block';
742
+ debugOutput.textContent += `VISUALIZATION ERROR: ${error.message}\n`;
743
+ debugOutput.textContent += `Error stack: ${error.stack}\n`;
744
+ }
745
+ }
746
+
747
+ // Generate consistent hue for string
748
+ function stringToHue(str) {
749
+ let hash = 0;
750
+ for (let i = 0; i < str.length; i++) {
751
+ hash = str.charCodeAt(i) + ((hash << 5) - hash);
752
+ }
753
+ return hash % 360;
754
+ }
755
+
756
+ // Clear canvas
757
+ function clearCanvas() {
758
+ if (detectionCanvas.getContext) {
759
+ ctx.clearRect(0, 0, detectionCanvas.width, detectionCanvas.height);
760
+ }
761
+ }
762
+
763
+ // Show message function
764
+ function showMessage(text, type) {
765
+ message.textContent = text;
766
+ message.className = '';
767
+ message.classList.add(type);
768
+ message.style.display = 'block';
769
+
770
+ if (type === 'info') {
771
+ setTimeout(() => {
772
+ message.style.display = 'none';
773
+ }, 3000);
774
+ }
775
+ }
776
+ </script>
777
+ </body>
778
+ </html>
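For reference, the same endpoint can be exercised outside the browser demo. The sketch below is illustrative, not part of the commit: it assumes the service is reachable at http://localhost:8000, and it reuses the /api/predict route and the multipart field name "file" from the script above; the response layout ("objects" with class, distance_estimated and features) follows utils/JSON_output.py further down.

    import requests  # third-party HTTP client

    # Hypothetical host and image path; adjust to wherever the container is actually served.
    url = "http://localhost:8000/api/predict"

    with open("street_scene.jpg", "rb") as f:
        # The demo page posts the image as multipart form data under the field name "file".
        response = requests.post(url, files={"file": ("street_scene.jpg", f, "image/jpeg")})

    response.raise_for_status()
    for obj in response.json().get("objects", []):
        print(obj["class"], obj["distance_estimated"], obj["features"]["xmin"])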
utils/JSON_output.py ADDED
@@ -0,0 +1,43 @@
1
+ from models.predict_z_location_single_row_lstm import predict_z_location_single_row
2
+
3
+ def generate_output_json(data, ZlocE, scaler):
4
+ """
5
+ Predict Z-location for each object in the data and prepare the JSON output.
6
+
7
+ Parameters:
8
+ - data: DataFrame with bounding box coordinates, depth information, and class type.
9
+ - ZlocE: Pre-loaded LSTM model for Z-location prediction.
10
+ - scaler: Scaler for normalizing input data.
11
+
12
+ Returns:
13
+ - JSON structure with object class, distance estimated, and relevant features.
14
+ """
15
+ output_json = []
16
+
17
+ # Iterate over each row in the data
18
+ for i, row in data.iterrows():
19
+ # Predict distance for each object using the single-row prediction function
20
+ distance = predict_z_location_single_row(row, ZlocE, scaler)
21
+
22
+ # Create object info dictionary
23
+ object_info = {
24
+ "class": row["class"], # Object class (e.g., 'car', 'truck')
25
+ "distance_estimated": float(distance), # Convert distance to float (if necessary)
26
+ "features": {
27
+ "xmin": float(row["xmin"]), # Bounding box xmin
28
+ "ymin": float(row["ymin"]), # Bounding box ymin
29
+ "xmax": float(row["xmax"]), # Bounding box xmax
30
+ "ymax": float(row["ymax"]), # Bounding box ymax
31
+ "mean_depth": float(row["depth_mean"]), # Depth mean
32
+ "depth_mean_trim": float(row["depth_mean_trim"]), # Depth mean trim
33
+ "depth_median": float(row["depth_median"]), # Depth median
34
+ "width": float(row["width"]), # Object width
35
+ "height": float(row["height"]) # Object height
36
+ }
37
+ }
38
+
39
+ # Append each object info to the output JSON list
40
+ output_json.append(object_info)
41
+
42
+ # Return the final JSON output structure
43
+ return {"objects": output_json}
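For orientation, this is roughly the structure the helper returns for a single detected object (the numbers are invented; the model and scaler arguments are whatever the caller has already loaded):

    # Illustrative return value of generate_output_json (values made up):
    {
        "objects": [
            {
                "class": "car",
                "distance_estimated": 12.47,
                "features": {
                    "xmin": 104.0, "ymin": 212.0, "xmax": 298.0, "ymax": 330.0,
                    "mean_depth": 11.9, "depth_mean_trim": 12.1, "depth_median": 12.0,
                    "width": 194.0, "height": 118.0
                }
            }
        ]
    }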
utils/__init__.py ADDED
File without changes
utils/__pycache__/JSON_output.cpython-311.pyc ADDED
Binary file (1.93 kB). View file
 
utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (183 Bytes). View file
 
utils/__pycache__/processing.cpython-311.pyc ADDED
Binary file (3.25 kB). View file
 
utils/build/lib.win-amd64-cpython-311/processing_cy.cp311-win_amd64.pyd ADDED
Binary file (71.7 kB). View file
 
utils/build/temp.win-amd64-cpython-311/Release/processing.obj ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:651a39060235c15b7fecf63a9e462ae0c379f5a194a745110cf259131fc235a2
3
+ size 539061
utils/build/temp.win-amd64-cpython-311/Release/processing_cy.cp311-win_amd64.exp ADDED
Binary file (806 Bytes). View file
 
utils/build/temp.win-amd64-cpython-311/Release/processing_cy.cp311-win_amd64.lib ADDED
Binary file (2.12 kB). View file
 
utils/processing.c ADDED
The diff for this file is too large to render. See raw diff
 
utils/processing.html ADDED
The diff for this file is too large to render. See raw diff
 
utils/processing.py ADDED
@@ -0,0 +1,59 @@
1
+ """
2
+ Created on Sat Apr 9 04:08:02 2022
3
+ @author: Admin_with ODD Team
4
+
5
+ Edited by our team: Sat Oct 12, 2024
6
+
7
+ references: https://github.com/vinvino02/GLPDepth
8
+ """
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ import torch
13
+
14
+ from utils.processing_cy import process_bbox_depth_cy, handle_overlaps_cy
15
+
16
+ class PROCESSING:
17
+ def process_detections(self, scores, boxes, depth_map, detr):
18
+ self.data = pd.DataFrame(columns=['xmin','ymin','xmax','ymax','width', 'height',
19
+ 'depth_mean_trim','depth_mean','depth_median',
20
+ 'class', 'rgb'])
21
+
22
+ boxes_array = np.array([[int(box[1]), int(box[0]), int(box[3]), int(box[2])]
23
+ for box in boxes.tolist()], dtype=np.int32)
24
+
25
+ # Use Cython-optimized overlap handling
26
+ valid_indices = handle_overlaps_cy(depth_map, boxes_array)
27
+
28
+ for idx in valid_indices:
29
+ p = scores[idx]
30
+ box = boxes[idx]
31
+ xmin, ymin, xmax, ymax = map(int, box)
32
+
33
+ detected_class = p.argmax()
34
+ class_label = detr.CLASSES[detected_class]
35
+
36
+ # Map classes
37
+ if class_label == 'motorcycle':
38
+ class_label = 'bicycle'
39
+ elif class_label == 'bus':
40
+ class_label = 'train'
41
+ elif class_label not in ['person', 'truck', 'car', 'bicycle', 'train']:
42
+ class_label = 'Misc'
43
+
44
+ if class_label in ['Misc', 'person', 'truck', 'car', 'bicycle', 'train']:
45
+ # Use Cython-optimized depth calculations
46
+ depth_mean, depth_median, (depth_trim_low, depth_trim_high) = \
47
+ process_bbox_depth_cy(depth_map, ymin, ymax, xmin, xmax)
48
+
49
+ class_index = ['Misc', 'person', 'truck', 'car', 'bicycle', 'train'].index(class_label)
50
+ r, g, b = detr.COLORS[class_index]
51
+ rgb = (r * 255, g * 255, b * 255)
52
+
53
+ new_row = pd.DataFrame([[xmin, ymin, xmax, ymax, xmax - xmin, ymax - ymin,
54
+ (depth_trim_low + depth_trim_high) / 2,
55
+ depth_mean, depth_median, class_label, rgb]],
56
+ columns=self.data.columns)
57
+ self.data = pd.concat([self.data, new_row], ignore_index=True)
58
+
59
+ return self.data
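A minimal way to drive this class end to end, assuming the Cython extension has been built in place, using synthetic inputs and a stand-in detector object (only its CLASSES and COLORS attributes are read here) — a sketch, not code from the commit:

    import numpy as np
    import torch
    from utils.processing import PROCESSING

    class FakeDetr:
        # Stand-in for the real detector wrapper; only CLASSES and COLORS are used.
        CLASSES = ["person", "car", "truck", "bicycle", "train", "motorcycle", "bus"]
        COLORS = [(0.1 * i, 0.2, 0.5) for i in range(6)]

    depth_map = (np.random.rand(320, 640) * 40.0).astype(np.float32)   # synthetic metric depth
    scores = torch.tensor([[0.05, 0.90, 0.05, 0.0, 0.0, 0.0, 0.0]])    # one confident "car"
    boxes = torch.tensor([[100.0, 120.0, 300.0, 260.0]])               # [xmin, ymin, xmax, ymax]

    df = PROCESSING().process_detections(scores, boxes, depth_map, FakeDetr())
    print(df[["class", "depth_mean", "depth_median", "depth_mean_trim"]])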
utils/processing.pyx ADDED
@@ -0,0 +1,101 @@
1
+ # File: processing.pyx
2
+ # cython: language_level=3, boundscheck=False, wraparound=False, nonecheck=False, cdivision=True
3
+
4
+ import numpy as np
5
+ cimport numpy as np
6
+ import pandas as pd
7
+ # NOTE: pandas ships no .pxd, so it cannot be cimported; the Python-level import above suffices
8
+ from libc.math cimport isnan
9
+ from cpython.mem cimport PyMem_Malloc, PyMem_Free
10
+
11
+ # Define C types for better performance
12
+ ctypedef np.float32_t DTYPE_t
13
+ ctypedef np.int32_t ITYPE_t
14
+
15
+ def process_bbox_depth_cy(np.ndarray[DTYPE_t, ndim=2] depth_map,
16
+ int y_min, int y_max, int x_min, int x_max):
17
+ """
18
+ Optimized bbox depth calculations using Cython
19
+ """
20
+ cdef:
21
+ int i, j, count = 0
22
+ double sum_val = 0.0
23
+ double mean_val = 0.0
24
+ np.ndarray[DTYPE_t, ndim=1] flat_vals
25
+ int flat_size = 0
26
+
27
+ for i in range(y_min, y_max):
28
+ for j in range(x_min, x_max):
29
+ if not isnan(depth_map[i, j]):
30
+ sum_val += depth_map[i, j]
31
+ count += 1
32
+
33
+ if count > 0:
34
+ mean_val = sum_val / count
35
+
36
+ # Create array for trimmed mean calculation
37
+ flat_vals = np.zeros(count, dtype=np.float32)
38
+ flat_size = 0
39
+
40
+ for i in range(y_min, y_max):
41
+ for j in range(x_min, x_max):
42
+ if not isnan(depth_map[i, j]):
43
+ flat_vals[flat_size] = depth_map[i, j]
44
+ flat_size += 1
45
+
46
+ return mean_val, np.median(flat_vals), np.percentile(flat_vals, [20, 80])
47
+
48
+ def handle_overlaps_cy(np.ndarray[DTYPE_t, ndim=2] depth_map,
49
+ np.ndarray[ITYPE_t, ndim=2] boxes):
50
+ """
51
+ Optimized overlap handling using Cython
52
+ """
53
+ cdef:
54
+ int n_boxes = boxes.shape[0]
55
+ int i, j
56
+ int y_min1, y_max1, x_min1, x_max1
57
+ int y_min2, y_max2, x_min2, x_max2
58
+ double area1, area2, area_intersection
59
+ bint* to_remove = <bint*>PyMem_Malloc(n_boxes * sizeof(bint))
60
+
61
+ if not to_remove:
62
+ raise MemoryError()
63
+
64
+ try:
65
+ for i in range(n_boxes):
66
+ to_remove[i] = False
67
+
68
+ for i in range(n_boxes):
69
+ if to_remove[i]:
70
+ continue
71
+
72
+ y_min1, x_min1, y_max1, x_max1 = boxes[i]
73
+
74
+ for j in range(i + 1, n_boxes):
75
+ if to_remove[j]:
76
+ continue
77
+
78
+ y_min2, x_min2, y_max2, x_max2 = boxes[j]
79
+
80
+ # Calculate intersection
81
+ y_min_int = max(y_min1, y_min2)
82
+ y_max_int = min(y_max1, y_max2)
83
+ x_min_int = max(x_min1, x_min2)
84
+ x_max_int = min(x_max1, x_max2)
85
+
86
+ if y_min_int < y_max_int and x_min_int < x_max_int:
87
+ area1 = (y_max1 - y_min1) * (x_max1 - x_min1)
88
+ area2 = (y_max2 - y_min2) * (x_max2 - x_min2)
89
+ area_intersection = (y_max_int - y_min_int) * (x_max_int - x_min_int)
90
+
91
+ if area_intersection / min(area1, area2) >= 0.70:
92
+ if area1 < area2:
93
+ to_remove[i] = True
94
+ break
95
+ else:
96
+ to_remove[j] = True
97
+
98
+ return np.array([i for i in range(n_boxes) if not to_remove[i]], dtype=np.int32)
99
+
100
+ finally:
101
+ PyMem_Free(to_remove)
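For sanity-checking the compiled extension, the same suppression rule can be written in plain Python/NumPy. Note that the depth_map argument of handle_overlaps_cy is not consulted by the overlap decision above, so this reference (a sketch, not a module in the commit) takes only the box array of [ymin, xmin, ymax, xmax] rows:

    import numpy as np

    def handle_overlaps_reference(boxes):
        # boxes: (N, 4) integer array of [ymin, xmin, ymax, xmax]; returns the kept indices.
        n = len(boxes)
        removed = [False] * n
        for i in range(n):
            if removed[i]:
                continue
            y1a, x1a, y2a, x2a = boxes[i]
            for j in range(i + 1, n):
                if removed[j]:
                    continue
                y1b, x1b, y2b, x2b = boxes[j]
                inter_h = min(y2a, y2b) - max(y1a, y1b)
                inter_w = min(x2a, x2b) - max(x1a, x1b)
                if inter_h > 0 and inter_w > 0:
                    area_a = (y2a - y1a) * (x2a - x1a)
                    area_b = (y2b - y1b) * (x2b - x1b)
                    # Drop the smaller box when >= 70% of it is covered by the other box.
                    if inter_h * inter_w / min(area_a, area_b) >= 0.70:
                        if area_a < area_b:
                            removed[i] = True
                            break
                        removed[j] = True
        return np.array([k for k in range(n) if not removed[k]], dtype=np.int32)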
utils/processing_cy.cp311-win_amd64.pyd ADDED
Binary file (71.7 kB). View file
 
utils/setup.py ADDED
@@ -0,0 +1,18 @@
1
+ from setuptools import setup, Extension
2
+ from Cython.Build import cythonize
3
+ import numpy as np
4
+
5
+ extensions = [
6
+ Extension(
7
+ "processing_cy",
8
+ ["processing.pyx"],
9
+ include_dirs=[np.get_include()],
10
+ extra_compile_args=["-O3", "-march=native", "-fopenmp"],
11
+ extra_link_args=["-fopenmp"]
12
+ )
13
+ ]
14
+
15
+ setup(
16
+ ext_modules=cythonize(extensions, annotate=True),
17
+ include_dirs=[np.get_include()]
18
+ )
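With this script in place, the extension is conventionally compiled in place by running python setup.py build_ext --inplace from the utils directory, which is what produces the processing_cy binary imported by utils/processing.py. The -O3, -march=native and -fopenmp flags are GCC/Clang-style options; an MSVC build on Windows typically warns about such unknown options and ignores them rather than failing.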
utils/visualization.py ADDED
@@ -0,0 +1,42 @@
1
+ import cv2
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib.patches as patches
5
+
6
+
7
+ def plot_depth_with_boxes(depth_map, depth_data):
8
+ """
9
+ Plots the depth map with bounding boxes overlayed.
10
+
11
+ Args:
12
+ depth_map (numpy.ndarray): The depth map to visualize.
13
+ depth_data (pandas.DataFrame): DataFrame containing bounding box coordinates, depth statistics, and class labels.
14
+ """
15
+ # Normalize the depth map for better visualization
16
+ depth_map_normalized = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
17
+
18
+ # Create a figure and axis
19
+ fig, ax = plt.subplots(1, figsize=(12, 6))
20
+
21
+ # Display the depth map
22
+ ax.imshow(depth_map_normalized, cmap='plasma') # You can change the colormap as desired
23
+ ax.axis('off') # Hide the axes
24
+
25
+ # Loop through the DataFrame and add rectangles
26
+ for index, row in depth_data.iterrows():
27
+ xmin, ymin, xmax, ymax = row[['xmin', 'ymin', 'xmax', 'ymax']]
28
+ class_label = row['class']
29
+ score = row['depth_mean'] # or whichever statistic you prefer to display
30
+
31
+ # Create a rectangle patch
32
+ rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, linewidth=2, edgecolor='yellow', facecolor='none')
33
+
34
+ # Add the rectangle to the plot
35
+ ax.add_patch(rect)
36
+
37
+ # Add a text label
38
+ ax.text(xmin, ymin - 5, f'{class_label}: {score:.2f}', color='white', fontsize=12, weight='bold')
39
+
40
+ plt.title('Depth Map with Object Detection Bounding Boxes', fontsize=16)
41
+ plt.show()
42
+
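A quick, self-contained way to exercise this plotting helper with synthetic data (illustrative only; the values are made up):

    import numpy as np
    import pandas as pd
    from utils.visualization import plot_depth_with_boxes

    # Synthetic depth map plus one fake detection, just to exercise the overlay.
    depth_map = (np.random.rand(320, 640) * 50.0).astype(np.float32)
    depth_data = pd.DataFrame([{
        "xmin": 100, "ymin": 120, "xmax": 300, "ymax": 260,
        "class": "car", "depth_mean": 14.2,
    }])
    plot_depth_with_boxes(depth_map, depth_data)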