ProximileAdmin committed on
Commit
e0b5376
·
verified ·
1 Parent(s): d060636

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -15
app.py CHANGED
@@ -36,6 +36,9 @@ VLLM_MODEL = os.environ.get('MODEL_NAME', 'google/gemma-3-27b-it')
36
  HYPERBOLIC_KEY = os.environ.get('HYPERBOLIC_XYZ_KEY')
37
  FALLBACK_MODEL = 'Qwen/Qwen2.5-VL-72B-Instruct' # Fallback model at Hyperbolic
38
 
 
 
 
39
  # API endpoints
40
  VLLM_ENDPOINT = "http://localhost:" + str(LOCAL_PORT) + "/v1"
41
  HYPERBOLIC_ENDPOINT = "https://api.hyperbolic.xyz/v1"
@@ -392,7 +395,7 @@ def get_tunnel_status_message():
392
  """
393
  Return a formatted status message for display in the UI.
394
  """
395
- global tunnel_status, use_fallback
396
 
397
  api_mode = "Hyperbolic API" if use_fallback else "Local vLLM API"
398
  model = get_model_name()
@@ -400,7 +403,7 @@ def get_tunnel_status_message():
400
  status_color = "🟢" if (tunnel_status["is_running"] and not use_fallback) else "🔴"
401
  status_text = tunnel_status["message"]
402
 
403
- return f"{status_color} Tunnel Status: {status_text}\nCurrent API: {api_mode}\nCurrent Model: {model}"
404
 
405
  def toggle_api():
406
  """
@@ -414,6 +417,30 @@ def toggle_api():
414
 
415
  return f"Switched to {api_mode} using {model}"
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  # Start the SSH tunnel in a background thread
418
  if __name__ == "__main__":
419
  # Start the SSH tunnel
@@ -451,22 +478,24 @@ if __name__ == "__main__":
451
  # Clear button
452
  clear_btn = gr.Button("Clear Chat")
453
 
454
- # Set up submit event chain
455
  submit_event = textbox.submit(
456
  fn=process_chat,
457
  inputs=[textbox, chatbot],
458
- outputs=chatbot
 
459
  ).then(
460
  fn=lambda: {"text": "", "files": []},
461
  inputs=None,
462
  outputs=textbox
463
  )
464
 
465
- # Connect the submit button to the same functions
466
  submit_btn.click(
467
  fn=process_chat,
468
  inputs=[textbox, chatbot],
469
- outputs=chatbot
 
470
  ).then(
471
  fn=lambda: {"text": "", "files": []},
472
  inputs=None,
@@ -509,7 +538,6 @@ if __name__ == "__main__":
509
  # Refresh status button and toggle API button
510
  with gr.Row():
511
  refresh_btn = gr.Button("Refresh Status")
512
- toggle_api_btn = gr.Button("Toggle API (Local/Hyperbolic)")
513
 
514
  # Set up refresh status button
515
  refresh_btn.click(
@@ -518,13 +546,6 @@ if __name__ == "__main__":
518
  outputs=status_text
519
  )
520
 
521
- # Set up toggle API button
522
- toggle_api_btn.click(
523
- fn=toggle_api,
524
- inputs=None,
525
- outputs=status_text
526
- )
527
-
528
  # Just load the initial status without auto-refresh
529
  demo.load(
530
  fn=get_tunnel_status_message,
@@ -532,5 +553,6 @@ if __name__ == "__main__":
532
  outputs=status_text
533
  )
534
 
535
- # Launch the interface on a different port than the SSH tunnel
 
536
  demo.launch()
 
36
  HYPERBOLIC_KEY = os.environ.get('HYPERBOLIC_XYZ_KEY')
37
  FALLBACK_MODEL = 'Qwen/Qwen2.5-VL-72B-Instruct' # Fallback model at Hyperbolic
38
 
39
+ # Set the maximum number of concurrent API calls before queuing
40
+ MAX_CONCURRENT = int(os.environ.get('MAX_CONCURRENT', 3)) # Default to 3 concurrent calls
41
+
42
  # API endpoints
43
  VLLM_ENDPOINT = "http://localhost:" + str(LOCAL_PORT) + "/v1"
44
  HYPERBOLIC_ENDPOINT = "https://api.hyperbolic.xyz/v1"
 
395
  """
396
  Return a formatted status message for display in the UI.
397
  """
398
+ global tunnel_status, use_fallback, MAX_CONCURRENT
399
 
400
  api_mode = "Hyperbolic API" if use_fallback else "Local vLLM API"
401
  model = get_model_name()
 
403
  status_color = "🟢" if (tunnel_status["is_running"] and not use_fallback) else "🔴"
404
  status_text = tunnel_status["message"]
405
 
406
+ return f"{status_color} Tunnel Status: {status_text}\nCurrent API: {api_mode}\nCurrent Model: {model}\nConcurrent Requests: {MAX_CONCURRENT}"
407
 
408
  def toggle_api():
409
  """
 
417
 
418
  return f"Switched to {api_mode} using {model}"
419
 
420
def update_concurrency(new_value):
    """
    Update the global MAX_CONCURRENT value used for future event handlers.

    Args:
        new_value (str): New concurrency value as a string (e.g. from a UI
            textbox). Non-numeric or non-string values are rejected gracefully.

    Returns:
        str: Human-readable status message describing the outcome.
    """
    global MAX_CONCURRENT
    try:
        value = int(new_value)
    except (ValueError, TypeError):
        # int() raises ValueError for non-numeric strings but TypeError for
        # None / other non-numeric objects; treat both as an invalid number
        # instead of letting the UI callback crash.
        return f"Error: Invalid number. Keeping current value: {MAX_CONCURRENT}"

    if value < 1:
        return f"Error: Concurrency must be at least 1. Keeping current value: {MAX_CONCURRENT}"

    MAX_CONCURRENT = value
    # Note: This only updates the value for future event handlers
    # Existing event handlers keep their original concurrency_limit
    # A page refresh is needed for this to fully take effect
    return f"Concurrency updated to {MAX_CONCURRENT}. You may need to refresh the page for all changes to take effect."
443
+
444
  # Start the SSH tunnel in a background thread
445
  if __name__ == "__main__":
446
  # Start the SSH tunnel
 
478
  # Clear button
479
  clear_btn = gr.Button("Clear Chat")
480
 
481
+ # Set up submit event chain with concurrency limit
482
  submit_event = textbox.submit(
483
  fn=process_chat,
484
  inputs=[textbox, chatbot],
485
+ outputs=chatbot,
486
+ concurrency_limit=MAX_CONCURRENT # Set concurrency limit for this event
487
  ).then(
488
  fn=lambda: {"text": "", "files": []},
489
  inputs=None,
490
  outputs=textbox
491
  )
492
 
493
+ # Connect the submit button to the same functions with same concurrency limit
494
  submit_btn.click(
495
  fn=process_chat,
496
  inputs=[textbox, chatbot],
497
+ outputs=chatbot,
498
+ concurrency_limit=MAX_CONCURRENT # Set concurrency limit for this event
499
  ).then(
500
  fn=lambda: {"text": "", "files": []},
501
  inputs=None,
 
538
  # Refresh status button and toggle API button
539
  with gr.Row():
540
  refresh_btn = gr.Button("Refresh Status")
 
541
 
542
  # Set up refresh status button
543
  refresh_btn.click(
 
546
  outputs=status_text
547
  )
548
 
 
 
 
 
 
 
 
549
  # Just load the initial status without auto-refresh
550
  demo.load(
551
  fn=get_tunnel_status_message,
 
553
  outputs=status_text
554
  )
555
 
556
+ # Launch the interface with the specified concurrency setting
557
+ demo.queue(default_concurrency_limit=MAX_CONCURRENT)
558
  demo.launch()