Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -36,6 +36,9 @@ VLLM_MODEL = os.environ.get('MODEL_NAME', 'google/gemma-3-27b-it')
|
|
36 |
HYPERBOLIC_KEY = os.environ.get('HYPERBOLIC_XYZ_KEY')
|
37 |
FALLBACK_MODEL = 'Qwen/Qwen2.5-VL-72B-Instruct' # Fallback model at Hyperbolic
|
38 |
|
|
|
|
|
|
|
39 |
# API endpoints
|
40 |
VLLM_ENDPOINT = "http://localhost:" + str(LOCAL_PORT) + "/v1"
|
41 |
HYPERBOLIC_ENDPOINT = "https://api.hyperbolic.xyz/v1"
|
@@ -392,7 +395,7 @@ def get_tunnel_status_message():
|
|
392 |
"""
|
393 |
Return a formatted status message for display in the UI.
|
394 |
"""
|
395 |
-
global tunnel_status, use_fallback
|
396 |
|
397 |
api_mode = "Hyperbolic API" if use_fallback else "Local vLLM API"
|
398 |
model = get_model_name()
|
@@ -400,7 +403,7 @@ def get_tunnel_status_message():
|
|
400 |
status_color = "🟢" if (tunnel_status["is_running"] and not use_fallback) else "🔴"
|
401 |
status_text = tunnel_status["message"]
|
402 |
|
403 |
-
return f"{status_color} Tunnel Status: {status_text}\nCurrent API: {api_mode}\nCurrent Model: {model}"
|
404 |
|
405 |
def toggle_api():
|
406 |
"""
|
@@ -414,6 +417,30 @@ def toggle_api():
|
|
414 |
|
415 |
return f"Switched to {api_mode} using {model}"
|
416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
# Start the SSH tunnel in a background thread
|
418 |
if __name__ == "__main__":
|
419 |
# Start the SSH tunnel
|
@@ -451,22 +478,24 @@ if __name__ == "__main__":
|
|
451 |
# Clear button
|
452 |
clear_btn = gr.Button("Clear Chat")
|
453 |
|
454 |
-
# Set up submit event chain
|
455 |
submit_event = textbox.submit(
|
456 |
fn=process_chat,
|
457 |
inputs=[textbox, chatbot],
|
458 |
-
outputs=chatbot
|
|
|
459 |
).then(
|
460 |
fn=lambda: {"text": "", "files": []},
|
461 |
inputs=None,
|
462 |
outputs=textbox
|
463 |
)
|
464 |
|
465 |
-
# Connect the submit button to the same functions
|
466 |
submit_btn.click(
|
467 |
fn=process_chat,
|
468 |
inputs=[textbox, chatbot],
|
469 |
-
outputs=chatbot
|
|
|
470 |
).then(
|
471 |
fn=lambda: {"text": "", "files": []},
|
472 |
inputs=None,
|
@@ -509,7 +538,6 @@ if __name__ == "__main__":
|
|
509 |
# Refresh status button and toggle API button
|
510 |
with gr.Row():
|
511 |
refresh_btn = gr.Button("Refresh Status")
|
512 |
-
toggle_api_btn = gr.Button("Toggle API (Local/Hyperbolic)")
|
513 |
|
514 |
# Set up refresh status button
|
515 |
refresh_btn.click(
|
@@ -518,13 +546,6 @@ if __name__ == "__main__":
|
|
518 |
outputs=status_text
|
519 |
)
|
520 |
|
521 |
-
# Set up toggle API button
|
522 |
-
toggle_api_btn.click(
|
523 |
-
fn=toggle_api,
|
524 |
-
inputs=None,
|
525 |
-
outputs=status_text
|
526 |
-
)
|
527 |
-
|
528 |
# Just load the initial status without auto-refresh
|
529 |
demo.load(
|
530 |
fn=get_tunnel_status_message,
|
@@ -532,5 +553,6 @@ if __name__ == "__main__":
|
|
532 |
outputs=status_text
|
533 |
)
|
534 |
|
535 |
-
# Launch the interface
|
|
|
536 |
demo.launch()
|
|
|
36 |
HYPERBOLIC_KEY = os.environ.get('HYPERBOLIC_XYZ_KEY')
|
37 |
FALLBACK_MODEL = 'Qwen/Qwen2.5-VL-72B-Instruct' # Fallback model at Hyperbolic
|
38 |
|
39 |
+
# Set the maximum number of concurrent API calls before queuing
|
40 |
+
MAX_CONCURRENT = int(os.environ.get('MAX_CONCURRENT', 3)) # Default to 3 concurrent calls
|
41 |
+
|
42 |
# API endpoints
|
43 |
VLLM_ENDPOINT = "http://localhost:" + str(LOCAL_PORT) + "/v1"
|
44 |
HYPERBOLIC_ENDPOINT = "https://api.hyperbolic.xyz/v1"
|
|
|
395 |
"""
|
396 |
Return a formatted status message for display in the UI.
|
397 |
"""
|
398 |
+
global tunnel_status, use_fallback, MAX_CONCURRENT
|
399 |
|
400 |
api_mode = "Hyperbolic API" if use_fallback else "Local vLLM API"
|
401 |
model = get_model_name()
|
|
|
403 |
status_color = "🟢" if (tunnel_status["is_running"] and not use_fallback) else "🔴"
|
404 |
status_text = tunnel_status["message"]
|
405 |
|
406 |
+
return f"{status_color} Tunnel Status: {status_text}\nCurrent API: {api_mode}\nCurrent Model: {model}\nConcurrent Requests: {MAX_CONCURRENT}"
|
407 |
|
408 |
def toggle_api():
|
409 |
"""
|
|
|
417 |
|
418 |
return f"Switched to {api_mode} using {model}"
|
419 |
|
420 |
+
def update_concurrency(new_value):
    """
    Update the MAX_CONCURRENT value.

    Args:
        new_value (str): New concurrency value; any input that ``int()``
            can convert is accepted.

    Returns:
        str: Confirmation message with the new value, or an error message
            when the input is not a valid integer or is below 1. On error
            MAX_CONCURRENT is left unchanged.
    """
    global MAX_CONCURRENT

    # Keep the try body to the conversion only. TypeError is caught as well
    # as ValueError because int(None) (and other non-numeric types) raises
    # TypeError, which would otherwise escape instead of producing the
    # "Invalid number" message.
    try:
        value = int(new_value)
    except (TypeError, ValueError):
        return f"Error: Invalid number. Keeping current value: {MAX_CONCURRENT}"

    if value < 1:
        return f"Error: Concurrency must be at least 1. Keeping current value: {MAX_CONCURRENT}"

    MAX_CONCURRENT = value
    # Note: This only updates the module-level value read by future event
    # handlers. Handlers that were already registered keep the
    # concurrency_limit they were created with.
    # NOTE(review): a page refresh may not be enough for the new limit to
    # apply to existing handlers -- confirm against the Gradio queue docs.
    return f"Concurrency updated to {MAX_CONCURRENT}. You may need to refresh the page for all changes to take effect."
|
443 |
+
|
444 |
# Start the SSH tunnel in a background thread
|
445 |
if __name__ == "__main__":
|
446 |
# Start the SSH tunnel
|
|
|
478 |
# Clear button
|
479 |
clear_btn = gr.Button("Clear Chat")
|
480 |
|
481 |
+
# Set up submit event chain with concurrency limit
|
482 |
submit_event = textbox.submit(
|
483 |
fn=process_chat,
|
484 |
inputs=[textbox, chatbot],
|
485 |
+
outputs=chatbot,
|
486 |
+
concurrency_limit=MAX_CONCURRENT # Set concurrency limit for this event
|
487 |
).then(
|
488 |
fn=lambda: {"text": "", "files": []},
|
489 |
inputs=None,
|
490 |
outputs=textbox
|
491 |
)
|
492 |
|
493 |
+
# Connect the submit button to the same functions with same concurrency limit
|
494 |
submit_btn.click(
|
495 |
fn=process_chat,
|
496 |
inputs=[textbox, chatbot],
|
497 |
+
outputs=chatbot,
|
498 |
+
concurrency_limit=MAX_CONCURRENT # Set concurrency limit for this event
|
499 |
).then(
|
500 |
fn=lambda: {"text": "", "files": []},
|
501 |
inputs=None,
|
|
|
538 |
# Refresh status button
|
539 |
with gr.Row():
|
540 |
refresh_btn = gr.Button("Refresh Status")
|
|
|
541 |
|
542 |
# Set up refresh status button
|
543 |
refresh_btn.click(
|
|
|
546 |
outputs=status_text
|
547 |
)
|
548 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
549 |
# Just load the initial status without auto-refresh
|
550 |
demo.load(
|
551 |
fn=get_tunnel_status_message,
|
|
|
553 |
outputs=status_text
|
554 |
)
|
555 |
|
556 |
+
# Launch the interface with the specified concurrency setting
|
557 |
+
demo.queue(default_concurrency_limit=MAX_CONCURRENT)
|
558 |
demo.launch()
|