ProximileAdmin committed on
Commit
e0b5376
·
verified ·
1 Parent(s): d060636

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -15
app.py CHANGED
@@ -36,6 +36,9 @@ VLLM_MODEL = os.environ.get('MODEL_NAME', 'google/gemma-3-27b-it')
36
  HYPERBOLIC_KEY = os.environ.get('HYPERBOLIC_XYZ_KEY')
37
  FALLBACK_MODEL = 'Qwen/Qwen2.5-VL-72B-Instruct' # Fallback model at Hyperbolic
38
 
 
 
 
39
  # API endpoints
40
  VLLM_ENDPOINT = "http://localhost:" + str(LOCAL_PORT) + "/v1"
41
  HYPERBOLIC_ENDPOINT = "https://api.hyperbolic.xyz/v1"
@@ -392,7 +395,7 @@ def get_tunnel_status_message():
392
  """
393
  Return a formatted status message for display in the UI.
394
  """
395
- global tunnel_status, use_fallback
396
 
397
  api_mode = "Hyperbolic API" if use_fallback else "Local vLLM API"
398
  model = get_model_name()
@@ -400,7 +403,7 @@ def get_tunnel_status_message():
400
  status_color = "🟢" if (tunnel_status["is_running"] and not use_fallback) else "🔴"
401
  status_text = tunnel_status["message"]
402
 
403
- return f"{status_color} Tunnel Status: {status_text}\nCurrent API: {api_mode}\nCurrent Model: {model}"
404
 
405
  def toggle_api():
406
  """
@@ -414,6 +417,30 @@ def toggle_api():
414
 
415
  return f"Switched to {api_mode} using {model}"
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  # Start the SSH tunnel in a background thread
418
  if __name__ == "__main__":
419
  # Start the SSH tunnel
@@ -451,22 +478,24 @@ if __name__ == "__main__":
451
  # Clear button
452
  clear_btn = gr.Button("Clear Chat")
453
 
454
- # Set up submit event chain
455
  submit_event = textbox.submit(
456
  fn=process_chat,
457
  inputs=[textbox, chatbot],
458
- outputs=chatbot
 
459
  ).then(
460
  fn=lambda: {"text": "", "files": []},
461
  inputs=None,
462
  outputs=textbox
463
  )
464
 
465
- # Connect the submit button to the same functions
466
  submit_btn.click(
467
  fn=process_chat,
468
  inputs=[textbox, chatbot],
469
- outputs=chatbot
 
470
  ).then(
471
  fn=lambda: {"text": "", "files": []},
472
  inputs=None,
@@ -509,7 +538,6 @@ if __name__ == "__main__":
509
  # Refresh status button and toggle API button
510
  with gr.Row():
511
  refresh_btn = gr.Button("Refresh Status")
512
- toggle_api_btn = gr.Button("Toggle API (Local/Hyperbolic)")
513
 
514
  # Set up refresh status button
515
  refresh_btn.click(
@@ -518,13 +546,6 @@ if __name__ == "__main__":
518
  outputs=status_text
519
  )
520
 
521
- # Set up toggle API button
522
- toggle_api_btn.click(
523
- fn=toggle_api,
524
- inputs=None,
525
- outputs=status_text
526
- )
527
-
528
  # Just load the initial status without auto-refresh
529
  demo.load(
530
  fn=get_tunnel_status_message,
@@ -532,5 +553,6 @@ if __name__ == "__main__":
532
  outputs=status_text
533
  )
534
 
535
- # Launch the interface on a different port than the SSH tunnel
 
536
  demo.launch()
 
36
  HYPERBOLIC_KEY = os.environ.get('HYPERBOLIC_XYZ_KEY')
37
  FALLBACK_MODEL = 'Qwen/Qwen2.5-VL-72B-Instruct' # Fallback model at Hyperbolic
38
 
39
+ # Set the maximum number of concurrent API calls before queuing
40
+ MAX_CONCURRENT = int(os.environ.get('MAX_CONCURRENT', 3)) # Default to 3 concurrent calls
41
+
42
  # API endpoints
43
  VLLM_ENDPOINT = "http://localhost:" + str(LOCAL_PORT) + "/v1"
44
  HYPERBOLIC_ENDPOINT = "https://api.hyperbolic.xyz/v1"
 
395
  """
396
  Return a formatted status message for display in the UI.
397
  """
398
+ global tunnel_status, use_fallback, MAX_CONCURRENT
399
 
400
  api_mode = "Hyperbolic API" if use_fallback else "Local vLLM API"
401
  model = get_model_name()
 
403
  status_color = "🟢" if (tunnel_status["is_running"] and not use_fallback) else "🔴"
404
  status_text = tunnel_status["message"]
405
 
406
+ return f"{status_color} Tunnel Status: {status_text}\nCurrent API: {api_mode}\nCurrent Model: {model}\nConcurrent Requests: {MAX_CONCURRENT}"
407
 
408
  def toggle_api():
409
  """
 
417
 
418
  return f"Switched to {api_mode} using {model}"
419
 
420
def update_concurrency(new_value):
    """
    Update the global MAX_CONCURRENT value used for future event handlers.

    Args:
        new_value (str): New concurrency value as a string (e.g. from a UI
            textbox). Non-numeric or non-string values are rejected gracefully.

    Returns:
        str: Human-readable status message describing the outcome.
    """
    global MAX_CONCURRENT
    try:
        value = int(new_value)
    except (ValueError, TypeError):
        # int() raises ValueError for non-numeric strings but TypeError for
        # None / other non-numeric objects; treat both as an invalid number
        # instead of letting the UI callback crash.
        return f"Error: Invalid number. Keeping current value: {MAX_CONCURRENT}"

    if value < 1:
        return f"Error: Concurrency must be at least 1. Keeping current value: {MAX_CONCURRENT}"

    MAX_CONCURRENT = value
    # Note: This only updates the value for future event handlers
    # Existing event handlers keep their original concurrency_limit
    # A page refresh is needed for this to fully take effect
    return f"Concurrency updated to {MAX_CONCURRENT}. You may need to refresh the page for all changes to take effect."
443
+
444
  # Start the SSH tunnel in a background thread
445
  if __name__ == "__main__":
446
  # Start the SSH tunnel
 
478
  # Clear button
479
  clear_btn = gr.Button("Clear Chat")
480
 
481
+ # Set up submit event chain with concurrency limit
482
  submit_event = textbox.submit(
483
  fn=process_chat,
484
  inputs=[textbox, chatbot],
485
+ outputs=chatbot,
486
+ concurrency_limit=MAX_CONCURRENT # Set concurrency limit for this event
487
  ).then(
488
  fn=lambda: {"text": "", "files": []},
489
  inputs=None,
490
  outputs=textbox
491
  )
492
 
493
+ # Connect the submit button to the same functions with same concurrency limit
494
  submit_btn.click(
495
  fn=process_chat,
496
  inputs=[textbox, chatbot],
497
+ outputs=chatbot,
498
+ concurrency_limit=MAX_CONCURRENT # Set concurrency limit for this event
499
  ).then(
500
  fn=lambda: {"text": "", "files": []},
501
  inputs=None,
 
538
  # Refresh status button and toggle API button
539
  with gr.Row():
540
  refresh_btn = gr.Button("Refresh Status")
 
541
 
542
  # Set up refresh status button
543
  refresh_btn.click(
 
546
  outputs=status_text
547
  )
548
 
 
 
 
 
 
 
 
549
  # Just load the initial status without auto-refresh
550
  demo.load(
551
  fn=get_tunnel_status_message,
 
553
  outputs=status_text
554
  )
555
 
556
+ # Launch the interface with the specified concurrency setting
557
+ demo.queue(default_concurrency_limit=MAX_CONCURRENT)
558
  demo.launch()