chengzeyi committed on
Commit
3cf3832
·
1 Parent(s): 2eb2f52

add rate limit

Browse files
Files changed (1) hide show
  1. app.py +106 -68
app.py CHANGED
@@ -64,10 +64,9 @@ class BackendStatus:
64
  self.status = "completed"
65
  self.progress = 100
66
  self.end_time = time.time()
67
- self.history.append({
68
- "timestamp": datetime.now(),
69
- "duration": self.end_time - self.start_time
70
- })
71
 
72
  def fail(self):
73
  self.status = "failed"
@@ -94,8 +93,10 @@ class SessionManager:
94
  with cls._lock:
95
  to_remove = []
96
  for session_id, manager in cls._instances.items():
97
- if (hasattr(manager, "last_activity")
98
- and current_time - manager.last_activity > max_age):
 
 
99
  to_remove.append(session_id)
100
 
101
  for session_id in to_remove:
@@ -105,15 +106,24 @@ class SessionManager:
105
  class GenerationManager:
106
 
107
  def __init__(self):
108
- self.backend_statuses = {
109
- backend: BackendStatus()
110
- for backend in BACKENDS
111
- }
112
  self.last_activity = time.time()
 
113
 
114
  def update_activity(self):
115
  self.last_activity = time.time()
116
 
 
 
 
 
 
 
 
 
 
 
 
117
  def get_performance_plot(self):
118
  fig = go.Figure()
119
 
@@ -127,21 +137,24 @@ class GenerationManager:
127
  # Use bar chart instead of box plot
128
  fig.add_trace(
129
  go.Bar(
130
- y=[avg_duration], #
131
  x=[BACKENDS[backend]["name"]], # Backend name
132
  name=BACKENDS[backend]["name"],
133
  marker_color=BACKENDS[backend]["color"],
134
  text=[f"{avg_duration:.2f}s"], # Show time in seconds
135
  textposition="auto",
136
  width=[0.5], # Make bars narrower
137
- ))
 
138
 
139
  # Set a minimum y-axis range if we have data
140
  if has_data:
141
- max_duration = max([
142
- max([h["duration"] for h in status.history] or [0])
143
- for status in self.backend_statuses.values()
144
- ])
 
 
145
  # Add 20% padding to the top
146
  y_max = max_duration * 1.2
147
  # Ensure the y-axis always starts at 0
@@ -196,19 +209,15 @@ class GenerationManager:
196
 
197
  # Use aiohttp instead of requests for async
198
  async with aiohttp.ClientSession() as session:
199
- async with session.post(url, headers=headers,
200
- json=payload) as response:
201
  if response.status == 200:
202
  result = await response.json()
203
  request_id = result["data"]["id"]
204
- print(
205
- f"Task submitted successfully. Request ID: {request_id}"
206
- )
207
  return request_id
208
  else:
209
  text = await response.text()
210
- raise Exception(
211
- f"API error: {response.status}, {text}")
212
 
213
  except Exception as e:
214
  status.fail()
@@ -296,9 +305,9 @@ async def poll_once(manager, backend, request_id):
296
  # It's base64 data - format it as a data URI if needed
297
  try:
298
  # Format as data URI for Gradio to display directly
299
- if isinstance(
300
- output, str
301
- ) and not output.startswith("data:image"):
302
  # Convert raw base64 to data URI format
303
  return f"data:image/jpeg;base64,{output}"
304
  else:
@@ -306,8 +315,7 @@ async def poll_once(manager, backend, request_id):
306
  return output
307
  except Exception as e:
308
  print(f"Error processing base64 image: {e}")
309
- raise Exception(
310
- f"Failed to process base64 image: {str(e)}")
311
 
312
  elif current_status == "failed":
313
  manager.backend_statuses[backend].fail()
@@ -338,19 +346,25 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
338
  gr.Markdown("# 🌊 WaveSpeedAI HiDream Arena")
339
 
340
  # Add the introduction with link to WaveSpeedAI
341
- gr.Markdown("""
 
342
  [WaveSpeedAI](https://wavespeed.ai/) is the global pioneer in accelerating AI-powered video and image generation.
343
  Our in-house inference accelerator provides lossless speedup on image & video generation based on our rich inference optimization software stack, including our in-house inference compiler, CUDA kernel libraries and parallel computing libraries.
344
- """)
345
- gr.Markdown("""
 
 
346
  This demo showcases the performance and outputs of leading image generation models, including HiDream and Flux, on our accelerated inference platform.
347
- """)
 
348
 
349
  with gr.Row():
350
  with gr.Column(scale=3):
351
- example_dropdown = gr.Dropdown(choices=example_prompts,
352
- label="Choose an example prompt",
353
- interactive=True)
 
 
354
  input_text = gr.Textbox(
355
  example_prompts[0],
356
  label="Enter your prompt",
@@ -360,20 +374,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
360
  with gr.Column(scale=1):
361
  generate_btn = gr.Button("Generate", variant="primary")
362
 
363
- example_dropdown.change(lambda ex: ex,
364
- inputs=[example_dropdown],
365
- outputs=[input_text])
366
 
367
  # Two status boxes - small (default) and big (during generation)
368
- small_status_box = gr.Markdown("Ready to generate images",
369
- elem_id="small-status")
370
 
371
  # Big status box in its own row with styling
372
  with gr.Row(elem_id="big-status-row"):
373
- big_status_box = gr.Markdown("",
374
- elem_id="big-status",
375
- visible=False,
376
- elem_classes="big-status-box")
377
 
378
  with gr.Row():
379
  with gr.Column():
@@ -386,27 +398,27 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
386
  performance_plot = gr.Plot(label="Performance Metrics")
387
 
388
  with gr.Accordion("Recent Generations (last 16)", open=False):
389
- recent_gallery = gr.Gallery(label="Prompt and Output",
390
- columns=3,
391
- interactive=False)
392
 
393
  def get_recent_gallery_items():
394
  gallery_items = []
395
  for r in reversed(recent_generations):
396
  gallery_items.append((r["flux-dev"], f"FLUX-dev: {r['prompt']}"))
397
- gallery_items.append(
398
- (r["hidream-dev"], f"HiDream-dev: {r['prompt']}"))
399
- gallery_items.append(
400
- (r["hidream-full"], f"HiDream-full: {r['prompt']}"))
401
  return gallery_items
402
 
403
  def update_recent_gallery(prompt, results):
404
- recent_generations.append({
405
- "prompt": prompt,
406
- "flux-dev": results["flux-dev"],
407
- "hidream-dev": results["hidream-dev"],
408
- "hidream-full": results["hidream-full"],
409
- })
 
 
410
  if len(recent_generations) > 16:
411
  recent_generations.pop(0)
412
  gallery_items = get_recent_gallery_items()
@@ -457,13 +469,34 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
457
  gr.HTML(f"<style>{css}</style>")
458
 
459
  # Update the generation function to use session manager
460
- async def generate_all_backends_with_status_boxes(prompt,
461
- current_session_id):
462
  """Generate images with big status box during generation"""
463
  # Get or create a session manager
464
  session_id, manager = SessionManager.get_manager(current_session_id)
465
  manager.update_activity()
466
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
  # IMPORTANT: Reset history when starting a new generation
468
  if prompt and prompt.strip() != "":
469
  manager.reset_history() # Clear previous performance metrics
@@ -523,8 +556,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
523
  poll_attempt = 0
524
 
525
  # Main polling loop
526
- while len(completed_backends
527
- ) < 3 and poll_attempt < max_poll_attempts:
528
  poll_attempt += 1
529
 
530
  # Poll each pending backend
@@ -536,8 +568,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
536
  # Only do actual API calls every few attempts to reduce load
537
  if poll_attempt % 2 == 0 or backend == "flux-dev":
538
  # Use the session manager instead of global manager
539
- result = await poll_once(manager, backend,
540
- request_ids[backend])
 
541
  if result: # Backend completed
542
  results[backend] = result
543
  completed_backends.add(backend)
@@ -551,8 +584,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
551
  results["flux-dev"],
552
  results["hidream-dev"],
553
  results["hidream-full"],
554
- (manager.get_performance_plot()
555
- if any(completed_backends) else None),
 
 
 
556
  session_id,
557
  None,
558
  )
@@ -563,9 +599,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
563
  await asyncio.sleep(0.1)
564
 
565
  # Final status
566
- final_status = ("✅ All generations completed!"
567
- if len(completed_backends) == 3 else
568
- "⚠️ Some generations timed out")
 
 
569
 
570
  gallery_update = update_recent_gallery(prompt, results)
571
 
 
64
  self.status = "completed"
65
  self.progress = 100
66
  self.end_time = time.time()
67
+ self.history.append(
68
+ {"timestamp": datetime.now(), "duration": self.end_time - self.start_time}
69
+ )
 
70
 
71
  def fail(self):
72
  self.status = "failed"
 
93
  with cls._lock:
94
  to_remove = []
95
  for session_id, manager in cls._instances.items():
96
+ if (
97
+ hasattr(manager, "last_activity")
98
+ and current_time - manager.last_activity > max_age
99
+ ):
100
  to_remove.append(session_id)
101
 
102
  for session_id in to_remove:
 
106
  class GenerationManager:
107
 
108
  def __init__(self):
109
+ self.backend_statuses = {backend: BackendStatus() for backend in BACKENDS}
 
 
 
110
  self.last_activity = time.time()
111
+ self.request_timestamps = [] # Track timestamps of requests
112
 
113
  def update_activity(self):
114
  self.last_activity = time.time()
115
 
116
+ def add_request_timestamp(self):
117
+ self.request_timestamps.append(time.time())
118
+
119
+ def has_exceeded_limit(self, limit=10): # Default limit: 10 requests per hour
120
+ current_time = time.time()
121
+ # Filter timestamps to only include those within the last hour
122
+ self.request_timestamps = [
123
+ ts for ts in self.request_timestamps if current_time - ts <= 3600
124
+ ]
125
+ return len(self.request_timestamps) >= limit
126
+
127
  def get_performance_plot(self):
128
  fig = go.Figure()
129
 
 
137
  # Use bar chart instead of box plot
138
  fig.add_trace(
139
  go.Bar(
140
+ y=[avg_duration], #
141
  x=[BACKENDS[backend]["name"]], # Backend name
142
  name=BACKENDS[backend]["name"],
143
  marker_color=BACKENDS[backend]["color"],
144
  text=[f"{avg_duration:.2f}s"], # Show time in seconds
145
  textposition="auto",
146
  width=[0.5], # Make bars narrower
147
+ )
148
+ )
149
 
150
  # Set a minimum y-axis range if we have data
151
  if has_data:
152
+ max_duration = max(
153
+ [
154
+ max([h["duration"] for h in status.history] or [0])
155
+ for status in self.backend_statuses.values()
156
+ ]
157
+ )
158
  # Add 20% padding to the top
159
  y_max = max_duration * 1.2
160
  # Ensure the y-axis always starts at 0
 
209
 
210
  # Use aiohttp instead of requests for async
211
  async with aiohttp.ClientSession() as session:
212
+ async with session.post(url, headers=headers, json=payload) as response:
 
213
  if response.status == 200:
214
  result = await response.json()
215
  request_id = result["data"]["id"]
216
+ print(f"Task submitted successfully. Request ID: {request_id}")
 
 
217
  return request_id
218
  else:
219
  text = await response.text()
220
+ raise Exception(f"API error: {response.status}, {text}")
 
221
 
222
  except Exception as e:
223
  status.fail()
 
305
  # It's base64 data - format it as a data URI if needed
306
  try:
307
  # Format as data URI for Gradio to display directly
308
+ if isinstance(output, str) and not output.startswith(
309
+ "data:image"
310
+ ):
311
  # Convert raw base64 to data URI format
312
  return f"data:image/jpeg;base64,{output}"
313
  else:
 
315
  return output
316
  except Exception as e:
317
  print(f"Error processing base64 image: {e}")
318
+ raise Exception(f"Failed to process base64 image: {str(e)}")
 
319
 
320
  elif current_status == "failed":
321
  manager.backend_statuses[backend].fail()
 
346
  gr.Markdown("# 🌊 WaveSpeedAI HiDream Arena")
347
 
348
  # Add the introduction with link to WaveSpeedAI
349
+ gr.Markdown(
350
+ """
351
  [WaveSpeedAI](https://wavespeed.ai/) is the global pioneer in accelerating AI-powered video and image generation.
352
  Our in-house inference accelerator provides lossless speedup on image & video generation based on our rich inference optimization software stack, including our in-house inference compiler, CUDA kernel libraries and parallel computing libraries.
353
+ """
354
+ )
355
+ gr.Markdown(
356
+ """
357
  This demo showcases the performance and outputs of leading image generation models, including HiDream and Flux, on our accelerated inference platform.
358
+ """
359
+ )
360
 
361
  with gr.Row():
362
  with gr.Column(scale=3):
363
+ example_dropdown = gr.Dropdown(
364
+ choices=example_prompts,
365
+ label="Choose an example prompt",
366
+ interactive=True,
367
+ )
368
  input_text = gr.Textbox(
369
  example_prompts[0],
370
  label="Enter your prompt",
 
374
  with gr.Column(scale=1):
375
  generate_btn = gr.Button("Generate", variant="primary")
376
 
377
+ example_dropdown.change(
378
+ lambda ex: ex, inputs=[example_dropdown], outputs=[input_text]
379
+ )
380
 
381
  # Two status boxes - small (default) and big (during generation)
382
+ small_status_box = gr.Markdown("Ready to generate images", elem_id="small-status")
 
383
 
384
  # Big status box in its own row with styling
385
  with gr.Row(elem_id="big-status-row"):
386
+ big_status_box = gr.Markdown(
387
+ "", elem_id="big-status", visible=False, elem_classes="big-status-box"
388
+ )
 
389
 
390
  with gr.Row():
391
  with gr.Column():
 
398
  performance_plot = gr.Plot(label="Performance Metrics")
399
 
400
  with gr.Accordion("Recent Generations (last 16)", open=False):
401
+ recent_gallery = gr.Gallery(
402
+ label="Prompt and Output", columns=3, interactive=False
403
+ )
404
 
405
  def get_recent_gallery_items():
406
  gallery_items = []
407
  for r in reversed(recent_generations):
408
  gallery_items.append((r["flux-dev"], f"FLUX-dev: {r['prompt']}"))
409
+ gallery_items.append((r["hidream-dev"], f"HiDream-dev: {r['prompt']}"))
410
+ gallery_items.append((r["hidream-full"], f"HiDream-full: {r['prompt']}"))
 
 
411
  return gallery_items
412
 
413
  def update_recent_gallery(prompt, results):
414
+ recent_generations.append(
415
+ {
416
+ "prompt": prompt,
417
+ "flux-dev": results["flux-dev"],
418
+ "hidream-dev": results["hidream-dev"],
419
+ "hidream-full": results["hidream-full"],
420
+ }
421
+ )
422
  if len(recent_generations) > 16:
423
  recent_generations.pop(0)
424
  gallery_items = get_recent_gallery_items()
 
469
  gr.HTML(f"<style>{css}</style>")
470
 
471
  # Update the generation function to use session manager
472
+ async def generate_all_backends_with_status_boxes(prompt, current_session_id):
 
473
  """Generate images with big status box during generation"""
474
  # Get or create a session manager
475
  session_id, manager = SessionManager.get_manager(current_session_id)
476
  manager.update_activity()
477
 
478
+ # Check if the user has exceeded the request limit
479
+ if manager.has_exceeded_limit(
480
+ limit=10
481
+ ): # Set the limit to 10 requests per hour
482
+ error_message = "❌ You have exceeded the limit of 10 requests per hour. Please try again later."
483
+ yield (
484
+ error_message,
485
+ error_message,
486
+ gr.update(visible=False),
487
+ gr.update(visible=True),
488
+ None,
489
+ None,
490
+ None,
491
+ None,
492
+ session_id,
493
+ None,
494
+ )
495
+ return
496
+
497
+ # Add the current request timestamp
498
+ manager.add_request_timestamp()
499
+
500
  # IMPORTANT: Reset history when starting a new generation
501
  if prompt and prompt.strip() != "":
502
  manager.reset_history() # Clear previous performance metrics
 
556
  poll_attempt = 0
557
 
558
  # Main polling loop
559
+ while len(completed_backends) < 3 and poll_attempt < max_poll_attempts:
 
560
  poll_attempt += 1
561
 
562
  # Poll each pending backend
 
568
  # Only do actual API calls every few attempts to reduce load
569
  if poll_attempt % 2 == 0 or backend == "flux-dev":
570
  # Use the session manager instead of global manager
571
+ result = await poll_once(
572
+ manager, backend, request_ids[backend]
573
+ )
574
  if result: # Backend completed
575
  results[backend] = result
576
  completed_backends.add(backend)
 
584
  results["flux-dev"],
585
  results["hidream-dev"],
586
  results["hidream-full"],
587
+ (
588
+ manager.get_performance_plot()
589
+ if any(completed_backends)
590
+ else None
591
+ ),
592
  session_id,
593
  None,
594
  )
 
599
  await asyncio.sleep(0.1)
600
 
601
  # Final status
602
+ final_status = (
603
+ "✅ All generations completed!"
604
+ if len(completed_backends) == 3
605
+ else "⚠️ Some generations timed out"
606
+ )
607
 
608
  gallery_update = update_recent_gallery(prompt, results)
609