FluxM-Lightning-Upscaler

Running on Zero

App Files Files Community

LPX55 committed 30 days ago

Commit

45a5338

verified ·

1 Parent(s): 9f0fb62

Update app_v2.py

Browse files

Files changed (1) hide show

app_v2.py +8 -17

app_v2.py CHANGED Viewed

@@ -1,23 +1,21 @@
 import torch
 import spaces
 import os
-import gradio as gr
 from diffusers.utils import load_image
 from diffusers.hooks import apply_group_offloading
 from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
 from transformers import T5EncoderModel
-from transformers import LlavaForConditionalGeneration, TextIteratorStreamer, AutoProcessor, AutoTokenizer
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
 from liger_kernel.transformers import apply_liger_kernel_to_llama
 from PIL import Image
 from threading import Thread
 from typing import Generator
 from peft import PeftModel, PeftConfig
 huggingface_token = os.getenv("HUGGINFACE_TOKEN")
-sys_prompt = os.getenv("SYS")
 MAX_SEED = 1000000
 MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
 cap_processor = AutoProcessor.from_pretrained(MODEL_PATH)
@@ -41,7 +39,7 @@ pipe = FluxControlNetPipeline.from_pretrained(
 )
 pipe.to("cuda")
-@spaces.GPU(duration=10)
 @torch.no_grad()
 def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
     torch.cuda.empty_cache()
@@ -80,12 +78,8 @@ def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: fl
     output = cap_model.generate(**generate_kwargs)
     print(f"Generated {len(output[0])} tokens")
-    print(f"Generated {type(output)}")
-    print(f"Generated {output}")
-    #return output[0]
-@spaces.GPU(duration=10)
 @torch.no_grad()
 def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
     generator = torch.Generator().manual_seed(seed)
@@ -96,7 +90,6 @@ def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_
     h = h - h % 32
     control_image = control_image.resize((int(w * scale), int(h * scale)), resample=2)  # Resample.BILINEAR
     print("Size to: " + str(control_image.size[0]) + ", " + str(control_image.size[1]))
-    print("Cond Prompt: " + str(prompt))
     with torch.inference_mode():
         image = pipe(
             generator=generator,
@@ -154,8 +147,6 @@ def process_image(control_image, user_prompt, system_prompt, scale, steps,
             seed=seed,
             guidance_end=guidance_end
         )
-        print(caption_gen)
-        print(generated_caption)
         yield f"Completed! Used prompt: {final_prompt}", image
     except Exception as e:
         yield f"Error: {str(e)}", None
@@ -173,14 +164,14 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
         generated_image = gr.Image(type="pil", label="Generated Image", format="png", show_label=False)
     with gr.Row():
         with gr.Column(scale=1):
-            prompt = gr.Textbox(lines=4, placeholder="Enter your prompt here...", label="Prompt", interactive=True)
             output_caption = gr.Textbox(label="Caption")
             scale = gr.Slider(1, 3, value=1, label="Scale", step=0.25)
             generate_button = gr.Button("Generate Image", variant="primary")
             caption_button = gr.Button("Generate Caption", variant="secondary")
         with gr.Column(scale=1):
             seed = gr.Slider(0, MAX_SEED, value=42, label="Seed", step=1)
-            steps = gr.Slider(2, 16, value=8, label="Steps", step=1)
             controlnet_conditioning_scale = gr.Slider(0, 1, value=0.6, label="ControlNet Scale")
             guidance_scale = gr.Slider(1, 30, value=3.5, label="Guidance Scale")
             guidance_end = gr.Slider(0, 1, value=1.0, label="Guidance End")
@@ -188,7 +179,7 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
         with gr.Accordion("Generation settings", open=False):
             system_prompt = gr.Textbox(
                 lines=4,
-                value=sys_prompt,
                 label="System Prompt for Captioning",
                 visible=True  # Changed to visible
             )
@@ -220,7 +211,7 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
             controlnet_conditioning_scale, guidance_scale, seed,
             guidance_end, temperature_slider, top_p_slider, max_tokens_slider, log_prompt
         ],
-        outputs=[prompt, generated_image]
     )
     caption_button.click(

 import torch
 import spaces
 import os
 from diffusers.utils import load_image
 from diffusers.hooks import apply_group_offloading
 from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
 from transformers import T5EncoderModel
+from transformers import LlavaForConditionalGeneration, TextIteratorStreamer, AutoProcessor
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
 from liger_kernel.transformers import apply_liger_kernel_to_llama
 from PIL import Image
 from threading import Thread
 from typing import Generator
 from peft import PeftModel, PeftConfig
+import gradio as gr
 huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 MAX_SEED = 1000000
 MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
 cap_processor = AutoProcessor.from_pretrained(MODEL_PATH)
 )
 pipe.to("cuda")
+@spaces.GPU()
 @torch.no_grad()
 def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
     torch.cuda.empty_cache()
     output = cap_model.generate(**generate_kwargs)
     print(f"Generated {len(output[0])} tokens")
+@spaces.GPU()
 @torch.no_grad()
 def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
     generator = torch.Generator().manual_seed(seed)
     h = h - h % 32
     control_image = control_image.resize((int(w * scale), int(h * scale)), resample=2)  # Resample.BILINEAR
     print("Size to: " + str(control_image.size[0]) + ", " + str(control_image.size[1]))
     with torch.inference_mode():
         image = pipe(
             generator=generator,
             seed=seed,
             guidance_end=guidance_end
         )
         yield f"Completed! Used prompt: {final_prompt}", image
     except Exception as e:
         yield f"Error: {str(e)}", None
         generated_image = gr.Image(type="pil", label="Generated Image", format="png", show_label=False)
     with gr.Row():
         with gr.Column(scale=1):
+            prompt = gr.Textbox(lines=4, placeholder="Enter your prompt here...", label="Prompt")
             output_caption = gr.Textbox(label="Caption")
             scale = gr.Slider(1, 3, value=1, label="Scale", step=0.25)
             generate_button = gr.Button("Generate Image", variant="primary")
             caption_button = gr.Button("Generate Caption", variant="secondary")
         with gr.Column(scale=1):
             seed = gr.Slider(0, MAX_SEED, value=42, label="Seed", step=1)
+            steps = gr.Slider(2, 16, value=8, label="Steps")
             controlnet_conditioning_scale = gr.Slider(0, 1, value=0.6, label="ControlNet Scale")
             guidance_scale = gr.Slider(1, 30, value=3.5, label="Guidance Scale")
             guidance_end = gr.Slider(0, 1, value=1.0, label="Guidance End")
         with gr.Accordion("Generation settings", open=False):
             system_prompt = gr.Textbox(
                 lines=4,
+                value="Write a straightforward caption for this image. Begin with the main subject and medium. Mention pivotal elements—people, objects, scenery—using confident, definite language. Focus on concrete details like color, shape, texture, and spatial relationships. Show how elements interact. Omit mood and speculative wording. If text is present, quote it exactly. Note any watermarks, signatures, or compression artifacts. Never mention what's absent, resolution, or unobservable details. Vary your sentence structure and keep the description concise, without starting with 'This image is…' or similar phrasing.",
                 label="System Prompt for Captioning",
                 visible=True  # Changed to visible
             )
             controlnet_conditioning_scale, guidance_scale, seed,
             guidance_end, temperature_slider, top_p_slider, max_tokens_slider, log_prompt
         ],
+        outputs=[output_caption, generated_image]
     )
     caption_button.click(