Spaces:
Running
on
Zero
Running
on
Zero
Update app_v2.py
Browse files
app_v2.py
CHANGED
@@ -1,23 +1,21 @@
|
|
1 |
import torch
|
2 |
import spaces
|
3 |
import os
|
4 |
-
import gradio as gr
|
5 |
-
|
6 |
from diffusers.utils import load_image
|
7 |
from diffusers.hooks import apply_group_offloading
|
8 |
from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
|
9 |
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
|
10 |
from transformers import T5EncoderModel
|
11 |
-
from transformers import LlavaForConditionalGeneration, TextIteratorStreamer, AutoProcessor
|
12 |
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
|
13 |
from liger_kernel.transformers import apply_liger_kernel_to_llama
|
14 |
from PIL import Image
|
15 |
from threading import Thread
|
16 |
from typing import Generator
|
17 |
from peft import PeftModel, PeftConfig
|
|
|
18 |
|
19 |
huggingface_token = os.getenv("HUGGINFACE_TOKEN")
|
20 |
-
sys_prompt = os.getenv("SYS")
|
21 |
MAX_SEED = 1000000
|
22 |
MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
|
23 |
cap_processor = AutoProcessor.from_pretrained(MODEL_PATH)
|
@@ -41,7 +39,7 @@ pipe = FluxControlNetPipeline.from_pretrained(
|
|
41 |
)
|
42 |
pipe.to("cuda")
|
43 |
|
44 |
-
@spaces.GPU(
|
45 |
@torch.no_grad()
|
46 |
def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
|
47 |
torch.cuda.empty_cache()
|
@@ -80,12 +78,8 @@ def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: fl
|
|
80 |
|
81 |
output = cap_model.generate(**generate_kwargs)
|
82 |
print(f"Generated {len(output[0])} tokens")
|
83 |
-
print(f"Generated {type(output)}")
|
84 |
-
print(f"Generated {output}")
|
85 |
-
|
86 |
-
#return output[0]
|
87 |
|
88 |
-
@spaces.GPU(
|
89 |
@torch.no_grad()
|
90 |
def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
|
91 |
generator = torch.Generator().manual_seed(seed)
|
@@ -96,7 +90,6 @@ def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_
|
|
96 |
h = h - h % 32
|
97 |
control_image = control_image.resize((int(w * scale), int(h * scale)), resample=2) # Resample.BILINEAR
|
98 |
print("Size to: " + str(control_image.size[0]) + ", " + str(control_image.size[1]))
|
99 |
-
print("Cond Prompt: " + str(prompt))
|
100 |
with torch.inference_mode():
|
101 |
image = pipe(
|
102 |
generator=generator,
|
@@ -154,8 +147,6 @@ def process_image(control_image, user_prompt, system_prompt, scale, steps,
|
|
154 |
seed=seed,
|
155 |
guidance_end=guidance_end
|
156 |
)
|
157 |
-
print(caption_gen)
|
158 |
-
print(generated_caption)
|
159 |
yield f"Completed! Used prompt: {final_prompt}", image
|
160 |
except Exception as e:
|
161 |
yield f"Error: {str(e)}", None
|
@@ -173,14 +164,14 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
|
|
173 |
generated_image = gr.Image(type="pil", label="Generated Image", format="png", show_label=False)
|
174 |
with gr.Row():
|
175 |
with gr.Column(scale=1):
|
176 |
-
prompt = gr.Textbox(lines=4, placeholder="Enter your prompt here...", label="Prompt"
|
177 |
output_caption = gr.Textbox(label="Caption")
|
178 |
scale = gr.Slider(1, 3, value=1, label="Scale", step=0.25)
|
179 |
generate_button = gr.Button("Generate Image", variant="primary")
|
180 |
caption_button = gr.Button("Generate Caption", variant="secondary")
|
181 |
with gr.Column(scale=1):
|
182 |
seed = gr.Slider(0, MAX_SEED, value=42, label="Seed", step=1)
|
183 |
-
steps = gr.Slider(2, 16, value=8, label="Steps"
|
184 |
controlnet_conditioning_scale = gr.Slider(0, 1, value=0.6, label="ControlNet Scale")
|
185 |
guidance_scale = gr.Slider(1, 30, value=3.5, label="Guidance Scale")
|
186 |
guidance_end = gr.Slider(0, 1, value=1.0, label="Guidance End")
|
@@ -188,7 +179,7 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
|
|
188 |
with gr.Accordion("Generation settings", open=False):
|
189 |
system_prompt = gr.Textbox(
|
190 |
lines=4,
|
191 |
-
value=
|
192 |
label="System Prompt for Captioning",
|
193 |
visible=True # Changed to visible
|
194 |
)
|
@@ -220,7 +211,7 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
|
|
220 |
controlnet_conditioning_scale, guidance_scale, seed,
|
221 |
guidance_end, temperature_slider, top_p_slider, max_tokens_slider, log_prompt
|
222 |
],
|
223 |
-
outputs=[
|
224 |
)
|
225 |
|
226 |
caption_button.click(
|
|
|
1 |
import torch
|
2 |
import spaces
|
3 |
import os
|
|
|
|
|
4 |
from diffusers.utils import load_image
|
5 |
from diffusers.hooks import apply_group_offloading
|
6 |
from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
|
7 |
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
|
8 |
from transformers import T5EncoderModel
|
9 |
+
from transformers import LlavaForConditionalGeneration, TextIteratorStreamer, AutoProcessor
|
10 |
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
|
11 |
from liger_kernel.transformers import apply_liger_kernel_to_llama
|
12 |
from PIL import Image
|
13 |
from threading import Thread
|
14 |
from typing import Generator
|
15 |
from peft import PeftModel, PeftConfig
|
16 |
+
import gradio as gr
|
17 |
|
18 |
huggingface_token = os.getenv("HUGGINFACE_TOKEN")
|
|
|
19 |
MAX_SEED = 1000000
|
20 |
MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
|
21 |
cap_processor = AutoProcessor.from_pretrained(MODEL_PATH)
|
|
|
39 |
)
|
40 |
pipe.to("cuda")
|
41 |
|
42 |
+
@spaces.GPU()
|
43 |
@torch.no_grad()
|
44 |
def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
|
45 |
torch.cuda.empty_cache()
|
|
|
78 |
|
79 |
output = cap_model.generate(**generate_kwargs)
|
80 |
print(f"Generated {len(output[0])} tokens")
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
@spaces.GPU()
|
83 |
@torch.no_grad()
|
84 |
def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
|
85 |
generator = torch.Generator().manual_seed(seed)
|
|
|
90 |
h = h - h % 32
|
91 |
control_image = control_image.resize((int(w * scale), int(h * scale)), resample=2) # Resample.BILINEAR
|
92 |
print("Size to: " + str(control_image.size[0]) + ", " + str(control_image.size[1]))
|
|
|
93 |
with torch.inference_mode():
|
94 |
image = pipe(
|
95 |
generator=generator,
|
|
|
147 |
seed=seed,
|
148 |
guidance_end=guidance_end
|
149 |
)
|
|
|
|
|
150 |
yield f"Completed! Used prompt: {final_prompt}", image
|
151 |
except Exception as e:
|
152 |
yield f"Error: {str(e)}", None
|
|
|
164 |
generated_image = gr.Image(type="pil", label="Generated Image", format="png", show_label=False)
|
165 |
with gr.Row():
|
166 |
with gr.Column(scale=1):
|
167 |
+
prompt = gr.Textbox(lines=4, placeholder="Enter your prompt here...", label="Prompt")
|
168 |
output_caption = gr.Textbox(label="Caption")
|
169 |
scale = gr.Slider(1, 3, value=1, label="Scale", step=0.25)
|
170 |
generate_button = gr.Button("Generate Image", variant="primary")
|
171 |
caption_button = gr.Button("Generate Caption", variant="secondary")
|
172 |
with gr.Column(scale=1):
|
173 |
seed = gr.Slider(0, MAX_SEED, value=42, label="Seed", step=1)
|
174 |
+
steps = gr.Slider(2, 16, value=8, label="Steps")
|
175 |
controlnet_conditioning_scale = gr.Slider(0, 1, value=0.6, label="ControlNet Scale")
|
176 |
guidance_scale = gr.Slider(1, 30, value=3.5, label="Guidance Scale")
|
177 |
guidance_end = gr.Slider(0, 1, value=1.0, label="Guidance End")
|
|
|
179 |
with gr.Accordion("Generation settings", open=False):
|
180 |
system_prompt = gr.Textbox(
|
181 |
lines=4,
|
182 |
+
value="Write a straightforward caption for this image. Begin with the main subject and medium. Mention pivotal elements—people, objects, scenery—using confident, definite language. Focus on concrete details like color, shape, texture, and spatial relationships. Show how elements interact. Omit mood and speculative wording. If text is present, quote it exactly. Note any watermarks, signatures, or compression artifacts. Never mention what's absent, resolution, or unobservable details. Vary your sentence structure and keep the description concise, without starting with 'This image is…' or similar phrasing.",
|
183 |
label="System Prompt for Captioning",
|
184 |
visible=True # Changed to visible
|
185 |
)
|
|
|
211 |
controlnet_conditioning_scale, guidance_scale, seed,
|
212 |
guidance_end, temperature_slider, top_p_slider, max_tokens_slider, log_prompt
|
213 |
],
|
214 |
+
outputs=[output_caption, generated_image]
|
215 |
)
|
216 |
|
217 |
caption_button.click(
|