LPX55 committed on
Commit
45a5338
·
verified ·
1 Parent(s): 9f0fb62

Update app_v2.py

Browse files
Files changed (1) hide show
  1. app_v2.py +8 -17
app_v2.py CHANGED
@@ -1,23 +1,21 @@
1
  import torch
2
  import spaces
3
  import os
4
- import gradio as gr
5
-
6
  from diffusers.utils import load_image
7
  from diffusers.hooks import apply_group_offloading
8
  from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
9
  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
10
  from transformers import T5EncoderModel
11
- from transformers import LlavaForConditionalGeneration, TextIteratorStreamer, AutoProcessor, AutoTokenizer
12
  from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
13
  from liger_kernel.transformers import apply_liger_kernel_to_llama
14
  from PIL import Image
15
  from threading import Thread
16
  from typing import Generator
17
  from peft import PeftModel, PeftConfig
 
18
 
19
  huggingface_token = os.getenv("HUGGINFACE_TOKEN")
20
- sys_prompt = os.getenv("SYS")
21
  MAX_SEED = 1000000
22
  MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
23
  cap_processor = AutoProcessor.from_pretrained(MODEL_PATH)
@@ -41,7 +39,7 @@ pipe = FluxControlNetPipeline.from_pretrained(
41
  )
42
  pipe.to("cuda")
43
 
44
- @spaces.GPU(duration=10)
45
  @torch.no_grad()
46
  def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
47
  torch.cuda.empty_cache()
@@ -80,12 +78,8 @@ def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: fl
80
 
81
  output = cap_model.generate(**generate_kwargs)
82
  print(f"Generated {len(output[0])} tokens")
83
- print(f"Generated {type(output)}")
84
- print(f"Generated {output}")
85
-
86
- #return output[0]
87
 
88
- @spaces.GPU(duration=10)
89
  @torch.no_grad()
90
  def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
91
  generator = torch.Generator().manual_seed(seed)
@@ -96,7 +90,6 @@ def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_
96
  h = h - h % 32
97
  control_image = control_image.resize((int(w * scale), int(h * scale)), resample=2) # Resample.BILINEAR
98
  print("Size to: " + str(control_image.size[0]) + ", " + str(control_image.size[1]))
99
- print("Cond Prompt: " + str(prompt))
100
  with torch.inference_mode():
101
  image = pipe(
102
  generator=generator,
@@ -154,8 +147,6 @@ def process_image(control_image, user_prompt, system_prompt, scale, steps,
154
  seed=seed,
155
  guidance_end=guidance_end
156
  )
157
- print(caption_gen)
158
- print(generated_caption)
159
  yield f"Completed! Used prompt: {final_prompt}", image
160
  except Exception as e:
161
  yield f"Error: {str(e)}", None
@@ -173,14 +164,14 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
173
  generated_image = gr.Image(type="pil", label="Generated Image", format="png", show_label=False)
174
  with gr.Row():
175
  with gr.Column(scale=1):
176
- prompt = gr.Textbox(lines=4, placeholder="Enter your prompt here...", label="Prompt", interactive=True)
177
  output_caption = gr.Textbox(label="Caption")
178
  scale = gr.Slider(1, 3, value=1, label="Scale", step=0.25)
179
  generate_button = gr.Button("Generate Image", variant="primary")
180
  caption_button = gr.Button("Generate Caption", variant="secondary")
181
  with gr.Column(scale=1):
182
  seed = gr.Slider(0, MAX_SEED, value=42, label="Seed", step=1)
183
- steps = gr.Slider(2, 16, value=8, label="Steps", step=1)
184
  controlnet_conditioning_scale = gr.Slider(0, 1, value=0.6, label="ControlNet Scale")
185
  guidance_scale = gr.Slider(1, 30, value=3.5, label="Guidance Scale")
186
  guidance_end = gr.Slider(0, 1, value=1.0, label="Guidance End")
@@ -188,7 +179,7 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
188
  with gr.Accordion("Generation settings", open=False):
189
  system_prompt = gr.Textbox(
190
  lines=4,
191
- value=sys_prompt,
192
  label="System Prompt for Captioning",
193
  visible=True # Changed to visible
194
  )
@@ -220,7 +211,7 @@ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as iface:
220
  controlnet_conditioning_scale, guidance_scale, seed,
221
  guidance_end, temperature_slider, top_p_slider, max_tokens_slider, log_prompt
222
  ],
223
- outputs=[prompt, generated_image]
224
  )
225
 
226
  caption_button.click(
 
1
  import torch
2
  import spaces
3
  import os
 
 
4
  from diffusers.utils import load_image
5
  from diffusers.hooks import apply_group_offloading
6
  from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
7
  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
8
  from transformers import T5EncoderModel
9
+ from transformers import LlavaForConditionalGeneration, TextIteratorStreamer, AutoProcessor
10
  from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
11
  from liger_kernel.transformers import apply_liger_kernel_to_llama
12
  from PIL import Image
13
  from threading import Thread
14
  from typing import Generator
15
  from peft import PeftModel, PeftConfig
16
+ import gradio as gr
17
 
18
  huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 
19
  MAX_SEED = 1000000
20
  MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
21
  cap_processor = AutoProcessor.from_pretrained(MODEL_PATH)
 
39
  )
40
  pipe.to("cuda")
41
 
42
+ @spaces.GPU()
43
  @torch.no_grad()
44
  def caption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
45
  torch.cuda.empty_cache()
 
78
 
79
  output = cap_model.generate(**generate_kwargs)
80
  print(f"Generated {len(output[0])} tokens")
 
 
 
 
81
 
82
+ @spaces.GPU()
83
  @torch.no_grad()
84
  def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
85
  generator = torch.Generator().manual_seed(seed)
 
90
  h = h - h % 32
91
  control_image = control_image.resize((int(w * scale), int(h * scale)), resample=2) # Resample.BILINEAR
92
  print("Size to: " + str(control_image.size[0]) + ", " + str(control_image.size[1]))
 
93
  with torch.inference_mode():
94
  image = pipe(
95
  generator=generator,
 
147
  seed=seed,
148
  guidance_end=guidance_end
149
  )
 
 
150
  yield f"Completed! Used prompt: {final_prompt}", image
151
  except Exception as e:
152
  yield f"Error: {str(e)}", None
 
164
  generated_image = gr.Image(type="pil", label="Generated Image", format="png", show_label=False)
165
  with gr.Row():
166
  with gr.Column(scale=1):
167
+ prompt = gr.Textbox(lines=4, placeholder="Enter your prompt here...", label="Prompt")
168
  output_caption = gr.Textbox(label="Caption")
169
  scale = gr.Slider(1, 3, value=1, label="Scale", step=0.25)
170
  generate_button = gr.Button("Generate Image", variant="primary")
171
  caption_button = gr.Button("Generate Caption", variant="secondary")
172
  with gr.Column(scale=1):
173
  seed = gr.Slider(0, MAX_SEED, value=42, label="Seed", step=1)
174
+ steps = gr.Slider(2, 16, value=8, label="Steps")
175
  controlnet_conditioning_scale = gr.Slider(0, 1, value=0.6, label="ControlNet Scale")
176
  guidance_scale = gr.Slider(1, 30, value=3.5, label="Guidance Scale")
177
  guidance_end = gr.Slider(0, 1, value=1.0, label="Guidance End")
 
179
  with gr.Accordion("Generation settings", open=False):
180
  system_prompt = gr.Textbox(
181
  lines=4,
182
+ value="Write a straightforward caption for this image. Begin with the main subject and medium. Mention pivotal elements—people, objects, scenery—using confident, definite language. Focus on concrete details like color, shape, texture, and spatial relationships. Show how elements interact. Omit mood and speculative wording. If text is present, quote it exactly. Note any watermarks, signatures, or compression artifacts. Never mention what's absent, resolution, or unobservable details. Vary your sentence structure and keep the description concise, without starting with 'This image is…' or similar phrasing.",
183
  label="System Prompt for Captioning",
184
  visible=True # Changed to visible
185
  )
 
211
  controlnet_conditioning_scale, guidance_scale, seed,
212
  guidance_end, temperature_slider, top_p_slider, max_tokens_slider, log_prompt
213
  ],
214
+ outputs=[output_caption, generated_image]
215
  )
216
 
217
  caption_button.click(