LPX committed · Commit c80eda9 · Parent: 7b18110

major: refactored app_v4.py and model_loader.py

Files changed (3):
  1. README.md +52 -2
  2. app_v4.py +235 -0
  3. model_loader.py +59 -0
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: green
 colorTo: indigo
 sdk: gradio
 sdk_version: 4.44.1
-app_file: app_v3.py
+app_file: app_v4.py
 pinned: true
 license: other
 tags:
@@ -20,4 +20,54 @@ license_link: https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICE
 short_description: Lightning fast guided upscaling with FLUX.
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # FLUX.1 Merged & Fused: Lightning Upscaler and Detailer
+
+ A high-performance image upscaling application built with FLUX.1 models, hosted on Hugging Face Spaces.
+
+ ## Core Components
+
+ - **Framework**: Gradio (v4.44.1)
+ - **Main Model**: FLUX.1M-8step_upscaler-cnet
+ - **Text Encoder**: T5EncoderModel from FLUX.1-merged_uncensored
+ - **Vision Model**: Moondream for image captioning
+
+ ## Key Features
+
+ 1. **Image Upscaling**
+    - ControlNet-based upscaling
+    - Scale factor: 1-3x
+    - 8-step inference for speed
+    - Memory-optimized with xFormers
+
+ 2. **Auto-Captioning**
+    - Uses Moondream for image analysis
+    - Generates detailed image descriptions
+    - Focus area specification
+
+ 3. **Performance Optimizations**
+    - Attention slicing
+    - Memory-efficient attention
+    - BFloat16 precision
+    - GPU acceleration
+
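+ A minimal sketch of how these optimizations map onto a `diffusers` pipeline (the actual wiring lives in `model_loader.py`; this is for orientation only):
+
+ ```python
+ import torch
+ from diffusers import FluxControlNetPipeline
+
+ # BFloat16 roughly halves memory relative to fp32.
+ pipe = FluxControlNetPipeline.from_pretrained(
+     "LPX55/FLUX.1M-8step_upscaler-cnet", torch_dtype=torch.bfloat16
+ )
+ try:
+     # Memory-efficient attention; optional, so degrade gracefully without xFormers.
+     pipe.enable_xformers_memory_efficient_attention()
+ except Exception:
+     pass
+ pipe.enable_attention_slicing()  # lower peak VRAM at a small speed cost
+ pipe.to("cuda")                  # GPU acceleration
+ ```
+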
+ ## Environment Requirements
+
+ - PyTorch 2.4.0
+ - CUDA support
+ - Hugging Face token for model access
+ - Moondream API key
+
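+ Both credentials are read from environment variables at startup; a minimal sanity check (variable names taken verbatim from `model_loader.py`, including the `HUGGINFACE_TOKEN` spelling):
+
+ ```python
+ import os
+
+ # Names must match the Space secrets exactly as model_loader.py reads them.
+ hf_token = os.getenv("HUGGINFACE_TOKEN")
+ md_api_key = os.getenv("MD_KEY")
+ assert hf_token and md_api_key, "set HUGGINFACE_TOKEN and MD_KEY before launching"
+ ```
+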
+ ## Usage
+
+ 1. Upload a control image
+ 2. (Optional) Enter a custom prompt or use the auto-caption
+ 3. Adjust parameters:
+    - Scale (1-3x)
+    - Steps (2-16)
+    - ControlNet scale (0-1)
+    - Guidance scale (1-30)
+    - Seed (0-1000000)
+
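+ These are the same parameters that `process_image` forwards to `generate_image()` in `app_v4.py`. A sketch of the call (assumes it runs inside `app_v4.py` itself, since importing the module would also launch the demo):
+
+ ```python
+ from PIL import Image
+
+ image = generate_image(
+     prompt="highly detailed photo, raw photography.",
+     scale=2.0,                          # upscale factor, 1-3x
+     steps=8,                            # 2-16; 8 is the tuned default
+     control_image=Image.open("input.png"),  # hypothetical local file
+     controlnet_conditioning_scale=0.6,  # 0-1
+     guidance_scale=3.5,                 # 1-30
+     seed=42,                            # 0-1000000
+     guidance_end=1.0,
+ )
+ image.save("upscaled.png")
+ ```
+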
+ ## License
+
+ Non-commercial license (FLUX.1-dev)
app_v4.py ADDED
@@ -0,0 +1,235 @@
+ # app_v4.py
+ import gradio as gr
+ import torch
+ import spaces
+ import os
+ import datetime
+ import io
+ import moondream as md
+ from diffusers.utils import load_image
+ from PIL import Image
+ from threading import Thread
+ from typing import Generator
+ from huggingface_hub import CommitScheduler, HfApi, logging
+ from debug import log_params, scheduler, save_image
+ logging.set_verbosity_debug()
+ from model_loader import safe_model_load
+
+ # Ensure device is set
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ MAX_SEED = 1000000
+
+ model_cache = {"models": None}
+
+ @spaces.GPU(duration=12)  # This function gets priority for GPU access
+ def load_warm_models():
+     """Load models once inside a GPU slot to keep them warm on ZeroGPU."""
+     if model_cache["models"] is None:
+         model_cache["models"] = safe_model_load()
+     return model_cache["models"]
+
+ # This wrapper keeps the models loaded and accessible
+ def get_model():
+     """Get models from the cache, loading them on first use."""
+     if model_cache["models"] is None:
+         model_cache["models"] = load_warm_models()
+     return model_cache["models"]
+
+ # import subprocess
+ # subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
+
+ pipe = get_model()["pipeline"]
+ model = get_model()["captioner"]  # key matches the dict returned by safe_model_load()
+
+ @spaces.GPU(duration=12)
+ @torch.no_grad()
+ def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale, seed, guidance_end):
+     generator = torch.Generator().manual_seed(seed)
+     # Load the control image and snap its dimensions down to multiples of 32,
+     # which FLUX requires (e.g. 1000x750 -> 992x736), before applying the scale.
+     control_image = load_image(control_image)
+     w, h = control_image.size
+     w = w - w % 32
+     h = h - h % 32
+     control_image = control_image.resize((int(w * scale), int(h * scale)), resample=Image.Resampling.BILINEAR)
+     print(f"Size to: {control_image.size[0]}, {control_image.size[1]}")
+     print(f"PromptLog: {repr(prompt)}")
+     with torch.inference_mode():
+         image = pipe(
+             generator=generator,
+             prompt=prompt,
+             control_image=control_image,
+             controlnet_conditioning_scale=controlnet_conditioning_scale,
+             num_inference_steps=steps,
+             guidance_scale=guidance_scale,
+             height=control_image.size[1],
+             width=control_image.size[0],
+             control_guidance_start=0.0,
+             control_guidance_end=guidance_end,
+         ).images[0]
+     # print("Type: " + str(type(image)))
+     return image
+
+ def combine_caption_focus(caption, focus):
+     """Join the general caption and the focus description into one prompt."""
+     if caption is None:
+         caption = ""
+     if focus is None:
+         focus = "highly detailed photo, raw photography."
+     return (str(caption) + "\n\n" + str(focus)).strip()
+
+ def generate_caption(control_image):
+     if control_image is None:
+         return None  # single value; this function feeds a single output component
+
+     # Generate a short caption for the image
+     mcaption = model.caption(control_image, length="short")
+     detailed_caption = mcaption["caption"]
+     print(f"Detailed caption: {detailed_caption}")
+
+     return detailed_caption
+
+ def generate_focus(control_image, focus_list):
+     if control_image is None:
+         return None
+     if focus_list is None:
+         return ""
+     # Describe only the user-specified focus areas
+     focus_query = model.query(control_image, "Please provide a concise but illustrative description of the following area(s) of focus: " + focus_list)
+     focus_description = focus_query["answer"]
+     print(f"Areas of focus: {focus_description}")
+
+     return focus_description
+
+ def process_image(control_image, user_prompt, system_prompt, scale, steps,
+                   controlnet_conditioning_scale, guidance_scale, seed,
+                   guidance_end, temperature, top_p, max_new_tokens, log_prompt):
+     # Start from the user's prompt, if any
+     final_prompt = user_prompt.strip()
+     # If no user prompt was provided, generate a caption first
+     if not final_prompt:
+         # Generate a detailed caption
+         print("Generating caption...")
+         mcaption = model.caption(control_image, length="normal")
+         detailed_caption = mcaption["caption"]
+         final_prompt = detailed_caption
+         yield f"Using caption: {final_prompt}", None, final_prompt
+
+     # Show the final prompt being used
+     yield f"Generating with: {final_prompt}", None, final_prompt
+
+     # Generate the image
+     try:
+         image = generate_image(
+             prompt=final_prompt,
+             scale=scale,
+             steps=steps,
+             control_image=control_image,
+             controlnet_conditioning_scale=controlnet_conditioning_scale,
+             guidance_scale=guidance_scale,
+             seed=seed,
+             guidance_end=guidance_end
+         )
+
+         try:
+             # image.save() returns None, so save to a path first and reopen it.
+             debug_path = "/tmp/" + str(seed) + "output.png"
+             image.save(debug_path)
+             debug_img = Image.open(debug_path)
+             save_image(debug_path, debug_img)
+         except Exception as e:
+             print("Error 160: " + str(e))
+         log_params(final_prompt, scale, steps, controlnet_conditioning_scale, guidance_scale, seed, guidance_end, control_image, image)
+         yield f"Completed! Used prompt: {final_prompt}", image, final_prompt
+     except Exception as e:
+         print("Error: " + str(e))
+         yield f"Error: {str(e)}", None, None
+
+ with gr.Blocks(title="FLUX Turbo Upscaler", fill_height=True) as demo:
+     gr.Markdown("⚠️ WIP SPACE - UNFINISHED & BUGGY")
+     # status_box = gr.Markdown("🔄 Warming up...")
+
+     with gr.Row():
+         with gr.Accordion():
+             control_image = gr.Image(type="pil", label="Control Image", show_label=False)
+         with gr.Accordion():
+             generated_image = gr.Image(type="pil", label="Generated Image", format="png", show_label=False)
+     with gr.Row():
+         with gr.Column(scale=1):
+             prompt = gr.Textbox(lines=4, info="Enter your prompt here or wait for auto-generation...", label="Image Description")
+             focus = gr.Textbox(label="Area(s) of Focus", info="e.g. 'face', 'eyes', 'hair', 'clothes', 'background', etc.", value="clothing material, textures, ethnicity")
+             scale = gr.Slider(1, 3, value=1, label="Scale (Upscale Factor)", step=0.25)
+             with gr.Row():
+                 generate_button = gr.Button("Generate Image", variant="primary")
+                 caption_button = gr.Button("Generate Caption", variant="secondary")
+         with gr.Column(scale=1):
+             seed = gr.Slider(0, MAX_SEED, value=42, label="Seed", step=1)
+             steps = gr.Slider(2, 16, value=8, label="Steps", step=1)
+             controlnet_conditioning_scale = gr.Slider(0, 1, value=0.6, label="ControlNet Scale")
+             guidance_scale = gr.Slider(1, 30, value=3.5, label="Guidance Scale")
+             guidance_end = gr.Slider(0, 1, value=1.0, label="Guidance End")
+     with gr.Row():
+         with gr.Accordion("Auto-Caption settings", open=False, visible=False):
+             system_prompt = gr.Textbox(
+                 lines=4,
+                 value="Write a straightforward caption for this image. Begin with the main subject and medium. Mention pivotal elements—people, objects, scenery—using confident, definite language. Focus on concrete details like color, shape, texture, and spatial relationships. Show how elements interact. Omit mood and speculative wording. If text is present, quote it exactly. Note any watermarks, signatures, or compression artifacts. Never mention what's absent, resolution, or unobservable details. Vary your sentence structure and keep the description concise, without starting with 'This image is…' or similar phrasing.",
+                 label="System Prompt for Captioning",
+                 visible=False  # hidden until the captioning settings are wired up
+             )
+             temperature_slider = gr.Slider(
+                 minimum=0.0, maximum=2.0, value=0.6, step=0.05,
+                 label="Temperature",
+                 info="Higher values make the output more random, lower values make it more deterministic.",
+                 visible=False  # hidden until the captioning settings are wired up
+             )
+             top_p_slider = gr.Slider(
+                 minimum=0.0, maximum=1.0, value=0.9, step=0.01,
+                 label="Top-p",
+                 visible=False  # hidden until the captioning settings are wired up
+             )
+             max_tokens_slider = gr.Slider(
+                 minimum=1, maximum=2048, value=368, step=1,
+                 label="Max New Tokens",
+                 info="Maximum number of tokens to generate. The model will stop generating if it reaches this limit.",
+                 visible=False  # hidden until the captioning settings are wired up
+             )
+             log_prompt = gr.Checkbox(value=True, label="Log", visible=False)  # hidden
+
+     gr.Markdown("**Tips:** 8 steps is all you need! Incredibly powerful tool, usage instructions coming soon.")
+
+     caption_state = gr.State()
+     focus_state = gr.State()
+     log_state = gr.State()
+
+     generate_button.click(
+         fn=process_image,
+         inputs=[
+             control_image, prompt, system_prompt, scale, steps,
+             controlnet_conditioning_scale, guidance_scale, seed,
+             guidance_end, temperature_slider, top_p_slider, max_tokens_slider, log_prompt
+         ],
+         outputs=[log_state, generated_image, prompt]
+     )
+     # Uploading an image triggers the caption -> focus -> combined-prompt chain
+     control_image.input(
+         generate_caption,
+         inputs=[control_image],
+         outputs=[caption_state]
+     ).then(
+         generate_focus,
+         inputs=[control_image, focus],
+         outputs=[focus_state]
+     ).then(
+         combine_caption_focus,
+         inputs=[caption_state, focus_state],
+         outputs=[prompt]
+     )
+     # The manual caption button runs the same chain; the caption must land in
+     # caption_state (not prompt) so combine_caption_focus can read it.
+     caption_button.click(
+         fn=generate_caption,
+         inputs=[control_image],
+         outputs=[caption_state]
+     ).then(
+         generate_focus,
+         inputs=[control_image, focus],
+         outputs=[focus_state]
+     ).then(
+         combine_caption_focus,
+         inputs=[caption_state, focus_state],
+         outputs=[prompt]
+     )
+
+ demo.launch(show_error=True)
model_loader.py ADDED
@@ -0,0 +1,59 @@
+ # model_loader.py
+ import os
+ import torch
+ from diffusers import FluxControlNetPipeline
+ from transformers import T5EncoderModel
+ from moondream import vl
+
+ def safe_model_load():
+     """Load models in a single GPU invocation to keep them warm."""
+     try:
+         # Set max memory usage for ZeroGPU
+         torch.cuda.set_per_process_memory_fraction(1.0)
+         torch.set_float32_matmul_precision("high")
+
+         # Credentials; note the "HUGGINFACE_TOKEN" (sic) spelling, which must
+         # match the Space secret of the same name.
+         huggingface_token = os.getenv("HUGGINFACE_TOKEN")
+         md_api_key = os.getenv("MD_KEY")
+
+         text_encoder = T5EncoderModel.from_pretrained(
+             "LPX55/FLUX.1-merged_uncensored",
+             subfolder="text_encoder_2",
+             torch_dtype=torch.bfloat16,
+             token=huggingface_token
+         )
+
+         pipe = FluxControlNetPipeline.from_pretrained(
+             "LPX55/FLUX.1M-8step_upscaler-cnet",
+             torch_dtype=torch.bfloat16,
+             text_encoder_2=text_encoder,
+             token=huggingface_token
+         )
+
+         # Apply memory optimizations
+         try:
+             pipe.enable_xformers_memory_efficient_attention()
+         except Exception as e:
+             print(f"XFormers not available: {e}")
+
+         pipe.enable_attention_slicing()
+         # Sequential CPU offload manages device placement itself; calling
+         # pipe.to("cuda") afterwards conflicts with the offload hooks and
+         # raises, so the explicit move is omitted.
+         pipe.enable_sequential_cpu_offload()
+
+         # For memory-sensitive environments
+         try:
+             torch.multiprocessing.set_sharing_strategy('file_system')
+         except Exception as e:
+             print(f"Exception raised (torch.multiprocessing): {e}")
+
+         # Moondream captioner (cloud API client)
+         model = vl(api_key=md_api_key)
+
+         return {
+             "pipeline": pipe,
+             "captioner": model
+         }
+
+     except Exception as e:
+         print(f"Model loading failed: {e}")
+         # Return a placeholder so the UI can handle the failure gracefully
+         return {"error": str(e)}
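
A note on the failure path: `safe_model_load()` reports errors by returning `{"error": ...}`, while `app_v4.py` indexes the result directly. A minimal caller-side guard, as a sketch:

```python
models = safe_model_load()
if "error" in models:
    # Surface the load failure instead of hitting a KeyError later.
    raise RuntimeError(f"Model loading failed: {models['error']}")

pipe = models["pipeline"]
captioner = models["captioner"]
```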