LPX55 committed on
Commit
1b616c4
·
verified ·
1 Parent(s): e4bd5a4

Update optimized.py

Browse files
Files changed (1) hide show
  1. optimized.py +24 -32
optimized.py CHANGED
@@ -22,48 +22,40 @@ huggingface_token = os.getenv("HUGGINFACE_TOKEN")
22
  # device_map=None, # Disable automatic mapping
23
  # token=huggingface_token
24
  # )
25
- controlnet = FluxControlNetModel.from_pretrained(
26
- "jasperai/Flux.1-dev-Controlnet-Upscaler",
27
- torch_dtype=torch.bfloat16
 
 
 
 
28
  )
29
- # Device management - critical fix
 
30
  pipe = FluxControlNetPipeline.from_pretrained(
31
  "LPX55/FLUX.1-merged_uncensored",
32
- controlnet=controlnet,
33
- vae=good_vae,
 
 
 
34
  torch_dtype=torch.bfloat16,
35
  use_safetensors=True,
36
  device_map=None,
37
- token=huggingface_token
38
  )
39
 
40
- # Sequence verified for Diffusers 0.20.0+
41
- device_map = infer_auto_device_map(
42
- pipe,
43
- max_memory={0:"37GiB", "cpu":"60GiB"},
44
- device_types=["cuda", "cpu"]
45
- )
46
- pipe = dispatch_model(pipe, device_map=device_map, main_device="cuda")
47
- pipe.enable_sequential_cpu_offload() # No arguments for new API
48
-
49
- # Precision alignment (AFTER offloading)
50
- pipe.unet.to(dtype=torch.bfloat16)
51
- pipe.controlnet.to(dtype=torch.bfloat16)
52
- pipe.vae.to(dtype=torch.bfloat16)
53
-
54
- # XFormers with Flux compatibility
55
- if torch.cuda.is_available():
56
- try:
57
- pipe.enable_xformers_memory_efficient_attention(
58
- attention_op=None # Auto-select best operator
59
- )
60
- except Exception as e:
61
- print(f"xFormers error: {e}")
62
- pipe.enable_sdp_attention(mode="math")
63
 
64
- # Memory format optimization
65
- pipe.to(memory_format=torch.channels_last)
 
66
 
 
 
 
67
 
68
  print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f}GB")
69
  @spaces.GPU
 
22
  # device_map=None, # Disable automatic mapping
23
  # token=huggingface_token
24
  # )
25
+
26
+ good_vae = AutoencoderKL.from_pretrained(
27
+ "black-forest-labs/FLUX.1-dev",
28
+ subfolder="vae",
29
+ torch_dtype=torch.bfloat16,
30
+ use_safetensors=True,
31
+ token=huggingface_token # Fix typo in variable name
32
  )
33
+
34
+ # 2. Main Pipeline Initialization WITH VAE SCOPE
35
  pipe = FluxControlNetPipeline.from_pretrained(
36
  "LPX55/FLUX.1-merged_uncensored",
37
+ controlnet=FluxControlNetModel.from_pretrained(
38
+ "jasperai/Flux.1-dev-Controlnet-Upscaler",
39
+ torch_dtype=torch.bfloat16
40
+ ),
41
+ vae=good_vae, # Now defined in scope
42
  torch_dtype=torch.bfloat16,
43
  use_safetensors=True,
44
  device_map=None,
45
+ token=huggingface_token # Note corrected env var name
46
  )
47
 
48
+ # 3. Strict Order for Optimization Steps
49
+ # A. Apply CPU Offloading FIRST
50
+ pipe.enable_sequential_cpu_offload()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ # B. Enable Memory Optimizations
53
+ pipe.enable_vae_tiling()
54
+ pipe.enable_xformers_memory_efficient_attention()
55
 
56
+ # C. Unified Precision Handling
57
+ for comp in [pipe.unet, pipe.vae, pipe.controlnet]:
58
+ comp.to(dtype=torch.bfloat16)
59
 
60
  print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f}GB")
61
  @spaces.GPU