✨ FLUX Ghibli LoRA Generator ✨
Community: https://discord.gg/openfreeai
import random
import os
import uuid
from datetime import datetime
import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import DiffusionPipeline
from PIL import Image
import re
import tempfile
import io
import logging

# -----------------------------
# Google Gemini API
# -----------------------------
import google.generativeai as genai
import google.generativeai.types as genai_types

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

###############################################################################
# 1. Helper for converting text (Korean → English)
###############################################################################
def maybe_translate_to_english(text: str) -> str:
    """
    If the text contains Korean, convert it to English using simple substitution rules.
    """
    translations = {
        "안녕하세요": "Hello",
        "환영합니다": "Welcome",
        "안녕": "Hello",
        "배너": "Banner",
        # Add more entries as needed
    }
    for kr, en in translations.items():
        if kr in text:
            text = text.replace(kr, en)
    return text

###############################################################################
# 2. Setup for calling the Gemini API
###############################################################################
def save_binary_file(file_name, data):
    """
    Helper that writes binary data to a file.
    """
    with open(file_name, "wb") as f:
        f.write(data)

def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
    """
    Call the Google Gemini API to perform text-based image editing/generation.
    file_name: the original image, uploaded temporarily and passed to the API
    text: the text instruction to apply
    """
    api_key = os.getenv("GAPI_TOKEN")
    if not api_key:
        raise ValueError("GAPI_TOKEN is missing. Please set an API key.")

    # Configure Gemini API authentication
    genai.configure(api_key=api_key)

    # Upload the image file
    uploaded_file = genai.upload_file(path=file_name)

    # Build the contents passed to the API: the uploaded file first, then the
    # text instruction (the SDK accepts uploaded File objects and strings directly).
    contents = [uploaded_file, text]

    # Generation (editing) settings
    generation_config = genai_types.GenerationConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,  # output token limit
        response_mime_type="text/plain",
    )

    text_response = ""   # accumulated text returned by the API
    image_path = None    # local path of the image file returned by the API

    # Temporary file that will hold the edited image
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        temp_path = tmp.name

    # Receive the response as a stream
    response = genai.GenerativeModel(model).generate_content(
        contents,
        generation_config=generation_config,
        stream=True,
    )

    # Extract the image and text from the streamed chunks
    for chunk in response:
        for candidate in chunk.candidates:
            for part in candidate.content.parts:
                # Image part
                if hasattr(part, 'inline_data') and part.inline_data:
                    save_binary_file(temp_path, part.inline_data.data)
                    image_path = temp_path
                    break
                # Text part
                elif hasattr(part, 'text'):
                    text_response += part.text + "\n"
            if image_path:
                break
        if image_path:
            break

    # Delete the temporarily uploaded file
    genai.delete_file(uploaded_file.name)

    return image_path, text_response
""" import numpy as np # 만약 이미지가 numpy.ndarray 타입이면 PIL로 변환 if isinstance(original_image, np.ndarray): original_image = Image.fromarray(original_image) results = [] for version_tag in ["(A)", "(B)"]: mod_instruction = f"{instruction} {version_tag}" try: with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: original_path = tmp.name if isinstance(original_image, Image.Image): original_image.save(original_path, format="PNG") logging.debug(f"[DEBUG] Saved image to temporary file: {original_path}") else: raise gr.Error(f"예상된 PIL Image가 아닌 {type(original_image)} 타입이 제공되었습니다.") # Gemini API 호출 image_path, text_response = generate_by_google_genai( text=mod_instruction, file_name=original_path ) if image_path: # 반환된 이미지 로드 try: with open(image_path, "rb") as f: image_data = f.read() new_img = Image.open(io.BytesIO(image_data)) results.append(new_img) except Exception as img_err: logging.error(f"[ERROR] Failed to process Gemini image: {img_err}") results.append(original_image) else: logging.warning(f"[WARNING] 이미지가 반환되지 않았습니다. 텍스트 응답: {text_response}") results.append(original_image) except Exception as e: logging.exception(f"Text modification error: {e}") results.append(original_image) return results ############################################################################### # 4. 텍스트 렌더링(문자 삽입)용 함수 ############################################################################### def gemini_text_rendering(image, rendering_text): """ 주어진 image에 대해 Gemini API로 text_rendering을 적용 """ rendering_text_en = maybe_translate_to_english(rendering_text) instruction = ( f"Render the following text on the image in a clear, visually appealing manner: " f"{rendering_text_en}." ) # 이미지에 텍스트 삽입(A/B 버전 2회 생성) → 여기서는 2회 중 첫 번째만 반환 rendered_images = change_text_in_image_two_times(image, instruction) if rendered_images and len(rendered_images) > 0: return rendered_images[0] return image def apply_text_rendering(image, rendering_text): """ rendering_text가 존재하면 Gemini API로 텍스트 삽입을 적용. 없으면 원본 이미지를 그대로 반환. """ if rendering_text and rendering_text.strip(): return gemini_text_rendering(image, rendering_text) return image ############################################################################### # 5. Diffusion Pipeline 로드 및 기본 세팅 ############################################################################### SAVE_DIR = "saved_images" if not os.path.exists(SAVE_DIR): os.makedirs(SAVE_DIR, exist_ok=True) device = "cuda" if torch.cuda.is_available() else "cpu" repo_id = "black-forest-labs/FLUX.1-dev" adapter_id = "openfree/flux-chatgpt-ghibli-lora" def load_model_with_retry(max_retries=5): """ 로컬 또는 Hugging Face로부터 모델(FLUX.1-dev) + LoRA 어댑터(weights)를 불러온다. """ for attempt in range(max_retries): try: logging.info(f"Loading model attempt {attempt+1}/{max_retries}...") pipeline = DiffusionPipeline.from_pretrained( repo_id, torch_dtype=torch.bfloat16, use_safetensors=True, resume_download=True ) logging.info("Model loaded successfully, loading LoRA weights...") pipeline.load_lora_weights(adapter_id) pipeline = pipeline.to(device) logging.info("Pipeline ready!") return pipeline except Exception as e: if attempt < max_retries - 1: wait_time = 10 * (attempt + 1) logging.error(f"Error loading model: {e}. Retrying in {wait_time} seconds...") import time time.sleep(wait_time) else: raise Exception(f"Failed to load model after {max_retries} attempts: {e}") pipeline = load_model_with_retry() MAX_SEED = np.iinfo(np.int32).max MAX_IMAGE_SIZE = 1024 def save_generated_image(image, prompt): """ 생성된 이미지를 저장하면서 메타 정보를 기록한다. 
""" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") unique_id = str(uuid.uuid4())[:8] filename = f"{timestamp}_{unique_id}.png" filepath = os.path.join(SAVE_DIR, filename) image.save(filepath) metadata_file = os.path.join(SAVE_DIR, "metadata.txt") with open(metadata_file, "a", encoding="utf-8") as f: f.write(f"{filename}|{prompt}|{timestamp}\n") return filepath def load_generated_images(): """ 저장된 이미지를 최신순으로 불러온다. """ if not os.path.exists(SAVE_DIR): return [] image_files = [ os.path.join(SAVE_DIR, f) for f in os.listdir(SAVE_DIR) if f.endswith(('.png', '.jpg', '.jpeg', '.webp')) ] image_files.sort(key=lambda x: os.path.getctime(x), reverse=True) return image_files @spaces.GPU(duration=120) def inference( prompt: str, seed: int, randomize_seed: bool, width: int, height: int, guidance_scale: float, num_inference_steps: int, lora_scale: float, progress: gr.Progress = gr.Progress(track_tqdm=True), ): """ Diffusion Pipeline을 사용해 이미지를 생성. (LoRA 스케일, Steps 등 설정 가능) """ if randomize_seed: seed = random.randint(0, MAX_SEED) generator = torch.Generator(device=device).manual_seed(seed) try: image = pipeline( prompt=prompt, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, width=width, height=height, generator=generator, joint_attention_kwargs={"scale": lora_scale}, ).images[0] filepath = save_generated_image(image, prompt) return image, seed, load_generated_images() except Exception as e: logging.error(f"Error during inference: {e}") error_img = Image.new('RGB', (width, height), color='red') return error_img, seed, load_generated_images() ############################################################################### # 6. Gradio UI ############################################################################### examples = [ "Ghibli style futuristic stormtrooper with glossy white armor and a sleek helmet, standing heroically on a lush alien planet, vibrant flowers blooming around, soft sunlight illuminating the scene, a gentle breeze rustling the leaves. The armor reflects the pink and purple hues of the alien sunset, creating an ethereal glow around the figure. [trigger]", "Ghibli style young mechanic girl in a floating workshop, surrounded by hovering tools and glowing mechanical parts, her blue overalls covered in oil stains, tinkering with a semi-transparent robot companion. Magical sparks fly as she works, while floating islands with waterfalls drift past her open workshop window. [trigger]", "Ghibli style ancient forest guardian robot, covered in moss and flowering vines, sitting peacefully in a crystal-clear lake. Its gentle eyes glow with soft blue light, while bioluminescent dragonflies dance around its weathered metal frame. Ancient tech symbols on its surface pulse with a gentle rhythm. [trigger]", "Ghibli style sky whale transport ship, its metallic skin adorned with traditional Japanese patterns, gliding through cotton candy clouds at sunrise. Small floating gardens hang from its sides, where workers in futuristic kimonos tend to glowing plants. Rainbow auroras shimmer in the background. [trigger]", "Ghibli style cyber-shrine maiden with flowing holographic robes, performing a ritual dance among floating lanterns and digital cherry blossoms. Her traditional headdress emits soft light patterns, while spirit-like AI constructs swirl around her in elegant patterns. The scene is set in a modern shrine with both ancient wood and sleek chrome elements. 
[trigger]", "Ghibli style robot farmer tending to floating rice paddies in the sky, wearing a traditional straw hat with advanced sensors. Its gentle movements create ripples in the water as it plants glowing rice seedlings. Flying fish leap between the terraced fields, leaving trails of sparkles in their wake, while future Tokyo's spires gleam in the distance. [trigger]" ] css = """ :root { --primary-color: #6a92cc; --primary-hover: #557ab8; --secondary-color: #f4c062; --background-color: #f7f9fc; --panel-background: #ffffff; --text-color: #333333; --border-radius: 12px; --shadow: 0 4px 12px rgba(0,0,0,0.08); --font-main: 'Poppins', -apple-system, BlinkMacSystemFont, sans-serif; } body { background-color: var(--background-color); font-family: var(--font-main); } .gradio-container { margin: 0 auto; max-width: 1200px !important; } .main-header { text-align: center; padding: 2rem 1rem 1rem; background: linear-gradient(90deg, #6a92cc 0%, #8f7fc8 100%); color: white; margin-bottom: 2rem; border-radius: var(--border-radius); box-shadow: var(--shadow); } .main-header h1 { font-size: 2.5rem; margin-bottom: 0.5rem; font-weight: 700; text-shadow: 0 2px 4px rgba(0,0,0,0.2); } .main-header p { font-size: 1rem; margin-bottom: 0.5rem; opacity: 0.9; } .main-header a { color: var(--secondary-color); text-decoration: none; font-weight: 600; transition: all 0.2s ease; } .main-header a:hover { text-decoration: underline; opacity: 0.9; } .container { background-color: var(--panel-background); padding: 1.5rem; border-radius: var(--border-radius); box-shadow: var(--shadow); margin-bottom: 1.5rem; } button.primary { background: var(--primary-color) !important; border: none !important; color: white !important; padding: 10px 20px !important; border-radius: 8px !important; font-weight: 600 !important; box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important; transition: all 0.2s ease !important; } button.primary:hover { background: var(--primary-hover) !important; transform: translateY(-2px) !important; box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important; } button.secondary { background: white !important; border: 1px solid #ddd !important; color: var(--text-color) !important; padding: 10px 20px !important; border-radius: 8px !important; font-weight: 500 !important; box-shadow: 0 2px 5px rgba(0,0,0,0.05) !important; transition: all 0.2s ease !important; } button.secondary:hover { background: #f5f5f5 !important; transform: translateY(-2px) !important; } .gr-box { border-radius: var(--border-radius) !important; border: 1px solid #e0e0e0 !important; } .gr-panel { border-radius: var(--border-radius) !important; } .gr-input { border-radius: 8px !important; border: 1px solid #ddd !important; padding: 12px !important; } .gr-form { border-radius: var(--border-radius) !important; background-color: var(--panel-background) !important; } .gr-accordion { border-radius: var(--border-radius) !important; overflow: hidden !important; } .gr-button { border-radius: 8px !important; } .gallery-item { border-radius: var(--border-radius) !important; transition: all 0.3s ease !important; } .gallery-item:hover { transform: scale(1.02) !important; box-shadow: 0 6px 15px rgba(0,0,0,0.1) !important; } .tabs { border-radius: var(--border-radius) !important; overflow: hidden !important; } footer { display: none !important; } .settings-accordion legend span { font-weight: 600 !important; } .example-prompt { font-size: 0.9rem; color: #555; padding: 8px; background: #f5f7fa; border-radius: 6px; border-left: 3px solid var(--primary-color); margin-bottom: 
.example-prompt { font-size: 0.9rem; color: #555; padding: 8px; background: #f5f7fa; border-radius: 6px; border-left: 3px solid var(--primary-color); margin-bottom: 8px; cursor: pointer; transition: all 0.2s; }
.example-prompt:hover { background: #eef2f8; }
.status-generating { color: #ffa200; font-weight: 500; display: flex; align-items: center; gap: 8px; }
.status-generating::before { content: ""; display: inline-block; width: 12px; height: 12px; border-radius: 50%; background-color: #ffa200; animation: pulse 1.5s infinite; }
.status-complete { color: #00c853; font-weight: 500; display: flex; align-items: center; gap: 8px; }
.status-complete::before { content: ""; display: inline-block; width: 12px; height: 12px; border-radius: 50%; background-color: #00c853; }
@keyframes pulse { 0% { opacity: 0.6; } 50% { opacity: 1; } 100% { opacity: 0.6; } }
.gr-accordion-title { font-weight: 600 !important; color: var(--text-color) !important; }
.tabs button { font-weight: 500 !important; padding: 10px 16px !important; }
.tabs button.selected { font-weight: 600 !important; color: var(--primary-color) !important; background: rgba(106, 146, 204, 0.1) !important; }
.gr-slider-container { padding: 10px 0 !important; }
.gr-prose h3 { font-weight: 600 !important; color: var(--primary-color) !important; margin-bottom: 1rem !important; }
"""

with gr.Blocks(css=css, analytics_enabled=False, theme="soft") as demo:
    with gr.Column():
        gr.HTML('''
Community: https://discord.gg/openfreeai