import torch
from diffusers import StableDiffusionPipeline
import gradio as gr

# Check whether a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pipeline
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)

# Generation function
def generate(prompt):
    image = pipe(prompt).images[0]
    return image

# Define the Gradio interface
interface = gr.Interface(
    fn=generate,
    inputs=gr.Textbox(
        label="Enter a prompt",
        placeholder="e.g. a cute caricature of a cat in a hat"
    ),
    outputs=gr.Image(type="pil"),
    title="Text to Image - Stable Diffusion",
    description="A text-to-image generator using Stable Diffusion."
)

if __name__ == "__main__":
    interface.launch()


# import os
# import torch
# import random
# import importlib
# from PIL import Image
# from huggingface_hub import snapshot_download
# import gradio as gr
# from transformers import AutoProcessor, AutoModelForCausalLM, CLIPTextModel, CLIPTokenizer, CLIPFeatureExtractor
# from diffusers import StableDiffusionPipeline, DiffusionPipeline, EulerDiscreteScheduler, UNet2DConditionModel
#
# # Environment settings
# os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
# REVISION = "ceaf371f01ef66192264811b390bccad475a4f02"
#
# # Download model snapshots locally
# LOCAL_FLORENCE = snapshot_download("microsoft/Florence-2-base", revision=REVISION)
# LOCAL_TURBOX = snapshot_download("tensorart/stable-diffusion-3.5-large-TurboX")
#
# # Device and dtype settings
# device = "cuda" if torch.cuda.is_available() else "cpu"
# dtype = torch.float16 if torch.cuda.is_available() else torch.float32
#
# # Model loading (load components individually, applying the chosen dtype)
# scheduler = EulerDiscreteScheduler.from_pretrained(
#     LOCAL_TURBOX, subfolder="scheduler", torch_dtype=dtype
# )
# text_encoder = CLIPTextModel.from_pretrained(LOCAL_TURBOX, subfolder="text_encoder", torch_dtype=dtype)
# tokenizer = CLIPTokenizer.from_pretrained(LOCAL_TURBOX, subfolder="tokenizer")
# feature_extractor = CLIPFeatureExtractor.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="feature_extractor")
# unet = UNet2DConditionModel.from_pretrained(LOCAL_TURBOX, subfolder="unet", torch_dtype=dtype)
# florence_model = AutoModelForCausalLM.from_pretrained(
#     LOCAL_FLORENCE, trust_remote_code=True, torch_dtype=dtype
# )
# florence_model.to("cpu").eval()
# florence_processor = AutoProcessor.from_pretrained(LOCAL_FLORENCE, trust_remote_code=True)
#
# # Stable Diffusion pipeline
# pipe = DiffusionPipeline.from_pretrained(
#     LOCAL_TURBOX,
#     torch_dtype=dtype,
#     trust_remote_code=True,
#     safety_checker=None,
#     feature_extractor=None
# )
# pipe = pipe.to(device)
# pipe.scheduler = scheduler
# pipe.enable_attention_slicing()  # save memory
#
# # Constants
# MAX_SEED = 2**31 - 1
#
# # Text styler
# def pseudo_translate_to_korean_style(en_prompt: str) -> str:
#     return f"Cartoon styled {en_prompt} handsome or pretty people"
#
# # Prompt generation
# def generate_prompt(image):
#     if not isinstance(image, Image.Image):
#         image = Image.fromarray(image)
#     # Florence-2 task token; the original angle-bracket tag appears to have been stripped, so this value is assumed
#     task = "<MORE_DETAILED_CAPTION>"
#     inputs = florence_processor(text=task, images=image, return_tensors="pt").to("cpu")
#     with torch.no_grad():
#         generated_ids = florence_model.generate(
#             input_ids=inputs["input_ids"],
#             pixel_values=inputs["pixel_values"],
#             max_new_tokens=256,
#             num_beams=3
#         )
#     generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
#     parsed_answer = florence_processor.post_process_generation(
#         generated_text,
#         task=task,
#         image_size=(image.width, image.height)
#     )
#     prompt_en = parsed_answer[task]
#     cartoon_prompt = pseudo_translate_to_korean_style(prompt_en)
#     return cartoon_prompt
#
# # Image generation function
# def generate_image(prompt, seed=42, randomize_seed=False):
#     if randomize_seed:
#         seed = random.randint(0, MAX_SEED)
#     generator = torch.Generator().manual_seed(seed)
#     image = pipe(
#         prompt=prompt,
#         guidance_scale=1.5,
#         num_inference_steps=6,  # optimized step count
#         width=512,
#         height=512,
#         generator=generator
#     ).images[0]
#     return image, seed
#
# # Gradio UI
# with gr.Blocks() as demo:
#     gr.Markdown("# 🖼 Image → Caption → Automatic Cartoon Image Generator")
#     gr.Markdown("**📌 How to use**\n"
#                 "- Upload an image and the AI will automatically caption it, restyle the caption, and generate a cartoon image.")
#     with gr.Row():
#         with gr.Column():
#             input_img = gr.Image(label="🎨 Upload a source image")
#             run_button = gr.Button("✨ Generate")
#         with gr.Column():
#             prompt_out = gr.Textbox(label="📝 Styled prompt", lines=3, show_copy_button=True)
#             output_img = gr.Image(label="🎉 Generated image")
#
#     def full_process(img):
#         prompt = generate_prompt(img)
#         image, seed = generate_image(prompt, randomize_seed=True)
#         return prompt, image
#
#     run_button.click(fn=full_process, inputs=[input_img], outputs=[prompt_out, output_img])
#
# demo.launch()
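
# --- Optional addition (not part of the original script): a minimal sketch of how the
# --- first app's generation step could expose a fixed seed for reproducible outputs,
# --- reusing the `pipe` and `device` defined at the top of the file. The helper name
# --- `generate_with_seed` and the step/guidance values below are illustrative assumptions.
#
# def generate_with_seed(prompt, seed=42):
#     generator = torch.Generator(device=device).manual_seed(seed)  # seeded RNG on the active device
#     return pipe(
#         prompt=prompt,
#         num_inference_steps=30,  # assumed value; the SD v1.5 pipeline default is 50
#         guidance_scale=7.5,      # diffusers default guidance for SD v1.5
#         generator=generator
#     ).images[0]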