Create app.py
app.py ADDED
@@ -0,0 +1,106 @@
import os

import openai
import gradio as gr
import torch
from dotenv import load_dotenv
from PIL import Image  # added so the uploaded file can be opened as an image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load environment variables from the .env file
load_dotenv()

# Read and validate the API key
API_KEY = os.getenv("OPENAI_API_KEY")
if API_KEY is None:
    raise ValueError("The OPENAI_API_KEY environment variable is not set.")

openai.api_key = API_KEY  # set the OpenAI API key

# Load the BLIP model (image captioning)
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU if available, else the CPU
blip_model.to(device)  # move the model to the selected device

# OpenAI chat API call helper (temperature and top_p are tunable).
# This targets the pre-1.0 openai SDK, where the error class lives in
# openai.error rather than on the top-level module.
def call_api(content, system_message, max_tokens=500, temperature=0.6, top_p=1.0):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": content},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return response.choices[0].message["content"].strip()
    except openai.error.OpenAIError as e:
        return f"OpenAI API Error: {str(e)}"

# Caption the image, build a style-specific prompt, and generate the Korean description
def generate_blog_post_in_korean(image_path, user_input, style):
    # 1. Open the uploaded file as a PIL image; convert to RGB so that
    #    palette or RGBA uploads do not break the BLIP processor
    image = Image.open(image_path).convert("RGB")

    # 2. Generate an image caption with BLIP
    inputs = blip_processor(image, return_tensors="pt").to(device)
    out = blip_model.generate(**inputs)
    image_caption = blip_processor.decode(out[0], skip_special_tokens=True)

    # 3. Pick the prompt and the temperature/top_p for the selected style
    if style == "Factual":
        combined_prompt = (
            f"Image caption: {image_caption}\n"
            f"User input: {user_input}\n\n"
            "Based on these two descriptions, describe only the plain facts, concisely and accurately. "
            "Avoid unnecessary background or speculation and give only precise information about the scene. "
            "Write the description in Korean.\n\n"
            "Example: 'Several bowls of doenjang jjigae and a variety of dishes are laid out on the table. "
            "In the center is doenjang jjigae in an earthenware pot, with assorted side dishes beside it.'"
        )
        temperature = 0.2  # stay as close to the facts as possible
        top_p = 0.7        # minimize sampling diversity
    else:  # "Emotional" — the only other Radio option, so else avoids an undefined prompt
        combined_prompt = (
            f"Image caption: {image_caption}\n"
            f"User input: {user_input}\n\n"
            "Drawing on these two descriptions, write in a warm, everyday tone. "
            "Rather than extra explanation or background, convey the scene and its mood naturally. "
            "Write the description in Korean.\n\n"
            "Example: 'The table with doenjang jjigae is neatly set with a variety of dishes. "
            "A savory aroma rises from the piping-hot stew, and beside it sit side dishes piled with meat and vegetables. "
            "Everything is ready to enjoy with rice, with the warm feel of a carefully home-cooked meal.'"
        )
        temperature = 0.7  # more creative, emotional phrasing
        top_p = 0.9        # allow diversity for richer expression

    # 4. Generate the description with the chat model
    system_message = "You are an AI assistant that generates either factual or emotional descriptions based on image descriptions and user input."
    translated_caption = call_api(combined_prompt, system_message, temperature=temperature, top_p=top_p)

    return translated_caption

# Handle a single image
def generate_blog_post_single(image, desc, style):
    if image is not None and desc.strip() != "":
        result = generate_blog_post_in_korean(image, desc, style)
        return result
    else:
        return ""  # return an empty string when the image or the description is missing

# Gradio interface setup (takes a single image and its description)
iface = gr.Interface(
    fn=generate_blog_post_single,
    inputs=[
        gr.File(label="Upload image"),  # gr.File instead of gr.Image, so the handler receives a file path
        gr.Textbox(label="Describe the photo", placeholder="Enter a description of the photo"),
        gr.Radio(["Factual", "Emotional"], label="Description style", value="Factual")  # default -> value
    ],
    outputs=gr.Textbox(label="Generated description"),
    title="Image Description Generator",
    description="Writes a polished Korean description based on a single image and text.",
    allow_flagging="never"
)

if __name__ == "__main__":
    iface.launch(share=True)
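For reference, a minimal requirements.txt sketch for this Space is shown below. The package list is inferred from the imports in app.py; the pre-1.0 pin on openai matches the openai.ChatCompletion and openai.error usage above, but the pins are assumptions, not a file taken from this repo:

openai<1.0
gradio
transformers
torch
python-dotenv
Pillow

To run locally, place the key in a .env file next to app.py (for example OPENAI_API_KEY=sk-..., with a real key) and start the app with python app.py; since iface.launch is called with share=True, a temporary public URL is printed alongside the local one.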