AIRider committed on
Commit
5625f85
·
verified ·
1 Parent(s): af33bc4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -0
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+ import gradio as gr
4
+ from transformers import BlipProcessor, BlipForConditionalGeneration
5
+ from dotenv import load_dotenv
6
+ import torch
7
+ from PIL import Image # PIL을 μ‚¬μš©ν•˜μ—¬ 이미지λ₯Ό μ—΄κΈ° μœ„ν•΄ μΆ”κ°€
8
+
9
# Load environment variables from a .env file.
load_dotenv()

# Fail fast when the key is missing *or* empty: an empty string would pass an
# `is None` check and only surface later as an API error on the first request.
API_KEY = os.getenv("OPENAI_API_KEY")
if not API_KEY:
    raise ValueError("OPENAI_API_KEY ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")

openai.api_key = API_KEY  # configure the OpenAI client with the validated key

# BLIP image-captioning model; the processor and the weights are loaded from
# the same checkpoint name, so it is spelled out once.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
blip_model.to(device)  # move the model to the selected device
24
+
25
def call_api(content, system_message, max_tokens=500, temperature=0.6, top_p=1.0):
    """Send one system/user message pair to the chat model and return the reply.

    Args:
        content: Text sent as the "user" message.
        system_message: Text sent as the "system" message.
        max_tokens: Forwarded to the API as ``max_tokens``.
        temperature: Forwarded to the API as ``temperature``.
        top_p: Forwarded to the API as ``top_p``.

    Returns:
        The stripped content of the first choice, or a string starting with
        ``"OpenAI API Error: "`` when the client raises ``openai.OpenAIError``
        (the error is reported as text rather than re-raised).
    """
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 SDK
    # interface -- confirm the pinned `openai` version before upgrading.
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=conversation,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        reply_text = completion.choices[0].message['content']
        return reply_text.strip()
    except openai.OpenAIError as err:
        return f"OpenAI API Error: {err!s}"
41
+
42
def generate_blog_post_in_korean(image_path, user_input, style):
    """Caption an image with BLIP and turn caption + user text into a description.

    The BLIP caption and ``user_input`` are combined with a style-specific
    instruction block and sent to the chat model through ``call_api``.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts for the uploaded image.
        user_input: The user's own description of the picture.
        style: One of the two style labels compared against below; each label
            selects its own instruction text and ``temperature`` / ``top_p``.

    Returns:
        Whatever ``call_api`` returns for the combined prompt.

    Raises:
        ValueError: If ``style`` is not one of the two supported labels.
            (Previously this surfaced as an ``UnboundLocalError`` only after
            the caption had already been computed.)
    """
    # 1. Resolve the style first so an unsupported value fails before any
    #    model inference is done.
    if style == "사싀적인":
        instructions = (
            "이 두 μ„€λͺ…을 기반으둜 μžˆλŠ” κ·ΈλŒ€λ‘œμ˜ μ‚¬μ‹€λ§Œ κ°„κ²°ν•˜κ³  μ •ν™•ν•˜κ²Œ λ¬˜μ‚¬ν•΄ μ£Όμ„Έμš”. "
            "λΆˆν•„μš”ν•œ λ°°κ²½ μ„€λͺ…μ΄λ‚˜ 좔둠은 ν”Όν•˜κ³ , μž₯면에 λŒ€ν•œ μ •ν™•ν•œ μ •λ³΄λ§Œ μ œκ³΅ν•΄ μ£Όμ„Έμš”.\n\n"
            "μ˜ˆμ‹œ: 'ν…Œμ΄λΈ” μœ„μ— μ—¬λŸ¬ κ·Έλ¦‡μ˜ 된μž₯μ°Œκ°œμ™€ λ‹€μ–‘ν•œ μŒμ‹λ“€μ΄ 놓여져 μžˆλ‹€. "
            "쀑앙에 λšλ°°κΈ°μ— λ‹΄κΈ΄ 된μž₯μ°Œκ°œκ°€ 있고, κ·Έ μ˜†μ—λŠ” 각쒅 λ°˜μ°¬λ“€μ΄ 놓여 μžˆμŠ΅λ‹ˆλ‹€.'"
        )
        temperature = 0.2  # low temperature: stay as close to the facts as possible
        top_p = 0.7  # restrict sampling diversity
    elif style == "감성적인":
        instructions = (
            "이 두 μ„€λͺ…을 μ°Έκ³ ν•΄μ„œ 일상적이고 λ”°λœ»ν•œ λΆ„μœ„κΈ°μ˜ κΈ€λ‘œ ν‘œν˜„ν•΄ μ£Όμ„Έμš”. "
            "좔가적인 μ„€λͺ…μ΄λ‚˜ λ°°κ²½λ³΄λ‹€λŠ” μž₯λ©΄κ³Ό 감정을 μžμ—°μŠ€λŸ½κ²Œ μ „λ‹¬ν•˜λŠ” 글을 써 μ£Όμ„Έμš”.\n\n"
            "μ˜ˆμ‹œ: '된μž₯μ°Œκ°œκ°€ 놓인 ν…Œμ΄λΈ”μ—λŠ” λ‹€μ–‘ν•œ μŒμ‹λ“€μ΄ μ •κ°ˆν•˜κ²Œ μ°¨λ €μ Έ μžˆμŠ΅λ‹ˆλ‹€. "
            "λœ¨λˆν•œ 된μž₯μ°Œκ°œμ—μ„œλŠ” κ΅¬μˆ˜ν•œ ν–₯이 풍기고, κ·Έ μ˜†μ—λŠ” 고기와 μ±„μ†Œκ°€ 듬뿍 λ‹΄κΈ΄ λ°˜μ°¬λ“€μ΄ 놓여 μžˆμ–΄μš”. "
            "λ°₯κ³Ό ν•¨κ»˜ λ¨ΉκΈ° 쒋은 μŒμ‹λ“€μ΄ μ€€λΉ„λ˜μ–΄ 있고, μ§‘μ—μ„œ μ •μ„±μŠ€λŸ½κ²Œ λ§Œλ“  λ”°λœ»ν•œ λŠλ‚Œμ΄ λ“­λ‹ˆλ‹€.'"
        )
        temperature = 0.7  # higher temperature: more creative, emotional wording
        top_p = 0.9  # allow more diversity for richer expression
    else:
        raise ValueError(f"Unsupported style: {style!r}")

    # 2. Open the image, releasing the file handle as soon as the pixels are
    #    loaded. Converting to RGB keeps RGBA / palette / grayscale uploads
    #    from reaching the BLIP processor in an unexpected mode.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    # 3. Generate the image caption with BLIP.
    inputs = blip_processor(image, return_tensors="pt").to(device)
    out = blip_model.generate(**inputs)
    image_caption = blip_processor.decode(out[0], skip_special_tokens=True)

    # 4. Both styles share the same two header lines (caption, then user
    #    input) followed by the style-specific instructions.
    combined_prompt = (
        f"이미지 μ„€λͺ…: {image_caption}\n"
        f"μ‚¬μš©μž μž…λ ₯: {user_input}\n\n"
        f"{instructions}"
    )

    # 5. Ask the chat model for the final description.
    system_message = "You are an AI assistant that generates either factual or emotional descriptions based on image descriptions and user input."
    return call_api(combined_prompt, system_message, temperature=temperature, top_p=top_p)
82
+
83
+ # ν•˜λ‚˜μ˜ μ΄λ―Έμ§€λ§Œ μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜
84
def generate_blog_post_single(image, desc, style):
    """Generate a description for a single image, or "" when input is missing.

    Args:
        image: The uploaded image; ``None`` means nothing was uploaded.
        desc: The user's description text. ``None``, an empty string, or
            whitespace-only text all count as "no description".
        style: Style label forwarded to ``generate_blog_post_in_korean``.

    Returns:
        The generated description, or an empty string when either the image
        or the description is missing.
    """
    # `desc or ""` guards against a None description, which would otherwise
    # raise AttributeError on `.strip()`.
    if image is None or not (desc or "").strip():
        return ""
    return generate_blog_post_in_korean(image, desc, style)
90
+
91
# Gradio interface: takes exactly one image, one description and one style.
# The input components are created in the order the handler receives them.
_image_input = gr.File(label="이미지 μ—…λ‘œλ“œ")  # gr.File is used instead of gr.Image
_description_input = gr.Textbox(
    label="사진에 λŒ€ν•œ μ„€λͺ… μž…λ ₯",
    placeholder="사진 μ„€λͺ…을 μž…λ ₯ν•˜μ„Έμš”",
)
# The choices must stay in sync with the style labels checked in
# generate_blog_post_in_korean; the first one is preselected via `value`.
_style_input = gr.Radio(
    ["사싀적인", "감성적인"],
    label="μ„€λͺ… μŠ€νƒ€μΌ 선택",
    value="사싀적인",
)
_result_output = gr.Textbox(label="이미지 μ„€λͺ… κ²°κ³Ό")

iface = gr.Interface(
    fn=generate_blog_post_single,
    inputs=[_image_input, _description_input, _style_input],
    outputs=_result_output,
    title="이미지 μ„€λͺ… 생성기",
    description="ν•˜λ‚˜μ˜ 이미지와 ν…μŠ€νŠΈλ₯Ό λ°”νƒ•μœΌλ‘œ μ΅œμƒμ˜ ν•œκ΅­μ–΄λ‘œ ν‘œν˜„ν•©λ‹ˆλ‹€.",
    allow_flagging="never",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)