ginipick committed on
Commit
289e506
·
verified ·
1 Parent(s): a050e48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +195 -3
app.py CHANGED
@@ -7,7 +7,10 @@ import numpy as np
7
  import spaces
8
  import torch
9
  from diffusers import DiffusionPipeline
10
- from PIL import Image
 
 
 
11
 
12
  # Create permanent storage directory
13
  SAVE_DIR = "saved_images" # Gradio will handle the persistence
@@ -28,6 +31,126 @@ pipeline = pipeline.to(device)
28
  MAX_SEED = np.iinfo(np.int32).max
29
  MAX_IMAGE_SIZE = 1024
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def save_generated_image(image, prompt):
32
  # Generate unique filename with timestamp
33
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -70,6 +193,13 @@ def inference(
70
  guidance_scale: float,
71
  num_inference_steps: int,
72
  lora_scale: float,
 
 
 
 
 
 
 
73
  progress: gr.Progress = gr.Progress(track_tqdm=True),
74
  ):
75
  if randomize_seed:
@@ -86,12 +216,24 @@ def inference(
86
  joint_attention_kwargs={"scale": lora_scale},
87
  ).images[0]
88
 
 
 
 
 
 
89
  # Save the generated image
90
  filepath = save_generated_image(image, prompt)
91
 
92
  # Return the image, seed, and updated gallery
93
  return image, seed, load_generated_images()
94
 
 
 
 
 
 
 
 
95
  examples = [
96
  "An anime-style illustration of a handsome male character with long, dark, flowing hair tied back partially with a traditional hairpiece. He wears a flowing, light-colored traditional East Asian robe with dark accents. His expression is thoughtful and slightly troubled, with his hand near his temple. In the blurred background, there are other figures in similar traditional attire, suggesting a scene of action or conflict in a fantasy setting. The overall mood is serious and dramatic, reminiscent of wuxia or xianxia genres.",
97
  "A fierce, action-oriented anime illustration of a male knight in full, dark, intricate armor. He has long, flowing dark hair and a confident, determined expression with a slight smirk. He wields a massive, ornate sword with a red glow on its blade, held high above his head in a striking pose. The background is a dramatic, desolate landscape with jagged mountains and a stormy, overcast sky, conveying a sense of epic conflict and adventure.",
@@ -108,8 +250,6 @@ examples = [
108
  "A colossal steampunk clocktower pierces through storm clouds, its gears and mechanisms visible through crystalline walls. 'TIMEKEEPER'S LEGACY' is constructed from intricate brass and copper mechanisms that appear to be in constant motion. Lightning arcs between copper spires, while 'By Theodore Cogsworth' is etched in burnished bronze below. Mathematical equations and alchemical symbols float in the turbulent sky. [trigger]"
109
  ]
110
 
111
-
112
-
113
  with gr.Blocks(theme=gr.themes.Soft(), analytics_enabled=False) as demo:
114
  gr.HTML('<div class="title"> eBOOK Cover generation </div>')
115
 
@@ -128,6 +268,7 @@ with gr.Blocks(theme=gr.themes.Soft(), analytics_enabled=False) as demo:
128
  placeholder="Enter your prompt",
129
  container=False,
130
  )
 
131
  run_button = gr.Button("Run", scale=0)
132
 
133
  # Modified to include the default image
@@ -137,6 +278,30 @@ with gr.Blocks(theme=gr.themes.Soft(), analytics_enabled=False) as demo:
137
  value=DEFAULT_IMAGE_PATH # Set the default image
138
  )
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  with gr.Accordion("Advanced Settings", open=False):
141
  seed = gr.Slider(
142
  label="Seed",
@@ -213,6 +378,26 @@ with gr.Blocks(theme=gr.themes.Soft(), analytics_enabled=False) as demo:
213
  inputs=None,
214
  outputs=generated_gallery,
215
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
  gr.on(
218
  triggers=[run_button.click, prompt.submit],
@@ -226,6 +411,13 @@ with gr.Blocks(theme=gr.themes.Soft(), analytics_enabled=False) as demo:
226
  guidance_scale,
227
  num_inference_steps,
228
  lora_scale,
 
 
 
 
 
 
 
229
  ],
230
  outputs=[result, seed, generated_gallery],
231
  )
 
7
  import spaces
8
  import torch
9
  from diffusers import DiffusionPipeline
10
+ from PIL import Image, ImageDraw, ImageFont
11
+ import requests
12
+ import json
13
+ import re
14
 
15
  # Create permanent storage directory
16
  SAVE_DIR = "saved_images" # Gradio will handle the persistence
 
31
  MAX_SEED = np.iinfo(np.int32).max
32
  MAX_IMAGE_SIZE = 1024
33
 
34
def is_korean_only(text):
    """Return True when *text* is made up solely of Hangul syllables.

    Whitespace and punctuation are ignored before the check, so
    "안녕, 세상!" counts as Korean-only. Any other character (Latin
    letters, digits, symbols, ...) makes the result False.

    Args:
        text: The prompt to inspect. Non-string or empty values are
            treated as "not Korean" instead of raising.

    Returns:
        bool: True if at least one character remains after stripping
        whitespace/punctuation and every remaining character lies in the
        Hangul syllable range U+AC00..U+D7A3.
    """
    # Local import keeps this block self-contained; unicodedata is stdlib.
    import unicodedata

    if not isinstance(text, str) or not text:
        return False

    # Drop whitespace and every Unicode punctuation character (categories
    # "P*"), not just ". , ! ?", so the behaviour matches the documented
    # "excluding spaces and punctuation" contract.
    significant = [
        char for char in text
        if not char.isspace() and not unicodedata.category(char).startswith("P")
    ]

    # An input with nothing left (only spaces/punctuation) is not Korean.
    return bool(significant) and all('\uAC00' <= char <= '\uD7A3' for char in significant)
40
+
41
def augment_prompt_with_llm(prompt):
    """Expand a Korean prompt into a detailed English one via the Friendli chat API.

    This is a best-effort helper: on any failure (missing token, HTTP
    error, network error, malformed or empty response) the original
    *prompt* is returned unchanged so the caller always gets usable text.

    Args:
        prompt: The user-supplied prompt to expand.

    Returns:
        The LLM-generated prompt with surrounding whitespace stripped, or
        the original *prompt* when augmentation is unavailable or fails.
    """
    token = os.getenv("FRIENDLI_TOKEN")
    if not token:
        return prompt  # Return original if no token

    url = "https://api.friendli.ai/dedicated/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json"
    }

    # System message instructing the model how to augment the prompt
    system_message = """You are an expert at creating detailed, artistic prompts for ebook cover generation.
    When given a Korean prompt, expand it into a detailed English description suitable for AI image generation.
    Focus on visual elements, artistic style, composition, lighting, and mood.
    Always end the prompt with '[trigger]' to activate the LoRA model."""

    payload = {
        "model": "dep89a2fld32mcm",
        "messages": [
            {
                "role": "system",
                "content": system_message
            },
            {
                "role": "user",
                "content": f"다음 한국어 프롬프트를 전자책 표지 생성을 위한 상세한 영어 프롬프트로 확장해주세요: {prompt}"
            }
        ],
        "max_tokens": 500,
        "top_p": 0.8,
        "stream": False
    }

    try:
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        if response.status_code != 200:
            print(f"API Error: {response.status_code}")
            return prompt

        result = response.json()
        augmented_prompt = result['choices'][0]['message']['content']
    except Exception as e:
        # Deliberately broad: this runs inside a UI callback and must never
        # take the request down; fall back to the user's own prompt.
        print(f"Error calling LLM API: {e}")
        return prompt

    # A 200 response can still carry null/blank content; returning that
    # would wipe the prompt textbox, so keep the original text instead.
    if not isinstance(augmented_prompt, str) or not augmented_prompt.strip():
        return prompt

    return augmented_prompt.strip()
88
+
89
def _draw_centered_text(draw, text, center, font, fill, shadow_offset=2):
    """Draw *text* centred on *center* with a black drop shadow beneath it."""
    # Measure the (possibly multi-line) text so it can be centred on the anchor.
    bbox = draw.textbbox((0, 0), text, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]
    left = center[0] - text_width // 2
    top = center[1] - text_height // 2

    # Shadow first, then the real text on top, for legibility on busy images.
    draw.text((left + shadow_offset, top + shadow_offset),
              text, font=font, fill="black", align="center")
    draw.text((left, top), text, font=font, fill=fill, align="center")


def add_text_overlay(image, title_ko, title_en, author_ko, author_en,
                     title_position, author_position, text_color):
    """Return a copy of *image* with title and/or author text drawn on it.

    The Korean and English variants of each field are stacked on two lines
    when both are given; otherwise whichever one is present is used. Each
    text block is horizontally centred and drawn with a 2px black shadow.

    Args:
        image: Source image; it is copied, never modified in place.
        title_ko, title_en: Title text (either may be empty).
        author_ko, author_en: Author text (either may be empty).
        title_position, author_position: One of "Top", "Center", "Bottom".
        text_color: Fill colour passed straight to ``ImageDraw.text``.

    Returns:
        A new image with the overlay applied.

    Raises:
        KeyError: If a position is not "Top", "Center" or "Bottom".
    """
    # Work on a copy so the caller's image stays untouched
    img_with_text = image.copy()
    draw = ImageDraw.Draw(img_with_text)

    title_font_size = 48
    author_font_size = 32
    # Shared by title and author; defined up front so an author-only overlay
    # does not hit an unbound variable.
    shadow_offset = 2

    # Try to load a better font, fall back to the built-in one if unavailable.
    # NOTE(review): Liberation Sans presumably has no Hangul glyphs — confirm
    # that Korean titles render, or ship a CJK-capable font with the app.
    try:
        title_font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", title_font_size)
        author_font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf", author_font_size)
    except (OSError, ImportError):
        # Font file missing/unreadable or FreeType support not available
        title_font = ImageFont.load_default()
        author_font = ImageFont.load_default()

    # Get image dimensions
    img_width, img_height = img_with_text.size

    # Anchor points (centre of the text block) for each named position
    position_coords = {
        "Top": (img_width // 2, img_height // 10),
        "Center": (img_width // 2, img_height // 2),
        "Bottom": (img_width // 2, img_height * 9 // 10)
    }

    # Draw title
    if title_ko or title_en:
        title_text = f"{title_ko}\n{title_en}" if title_ko and title_en else title_ko or title_en
        _draw_centered_text(draw, title_text, position_coords[title_position],
                            title_font, text_color, shadow_offset)

    # Draw author
    if author_ko or author_en:
        author_text = f"{author_ko}\n{author_en}" if author_ko and author_en else author_ko or author_en
        _draw_centered_text(draw, author_text, position_coords[author_position],
                            author_font, text_color, shadow_offset)

    return img_with_text
153
+
154
  def save_generated_image(image, prompt):
155
  # Generate unique filename with timestamp
156
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 
193
  guidance_scale: float,
194
  num_inference_steps: int,
195
  lora_scale: float,
196
+ title_ko: str,
197
+ title_en: str,
198
+ author_ko: str,
199
+ author_en: str,
200
+ title_position: str,
201
+ author_position: str,
202
+ text_color: str,
203
  progress: gr.Progress = gr.Progress(track_tqdm=True),
204
  ):
205
  if randomize_seed:
 
216
  joint_attention_kwargs={"scale": lora_scale},
217
  ).images[0]
218
 
219
+ # Add text overlay if any text is provided
220
+ if any([title_ko, title_en, author_ko, author_en]):
221
+ image = add_text_overlay(image, title_ko, title_en, author_ko, author_en,
222
+ title_position, author_position, text_color)
223
+
224
  # Save the generated image
225
  filepath = save_generated_image(image, prompt)
226
 
227
  # Return the image, seed, and updated gallery
228
  return image, seed, load_generated_images()
229
 
230
def augment_prompt(prompt):
    """Expand Korean-only prompts through the LLM; return anything else unchanged."""
    # Guard clause: only Korean-only prompts are sent for augmentation.
    if not is_korean_only(prompt):
        return prompt
    return augment_prompt_with_llm(prompt)
236
+
237
  examples = [
238
  "An anime-style illustration of a handsome male character with long, dark, flowing hair tied back partially with a traditional hairpiece. He wears a flowing, light-colored traditional East Asian robe with dark accents. His expression is thoughtful and slightly troubled, with his hand near his temple. In the blurred background, there are other figures in similar traditional attire, suggesting a scene of action or conflict in a fantasy setting. The overall mood is serious and dramatic, reminiscent of wuxia or xianxia genres.",
239
  "A fierce, action-oriented anime illustration of a male knight in full, dark, intricate armor. He has long, flowing dark hair and a confident, determined expression with a slight smirk. He wields a massive, ornate sword with a red glow on its blade, held high above his head in a striking pose. The background is a dramatic, desolate landscape with jagged mountains and a stormy, overcast sky, conveying a sense of epic conflict and adventure.",
 
250
  "A colossal steampunk clocktower pierces through storm clouds, its gears and mechanisms visible through crystalline walls. 'TIMEKEEPER'S LEGACY' is constructed from intricate brass and copper mechanisms that appear to be in constant motion. Lightning arcs between copper spires, while 'By Theodore Cogsworth' is etched in burnished bronze below. Mathematical equations and alchemical symbols float in the turbulent sky. [trigger]"
251
  ]
252
 
 
 
253
  with gr.Blocks(theme=gr.themes.Soft(), analytics_enabled=False) as demo:
254
  gr.HTML('<div class="title"> eBOOK Cover generation </div>')
255
 
 
268
  placeholder="Enter your prompt",
269
  container=False,
270
  )
271
+ augment_button = gr.Button("증강", scale=0)
272
  run_button = gr.Button("Run", scale=0)
273
 
274
  # Modified to include the default image
 
278
  value=DEFAULT_IMAGE_PATH # Set the default image
279
  )
280
 
281
+ with gr.Accordion("Text Overlay Settings", open=False):
282
+ with gr.Row():
283
+ with gr.Column():
284
+ title_ko = gr.Textbox(label="Title (Korean)", placeholder="한글 제목")
285
+ title_en = gr.Textbox(label="Title (English)", placeholder="English Title")
286
+ title_position = gr.Radio(
287
+ label="Title Position",
288
+ choices=["Top", "Center", "Bottom"],
289
+ value="Top"
290
+ )
291
+ with gr.Column():
292
+ author_ko = gr.Textbox(label="Author (Korean)", placeholder="지은이")
293
+ author_en = gr.Textbox(label="Author (English)", placeholder="Author Name")
294
+ author_position = gr.Radio(
295
+ label="Author Position",
296
+ choices=["Top", "Center", "Bottom"],
297
+ value="Bottom"
298
+ )
299
+
300
+ text_color = gr.ColorPicker(
301
+ label="Text Color",
302
+ value="#FFFFFF"
303
+ )
304
+
305
  with gr.Accordion("Advanced Settings", open=False):
306
  seed = gr.Slider(
307
  label="Seed",
 
378
  inputs=None,
379
  outputs=generated_gallery,
380
  )
381
+
382
+ # Augment button handler
383
+ augment_button.click(
384
+ fn=augment_prompt,
385
+ inputs=[prompt],
386
+ outputs=[prompt],
387
+ )
388
+
389
+ # Auto-augment Korean prompts
390
def handle_prompt_change(prompt_text):
    """Return the LLM-augmented prompt for Korean-only text, else the text unchanged."""
    needs_augmentation = is_korean_only(prompt_text)
    return augment_prompt_with_llm(prompt_text) if needs_augmentation else prompt_text
394
+
395
+ # Optional: Auto-augment on prompt change (commented out to avoid too many API calls)
396
+ # prompt.change(
397
+ # fn=handle_prompt_change,
398
+ # inputs=[prompt],
399
+ # outputs=[prompt]
400
+ # )
401
 
402
  gr.on(
403
  triggers=[run_button.click, prompt.submit],
 
411
  guidance_scale,
412
  num_inference_steps,
413
  lora_scale,
414
+ title_ko,
415
+ title_en,
416
+ author_ko,
417
+ author_en,
418
+ title_position,
419
+ author_position,
420
+ text_color,
421
  ],
422
  outputs=[result, seed, generated_gallery],
423
  )