openfree committed on
Commit
396a304
·
verified ·
1 Parent(s): 6b13125

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -52
app.py CHANGED
@@ -1098,10 +1098,9 @@ def generate_images_parallel(prompt_3d: str, prompt_photo: str) -> Tuple[Optiona
1098
  # PPT Generation Functions - FIXED VERSION
1099
  ##############################################################################
1100
  def parse_llm_ppt_response(response: str, layout_style: str = "consistent") -> list:
1101
- """Parse LLM response to extract slide content - COMPLETELY FIXED VERSION"""
1102
  slides = []
1103
 
1104
- # Debug: 전체 응답 확인
1105
  logger.info(f"Parsing LLM response, total length: {len(response)}")
1106
  logger.debug(f"First 500 chars: {response[:500]}")
1107
 
@@ -1114,24 +1113,45 @@ def parse_llm_ppt_response(response: str, layout_style: str = "consistent") -> l
1114
  except:
1115
  pass
1116
 
1117
- # Split by slide markers and process each section
1118
- # 슬라이드를 구분하는 더 강력한 정규식
1119
- slide_pattern = r'(?:^|\n)(?:์Šฌ๋ผ์ด๋“œ|Slide)\s*\d+|(?:^|\n)\d+[\.)](?:\s|$)'
1120
-
1121
- # 슬라이드 섹션으로 분할
1122
- sections = re.split(slide_pattern, response, flags=re.MULTILINE | re.IGNORECASE)
1123
-
1124
- # 첫 번째 빈 섹션 제거
1125
- if sections and not sections[0].strip():
1126
- sections = sections[1:]
1127
-
1128
- logger.info(f"Found {len(sections)} potential slide sections")
1129
-
1130
- for idx, section in enumerate(sections):
1131
- if not section.strip():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1132
  continue
1133
 
1134
- logger.debug(f"Processing section {idx}: {section[:100]}...")
1135
 
1136
  slide = {
1137
  'title': '',
@@ -1142,9 +1162,9 @@ def parse_llm_ppt_response(response: str, layout_style: str = "consistent") -> l
1142
  }
1143
 
1144
  # 섹션 내에서 제목, 내용, 노트 추출
1145
- lines = section.strip().split('\n')
1146
  current_part = None
1147
- title_lines = []
1148
  content_lines = []
1149
  notes_lines = []
1150
 
@@ -1154,11 +1174,11 @@ def parse_llm_ppt_response(response: str, layout_style: str = "consistent") -> l
1154
  continue
1155
 
1156
  # 제목 섹션 감지
1157
- if line.startswith('์ œ๋ชฉ:') or line.startswith('Title:'):
1158
  current_part = 'title'
1159
  title_text = line.split(':', 1)[1].strip() if ':' in line else ''
1160
- if title_text:
1161
- title_lines.append(title_text)
1162
  # 내용 섹션 감지
1163
  elif line.startswith('๋‚ด์šฉ:') or line.startswith('Content:'):
1164
  current_part = 'content'
@@ -1173,57 +1193,91 @@ def parse_llm_ppt_response(response: str, layout_style: str = "consistent") -> l
1173
  notes_lines.append(notes_text)
1174
  # 현재 섹션에 따라 내용 추가
1175
  else:
1176
- if current_part == 'title' and not title_lines:
1177
- title_lines.append(line)
1178
  elif current_part == 'content':
1179
  content_lines.append(line)
1180
  elif current_part == 'notes':
1181
  notes_lines.append(line)
1182
- elif not current_part and not title_lines:
1183
  # 첫 번째 줄을 제목으로
1184
- title_lines.append(line)
1185
- current_part = 'content' # ์ดํ›„ ์ค„๋“ค์€ content๋กœ
1186
- elif not current_part:
 
 
1187
  content_lines.append(line)
1188
 
1189
  # 슬라이드 데이터 설정
1190
- slide['title'] = ' '.join(title_lines).strip()
1191
  slide['content'] = '\n'.join(content_lines).strip()
1192
  slide['notes'] = ' '.join(notes_lines).strip()
1193
 
1194
- # 제목 정리
1195
- slide['title'] = re.sub(r'^(์Šฌ๋ผ์ด๋“œ|Slide)\s*\d+\s*[:๏ผš\-]?\s*', '', slide['title'], flags=re.IGNORECASE)
1196
- slide['title'] = re.sub(r'^(์ œ๋ชฉ|Title)\s*[:๏ผš]\s*', '', slide['title'], flags=re.IGNORECASE)
1197
-
1198
  # 내용이 있는 경우에만 추가
1199
  if slide['title'] or slide['content']:
1200
  logger.info(f"Slide {len(slides)+1}: Title='{slide['title'][:30]}...', Content length={len(slide['content'])}")
1201
  slides.append(slide)
1202
 
1203
- # 만약 위 방법으로 파싱이 안 되었다면, 더 간단한 방법 시도
1204
- if not slides:
1205
- logger.warning("Primary parsing failed, trying fallback method...")
 
 
 
 
1206
 
1207
- # 더블 뉴라인으로 구분
1208
- sections = response.split('\n\n')
1209
  for section in sections:
1210
- lines = section.strip().split('\n')
1211
- if len(lines) >= 2: # ์ตœ์†Œ ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์ด ์žˆ์–ด์•ผ ํ•จ
1212
- slide = {
1213
- 'title': lines[0].strip(),
1214
- 'content': '\n'.join(lines[1:]).strip(),
1215
- 'notes': '',
1216
- 'layout': 'title_content',
1217
- 'chart_data': None
1218
- }
1219
 
1220
- # 제목 정리
1221
- slide['title'] = re.sub(r'^(์Šฌ๋ผ์ด๋“œ|Slide)\s*\d+\s*[:๏ผš\-]?\s*', '', slide['title'], flags=re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1222
 
1223
- if slide['title'] and slide['content']:
1224
- slides.append(slide)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1225
 
1226
  logger.info(f"Total slides parsed: {len(slides)}")
 
 
 
 
 
 
1227
  return slides
1228
 
1229
  def force_font_size(text_frame, font_size_pt: int, theme: Dict):
 
1098
  # PPT Generation Functions - FIXED VERSION
1099
  ##############################################################################
1100
  def parse_llm_ppt_response(response: str, layout_style: str = "consistent") -> list:
1101
+ """Parse LLM response to extract slide content - FIXED VERSION"""
1102
  slides = []
1103
 
 
1104
  logger.info(f"Parsing LLM response, total length: {len(response)}")
1105
  logger.debug(f"First 500 chars: {response[:500]}")
1106
 
 
1113
  except:
1114
  pass
1115
 
1116
+ # 더 정확한 슬라이드 구분 패턴
1117
+ # "슬라이드 1", "슬라이드 2" 또는 "Slide 1", "Slide 2" 형식을 찾음
1118
+ slide_markers = []
1119
+
1120
+ # 슬라이드 마커의 위치를 먼저 찾음
1121
+ for match in re.finditer(r'^(?:์Šฌ๋ผ์ด๋“œ|Slide)\s*(\d+)\s*$', response, re.MULTILINE):
1122
+ slide_markers.append({
1123
+ 'index': int(match.group(1)),
1124
+ 'start': match.start(),
1125
+ 'end': match.end()
1126
+ })
1127
+
1128
+ logger.info(f"Found {len(slide_markers)} slide markers")
1129
+
1130
+ # 슬라이드 마커가 없으면 다른 패턴 시도
1131
+ if not slide_markers:
1132
+ # 숫자만으로 시작하는 패턴도 찾기 (예: "1.", "2." 등)
1133
+ for match in re.finditer(r'^(\d+)[.)]\s*$', response, re.MULTILINE):
1134
+ slide_markers.append({
1135
+ 'index': int(match.group(1)),
1136
+ 'start': match.start(),
1137
+ 'end': match.end()
1138
+ })
1139
+
1140
+ # 각 슬라이드 마커 사이의 내용을 추출
1141
+ for i, marker in enumerate(slide_markers):
1142
+ # 현재 슬라이드의 시작과 끝 위치
1143
+ start = marker['end']
1144
+ if i < len(slide_markers) - 1:
1145
+ end = slide_markers[i + 1]['start']
1146
+ else:
1147
+ end = len(response)
1148
+
1149
+ section = response[start:end].strip()
1150
+
1151
+ if not section:
1152
  continue
1153
 
1154
+ logger.debug(f"Processing slide {marker['index']}: {section[:100]}...")
1155
 
1156
  slide = {
1157
  'title': '',
 
1162
  }
1163
 
1164
  # 섹션 내에서 제목, 내용, 노트 추출
1165
+ lines = section.split('\n')
1166
  current_part = None
1167
+ title_found = False
1168
  content_lines = []
1169
  notes_lines = []
1170
 
 
1174
  continue
1175
 
1176
  # 제목 섹션 감지
1177
+ if (line.startswith('์ œ๋ชฉ:') or line.startswith('Title:')) and not title_found:
1178
  current_part = 'title'
1179
  title_text = line.split(':', 1)[1].strip() if ':' in line else ''
1180
+ slide['title'] = title_text
1181
+ title_found = True
1182
  # 내용 섹션 감지
1183
  elif line.startswith('๋‚ด์šฉ:') or line.startswith('Content:'):
1184
  current_part = 'content'
 
1193
  notes_lines.append(notes_text)
1194
  # 현재 섹션에 따라 내용 추가
1195
  else:
1196
+ if current_part == 'title' and not slide['title']:
1197
+ slide['title'] = line
1198
  elif current_part == 'content':
1199
  content_lines.append(line)
1200
  elif current_part == 'notes':
1201
  notes_lines.append(line)
1202
+ elif not title_found and not slide['title']:
1203
  # 첫 번째 줄을 제목으로
1204
+ slide['title'] = line
1205
+ title_found = True
1206
+ current_part = 'content'
1207
+ elif current_part is None and title_found:
1208
+ current_part = 'content'
1209
  content_lines.append(line)
1210
 
1211
  # 슬라이드 데이터 설정
 
1212
  slide['content'] = '\n'.join(content_lines).strip()
1213
  slide['notes'] = ' '.join(notes_lines).strip()
1214
 
 
 
 
 
1215
  # 내용이 있는 경우에만 추가
1216
  if slide['title'] or slide['content']:
1217
  logger.info(f"Slide {len(slides)+1}: Title='{slide['title'][:30]}...', Content length={len(slide['content'])}")
1218
  slides.append(slide)
1219
 
1220
+ # 만약 위 방법으로 파싱이 안 되었다면, 더 유연한 방법 시도
1221
+ if not slides or len(slides) < 3:
1222
+ logger.warning(f"Primary parsing resulted in only {len(slides)} slides, trying alternative method...")
1223
+ slides = []
1224
+
1225
+ # "제목:" 패턴으로 슬라이드 구분 시도
1226
+ sections = re.split(r'\n(?=์ œ๋ชฉ:|Title:)', response)
1227
 
 
 
1228
  for section in sections:
1229
+ if not section.strip():
1230
+ continue
 
 
 
 
 
 
 
1231
 
1232
+ slide = {
1233
+ 'title': '',
1234
+ 'content': '',
1235
+ 'notes': '',
1236
+ 'layout': 'title_content',
1237
+ 'chart_data': None
1238
+ }
1239
+
1240
+ lines = section.strip().split('\n')
1241
+ current_part = None
1242
+ content_lines = []
1243
+ notes_lines = []
1244
+
1245
+ for line in lines:
1246
+ line = line.strip()
1247
+ if not line:
1248
+ continue
1249
 
1250
+ if line.startswith('์ œ๋ชฉ:') or line.startswith('Title:'):
1251
+ slide['title'] = line.split(':', 1)[1].strip() if ':' in line else ''
1252
+ current_part = 'content'
1253
+ elif line.startswith('๋‚ด์šฉ:') or line.startswith('Content:'):
1254
+ current_part = 'content'
1255
+ elif line.startswith('๋…ธํŠธ:') or line.startswith('Notes:'):
1256
+ current_part = 'notes'
1257
+ notes_text = line.split(':', 1)[1].strip() if ':' in line else ''
1258
+ if notes_text:
1259
+ notes_lines.append(notes_text)
1260
+ elif current_part == 'content':
1261
+ content_lines.append(line)
1262
+ elif current_part == 'notes':
1263
+ notes_lines.append(line)
1264
+
1265
+ slide['content'] = '\n'.join(content_lines).strip()
1266
+ slide['notes'] = ' '.join(notes_lines).strip()
1267
+
1268
+ # 슬라이드 번호 제거
1269
+ slide['title'] = re.sub(r'^(์Šฌ๋ผ์ด๋“œ|Slide)\s*\d+\s*[:๏ผš\-]?\s*', '', slide['title'], flags=re.IGNORECASE)
1270
+
1271
+ if slide['title'] or slide['content']:
1272
+ slides.append(slide)
1273
 
1274
  logger.info(f"Total slides parsed: {len(slides)}")
1275
+
1276
+ # 파싱 결과 검증
1277
+ if len(slides) < 3:
1278
+ logger.error("Parsing resulted in too few slides. Raw response preview:")
1279
+ logger.error(response[:1000])
1280
+
1281
  return slides
1282
 
1283
  def force_font_size(text_frame, font_size_pt: int, theme: Dict):