fantaxy committed on
Commit
c6c37f5
·
verified ·
1 Parent(s): b075ad9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -20
app.py CHANGED
@@ -192,9 +192,117 @@ Important rules:
192
  if current_scene:
193
  english_scenes.append(current_scene.strip())
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  return {
196
  "korean": korean_scenes if korean_scenes else self.generate_scene_breakdown_fallback(video_info)["korean"],
197
- "english": english_scenes if english_scenes else self.generate_scene_breakdown_fallback(video_info)["english"]
 
 
 
198
  }
199
 
200
  except Exception as e:
@@ -202,7 +310,7 @@ Important rules:
202
  return self.generate_scene_breakdown_fallback(video_info)
203
 
204
  # ---------------------------------------------------------
205
- # Fallback 음성/대사 추출 (한글/영어)
206
  # ---------------------------------------------------------
207
  def generate_scene_breakdown_fallback(self, video_info):
208
  duration = video_info.get("duration", 0)
@@ -213,7 +321,10 @@ Important rules:
213
  if not duration:
214
  return {
215
  "korean": ["**[재생시간 알 수 없음]**: 타임스탬프를 생성할 수 없습니다"],
216
- "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]
 
 
 
217
  }
218
 
219
  # 비디오 타입에 따른 대사 템플릿
@@ -228,8 +339,11 @@ Important rules:
228
 
229
  korean_scenes = []
230
  english_scenes = []
 
 
 
 
231
  num_segments = min(duration // segment_length + 1, 20)
232
- video_type = self.detect_video_type_detailed(title, description)
233
 
234
  for i in range(num_segments):
235
  start_time = i * segment_length
@@ -240,19 +354,31 @@ Important rules:
240
 
241
  # 음성 추출이 불가능한 경우의 기본 템플릿
242
  if i == 0:
243
- korean_desc = f"(음성 추출을 위해 Gemini API가 필요합니다. 인트로 부분...)"
244
- english_desc = f"(Gemini API required for speech extraction. Intro section...)"
 
 
 
245
  elif i == num_segments - 1:
246
- korean_desc = f"(아웃트로 부분...)"
247
- english_desc = f"(Outro section...)"
 
 
 
248
  else:
249
- korean_desc = f"(본문 내용...)"
250
- english_desc = f"(Main content...)"
251
-
252
- korean_scenes.append(f"**[{start_fmt}-{end_fmt}]**: {korean_desc}")
253
- english_scenes.append(f"**[{start_fmt}-{end_fmt}]**: {english_desc}")
254
-
255
- return {"korean": korean_scenes, "english": english_scenes}
 
 
 
 
 
 
256
 
257
  # ---------------------------------------------------------
258
  # 비디오 유형 감지(상세)
@@ -463,7 +589,7 @@ Important rules:
463
  return str(num)
464
 
465
  # ---------------------------------------------------------
466
- # 최종 리포트 생성 (한글/영어 분리)
467
  # ---------------------------------------------------------
468
  def format_video_info(self, video_info):
469
  if not video_info:
@@ -484,6 +610,9 @@ Important rules:
484
  scene_data = self.generate_scene_breakdown_gemini(video_info)
485
  korean_scenes = scene_data.get("korean", [])
486
  english_scenes = scene_data.get("english", [])
 
 
 
487
 
488
  vtype = self.detect_video_type(title, video_info.get("description", ""))
489
  bgm = self.detect_background_music(video_info)
@@ -523,10 +652,17 @@ Important rules:
523
  {'─'*30}
524
  {chr(10).join(english_scenes)}
525
 
526
- 📝 설명 미리보기 / DESCRIPTION PREVIEW
527
- {'─'*25}
528
- {video_info.get('description', 'No description available')[:500]}
529
- {'...(생략/truncated)' if len(video_info.get('description', '')) > 500 else ''}
 
 
 
 
 
 
 
530
 
531
  {'='*50}
532
  📊 **분석 완료/Analysis completed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 
192
  if current_scene:
193
  english_scenes.append(current_scene.strip())
194
 
195
+ # 중국어 번역 생성
196
+ chinese_prompt = f"""
197
+ 将以下韩语语音/对话转录翻译成中文,保持完全相同的时间戳。
198
+ 只翻译实际的语音内容,不要描述:
199
+
200
+ {chr(10).join(korean_scenes)}
201
+
202
+ 重要规则:
203
+ - 保持格式完全相同:**[MM:SS-MM:SS]**: "语音的中文翻译"
204
+ - 对于像(배경음악)这样的非语音部分,翻译为(背景音乐)
205
+ - 对于(...)保持原样
206
+ - 对于像[이도학 교수]这样的采访标签,翻译为[李道学教授]
207
+ - 对于[자막],翻译为[字幕]
208
+ - 保留实际语音的引号
209
+ - 不要添加任何场景描述或解释
210
+ """
211
+ chinese_response = self.gemini_model.generate_content(chinese_prompt)
212
+
213
+ chinese_scenes = []
214
+ if chinese_response and chinese_response.text:
215
+ lines = chinese_response.text.split("\n")
216
+ current_scene = ""
217
+
218
+ for line in lines:
219
+ line = line.strip()
220
+ if line.startswith("**[") and "]**:" in line:
221
+ if current_scene:
222
+ chinese_scenes.append(current_scene.strip())
223
+ current_scene = line
224
+ elif current_scene:
225
+ current_scene += "\n" + line
226
+
227
+ if current_scene:
228
+ chinese_scenes.append(current_scene.strip())
229
+
230
+ # 태국어 번역 생성
231
+ thai_prompt = f"""
232
+ แปลคำบรรยายเสียง/บทสนทนาภาษาเกาหลีต่อไปนี้เป็นภาษาไทย โดยคงรูปแบบเวลาเดิมไว้
233
+ แปลเฉพาะเนื้อหาเสียงจริงเท่านั้น ไม่ต้องบรรยาย:
234
+
235
+ {chr(10).join(korean_scenes)}
236
+
237
+ กฎสำคัญ:
238
+ - คงรูปแบบเดิมไว้: **[MM:SS-MM:SS]**: "คำแปลภาษาไทยของเสียงพูด"
239
+ - สำหรับส่วนที่ไม่ใช่เสียงพูด เช่น (배경음악) แปลเป็น (เพลงประกอบ)
240
+ - สำหรับ (...) ให้คงเดิม
241
+ - สำหรับป้ายสัมภาษณ์ เช่น [이도학 교수] แปลเป็น [ศาสตราจารย์ อี โด-ฮัก]
242
+ - สำหรับ [자막] แปลเป็น [คำบรรยาย]
243
+ - คงเครื่องหมายคำพูดสำหรับเสียงพูดจริง
244
+ - ไม่ต้องเพิ่มคำบรรยายฉากหรือคำอธิบายใดๆ
245
+ """
246
+ thai_response = self.gemini_model.generate_content(thai_prompt)
247
+
248
+ thai_scenes = []
249
+ if thai_response and thai_response.text:
250
+ lines = thai_response.text.split("\n")
251
+ current_scene = ""
252
+
253
+ for line in lines:
254
+ line = line.strip()
255
+ if line.startswith("**[") and "]**:" in line:
256
+ if current_scene:
257
+ thai_scenes.append(current_scene.strip())
258
+ current_scene = line
259
+ elif current_scene:
260
+ current_scene += "\n" + line
261
+
262
+ if current_scene:
263
+ thai_scenes.append(current_scene.strip())
264
+
265
+ # 러시아어 번역 생성
266
+ russian_prompt = f"""
267
+ Переведите следующую корейскую транскрипцию речи/диалога на русский язык, сохраняя точно такие же временные метки.
268
+ Переводите ТОЛЬКО фактическое содержание речи, а не описания:
269
+
270
+ {chr(10).join(korean_scenes)}
271
+
272
+ Важные правила:
273
+ - Сохраняйте формат точно таким же: **[MM:SS-MM:SS]**: "Русский перевод речи"
274
+ - Для частей без речи, таких как (배경음악), переведите как (фоновая музыка)
275
+ - Для (...) оставьте как есть
276
+ - Для тегов интервью, таких как [이도학 교수], переведите как [Профессор Ли До Хак]
277
+ - Для [자막] переведите как [Субтитры]
278
+ - Сохраняйте кавычки для фактической речи
279
+ - НЕ добавляйте никаких описаний сцен или объяснений
280
+ """
281
+ russian_response = self.gemini_model.generate_content(russian_prompt)
282
+
283
+ russian_scenes = []
284
+ if russian_response and russian_response.text:
285
+ lines = russian_response.text.split("\n")
286
+ current_scene = ""
287
+
288
+ for line in lines:
289
+ line = line.strip()
290
+ if line.startswith("**[") and "]**:" in line:
291
+ if current_scene:
292
+ russian_scenes.append(current_scene.strip())
293
+ current_scene = line
294
+ elif current_scene:
295
+ current_scene += "\n" + line
296
+
297
+ if current_scene:
298
+ russian_scenes.append(current_scene.strip())
299
+
300
  return {
301
  "korean": korean_scenes if korean_scenes else self.generate_scene_breakdown_fallback(video_info)["korean"],
302
+ "english": english_scenes if english_scenes else self.generate_scene_breakdown_fallback(video_info)["english"],
303
+ "chinese": chinese_scenes if chinese_scenes else [],
304
+ "thai": thai_scenes if thai_scenes else [],
305
+ "russian": russian_scenes if russian_scenes else []
306
  }
307
 
308
  except Exception as e:
 
310
  return self.generate_scene_breakdown_fallback(video_info)
311
 
312
  # ---------------------------------------------------------
313
+ # Fallback 음성/대사 추출 (5개 언어)
314
  # ---------------------------------------------------------
315
  def generate_scene_breakdown_fallback(self, video_info):
316
  duration = video_info.get("duration", 0)
 
321
  if not duration:
322
  return {
323
  "korean": ["**[재생시간 알 수 없음]**: 타임스탬프를 생성할 수 없습니다"],
324
+ "english": ["**[Duration Unknown]**: Unable to generate timestamped breakdown"],
325
+ "chinese": ["**[持续时间未知]**: 无法生成带时间戳的分解"],
326
+ "thai": ["**[ไม่ทราบระยะเวลา]**: ไม่สามารถสร้างการแบ่งส่วนตามเวลาได้"],
327
+ "russian": ["**[Продолжительность неизвестна]**: Невозможно создать временную разбивку"]
328
  }
329
 
330
  # 비디오 타입에 따른 대사 템플릿
 
339
 
340
  korean_scenes = []
341
  english_scenes = []
342
+ chinese_scenes = []
343
+ thai_scenes = []
344
+ russian_scenes = []
345
+
346
  num_segments = min(duration // segment_length + 1, 20)
 
347
 
348
  for i in range(num_segments):
349
  start_time = i * segment_length
 
354
 
355
  # 음성 추출이 불가능한 경우의 기본 템플릿
356
  if i == 0:
357
+ korean_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (음성 추출을 위해 Gemini API가 필요합니다. 인트로 부분...)")
358
+ english_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Gemini API required for speech extraction. Intro section...)")
359
+ chinese_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (需要Gemini API进行语音提取。介绍部分...)")
360
+ thai_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (ต้องใช้ Gemini API สำหรับการดึงเสียง ส่วนเปิดตัว...)")
361
+ russian_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Требуется Gemini API для извлечения речи. Вступительная часть...)")
362
  elif i == num_segments - 1:
363
+ korean_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (아웃트로 부분...)")
364
+ english_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Outro section...)")
365
+ chinese_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (结尾部分...)")
366
+ thai_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (ส่วนจบ...)")
367
+ russian_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Заключительная часть...)")
368
  else:
369
+ korean_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (본문 내용...)")
370
+ english_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Main content...)")
371
+ chinese_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (主要内容...)")
372
+ thai_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (เนื้อหาหลัก...)")
373
+ russian_scenes.append(f"**[{start_fmt}-{end_fmt}]**: (Основное содержание...)")
374
+
375
+ return {
376
+ "korean": korean_scenes,
377
+ "english": english_scenes,
378
+ "chinese": chinese_scenes,
379
+ "thai": thai_scenes,
380
+ "russian": russian_scenes
381
+ }
382
 
383
  # ---------------------------------------------------------
384
  # 비디오 유형 감지(상세)
 
589
  return str(num)
590
 
591
  # ---------------------------------------------------------
592
+ # 최종 리포트 생성 (5개 언어)
593
  # ---------------------------------------------------------
594
  def format_video_info(self, video_info):
595
  if not video_info:
 
610
  scene_data = self.generate_scene_breakdown_gemini(video_info)
611
  korean_scenes = scene_data.get("korean", [])
612
  english_scenes = scene_data.get("english", [])
613
+ chinese_scenes = scene_data.get("chinese", [])
614
+ thai_scenes = scene_data.get("thai", [])
615
+ russian_scenes = scene_data.get("russian", [])
616
 
617
  vtype = self.detect_video_type(title, video_info.get("description", ""))
618
  bgm = self.detect_background_music(video_info)
 
652
  {'─'*30}
653
  {chr(10).join(english_scenes)}
654
 
655
+ 🎙️ 音频/对话提取 (中文) / SPEECH/DIALOGUE EXTRACTION (CHINESE)
656
+ {'─'*30}
657
+ {chr(10).join(chinese_scenes) if chinese_scenes else "(중국어 번역 없음 / No Chinese translation available)"}
658
+
659
+ 🎙️ การดึงเสียง/บทสนทนา (ไทย) / SPEECH/DIALOGUE EXTRACTION (THAI)
660
+ {'─'*30}
661
+ {chr(10).join(thai_scenes) if thai_scenes else "(태국어 번역 없음 / No Thai translation available)"}
662
+
663
+ 🎙️ Извлечение речи/диалога (Русский) / SPEECH/DIALOGUE EXTRACTION (RUSSIAN)
664
+ {'─'*30}
665
+ {chr(10).join(russian_scenes) if russian_scenes else "(러시아어 번역 없음 / No Russian translation available)"}
666
 
667
  {'='*50}
668
  📊 **분석 완료/Analysis completed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}