Update app.py
app.py
CHANGED
@@ -1,14 +1,15 @@
 # -*- coding: utf-8 -*-
 """
-Shared app.py for Colab & Hugging Face Spaces (
-
-- Spaces:
-- STT with Whisper (faster-whisper) + koBART summarization
+Shared app.py for Colab & Hugging Face Spaces (hardened runtime install)
+- Installs PyTorch automatically when it is missing (Colab and Spaces alike)
+- Colab: installs ffmpeg automatically / Spaces: shows a warning if ffmpeg is missing
+- STT with Whisper (faster-whisper) + koBART summarization + a polished Gradio UI
 """
 
-import os, sys, subprocess, tempfile
+import os, sys, subprocess, tempfile, shutil
 from datetime import datetime
 
+# ===== Env Detect =====
 def _in_colab() -> bool:
     try:
         import google.colab  # noqa
@@ -16,48 +17,64 @@ def _in_colab() -> bool:
     except Exception:
         return False
 
-def
+def _has_cmd(cmd: str) -> bool:
+    return shutil.which(cmd) is not None
+
+# ===== Runtime Installer =====
+def _pip_install(pkgs):
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet"] + pkgs)
+
+def _ensure_runtime():
     # ffmpeg
+    if not _has_cmd("ffmpeg"):
+        if _in_colab():
+            subprocess.run(["apt-get", "update", "-y"], check=False)
+            subprocess.run(["apt-get", "install", "-y", "ffmpeg"], check=False)
+        # Spaces should use apt.txt instead → only warn here
+
+    # python packages
+    need = []
     for mod, pkg in [
+        ("torch", "torch"),
+        ("transformers", "transformers==4.*"),
+        ("sentencepiece", "sentencepiece"),
+        ("faster_whisper", "faster-whisper==1.*"),
+        ("pydub", "pydub"),
+        ("gradio", "gradio==4.*"),
+    ]:
         try:
             __import__(mod)
         except Exception:
+            need.append(pkg)
+
+    if need:
+        _pip_install(need)
 
+_ensure_runtime()
 
-# ===== Imports =====
+# ===== Imports (after install) =====
 import gradio as gr
 from pydub import AudioSegment
 from transformers import pipeline
 from faster_whisper import WhisperModel
+import torch
 
-# ===== Models =====
+# ===== Device / Models =====
 DEFAULT_WHISPER_SIZE = os.getenv("WHISPER_MODEL_SIZE", "small")  # tiny/base/small/medium/large-v3
-device = "cuda" if os.path.exists("/proc/driver/nvidia") else "cpu"
+device = "cuda" if torch.cuda.is_available() and os.path.exists("/proc/driver/nvidia") else "cpu"
 compute_type = "float16" if device == "cuda" else "int8"
 
-# initial instance (can be changed later from the advanced settings)
 _asr = WhisperModel(DEFAULT_WHISPER_SIZE, device=device, compute_type=compute_type)
-_summarizer = pipeline("summarization", model="gogamza/kobart-summarization")
+_summarizer = pipeline("summarization", model="gogamza/kobart-summarization",
+                       device=0 if device == "cuda" else -1)
 
 # ===== Utils =====
 def convert_to_wav(src_path: str) -> str:
     if src_path.lower().endswith(".wav"):
         return src_path
+    # needs ffmpeg
+    if not _has_cmd("ffmpeg"):
+        raise RuntimeError("ffmpeg is required. (Spaces: add 'ffmpeg' to apt.txt; Colab: it is installed automatically)")
     sound = AudioSegment.from_file(src_path)
     fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
     os.close(fd)
@@ -78,6 +95,7 @@ def chunk_text(txt: str, max_chars=850):
     return parts
 
 def summarize_long(text: str) -> str:
+    # koBART is weak on long inputs, so do chunked summaries plus a meta-summary
     chunks = chunk_text(text)
     partial = []
     for c in chunks:
@@ -93,23 +111,29 @@ def save_minutes_to_file(minutes_text: str) -> str:
         f.write(minutes_text)
     return path
 
-# ===== Core
+# ===== Core =====
 def transcribe_and_summarize(audio_path, model_size, auto_detect_lang):
+    # ffmpeg check (frequently missing on Spaces)
+    if not _has_cmd("ffmpeg"):
+        return ("", "", "", None,
+                gr.update(visible=True, value="⚠️ ffmpeg is missing. On Spaces, add 'ffmpeg' to apt.txt."))
+
     if not audio_path:
-        return "⚠️ No audio was provided.", "", "", None,
+        return ("⚠️ No audio was provided.", "", "", None,
+                gr.update(visible=True, value="⚠️ Please upload or record some audio."))
 
-    #
+    # reload dynamically when the model size changes
     global _asr
+    try:
+        if model_size and model_size != DEFAULT_WHISPER_SIZE:
             _asr = WhisperModel(model_size, device=device, compute_type=compute_type)
+    except Exception as e:
+        return ("", "", "", None,
+                gr.update(visible=True, value=f"⚠️ Failed to load the Whisper model: {e}"))
 
     wav_path = None
     try:
         wav_path = convert_to_wav(audio_path)
         language = None if auto_detect_lang else "ko"
         segments, info = _asr.transcribe(
             wav_path,
@@ -119,7 +143,8 @@ def transcribe_and_summarize(audio_path, model_size, auto_detect_lang):
         )
         text = "".join(seg.text for seg in segments).strip()
         if not text:
-            return "⚠️ No text was recognized.", "", "", None,
+            return ("⚠️ No text was recognized.", "", "", None,
+                    gr.update(visible=True, value="⚠️ The speech-recognition result is empty."))
 
         summary = summarize_long(text)
 
@@ -133,10 +158,12 @@ def transcribe_and_summarize(audio_path, model_size, auto_detect_lang):
 {text}
 """
         file_path = save_minutes_to_file(minutes)
-        return text, summary, minutes, file_path,
+        return (text, summary, minutes, file_path,
+                gr.update(visible=True, value="✅ Done! Check the results."))
 
     except Exception as e:
-        return "", "", "", None,
+        return ("", "", "", None,
+                gr.update(visible=True, value=f"⚠️ Error while processing: {e}"))
     finally:
         if wav_path and wav_path != audio_path and os.path.exists(wav_path):
             try: os.remove(wav_path)
@@ -218,7 +245,6 @@ with gr.Blocks(title="Automatic Meeting-Minutes Generator (Whisper)", theme=theme, css=CU
 
     gr.HTML('<div class="footer">© Whisper + KoBART · Designed for Colab & Hugging Face Spaces</div>')
 
-    # Actions
     run_button.click(
         fn=transcribe_and_summarize,
         inputs=[audio_input, model_size, auto_detect],
@@ -237,3 +263,4 @@ else:
     demo.launch()
 
 
+
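The warning strings above all point to the same durable fix on Spaces: declare dependencies at build time instead of installing them at runtime. Assuming the same pins that _ensure_runtime installs (the package list comes straight from the diff; only the split into two files is inferred from Spaces conventions), that is a one-line apt.txt plus a matching requirements.txt in the repo root.

apt.txt (system packages, one per line):

    ffmpeg

requirements.txt (mirroring the pins in _ensure_runtime):

    torch
    transformers==4.*
    sentencepiece
    faster-whisper==1.*
    pydub
    gradio==4.*

With both files present, _ensure_runtime finds nothing to install and the Space never hits the pip path at startup.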
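The collapsed hunks hide the body of summarize_long. As a minimal sketch of the chunk-then-merge pattern the new comment describes (illustrative only: the actual body is not shown in this diff, and the length parameters are placeholders):

    def summarize_long(text: str) -> str:
        # summarize each ~850-char chunk, then summarize the merged partials
        chunks = chunk_text(text)
        partial = []
        for c in chunks:
            out = _summarizer(c, max_length=160, min_length=40, do_sample=False)
            partial.append(out[0]["summary_text"])
        merged = " ".join(partial)
        if len(chunks) == 1:
            return merged
        final = _summarizer(merged[:850], max_length=160, min_length=40, do_sample=False)
        return final[0]["summary_text"]

The [{"summary_text": ...}] output shape is the standard return format of a transformers summarization pipeline.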
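Note that transcribe_and_summarize now always returns a five-tuple whose last element is a gr.update(visible=True, value=...) aimed at a status banner. The outputs list of run_button.click is collapsed in this diff, so the component names below are stand-ins, but the wiring has to bind five components in the same order as the tuple:

    status_msg = gr.Markdown(visible=False)  # revealed by gr.update on each run
    run_button.click(
        fn=transcribe_and_summarize,
        inputs=[audio_input, model_size, auto_detect],
        outputs=[transcript_box, summary_box, minutes_box, download_file, status_msg],
    )

Here transcript_box, summary_box, minutes_box, and download_file stand for whatever Textbox/File components the Blocks layout actually defines; only audio_input, model_size, and auto_detect appear verbatim in the diff.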