Kentlo committed on
Commit
f1de142
·
verified ·
1 Parent(s): 0478965

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -39
app.py CHANGED
@@ -1,14 +1,15 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- Colab & Hugging Face Spaces ๊ณต์šฉ app.py (์„ธ๋ จ๋œ UI ๋ฒ„์ „)
4
- - Colab: ๋ˆ„๋ฝ ํŒจํ‚ค์ง€ ์ž๋™ ์„ค์น˜
5
- - Spaces: requirements.txt๋กœ ์„ค์น˜๋œ ์ƒํƒœ ๊ฐ€์ •
6
- - Whisper(faster-whisper)๋กœ STT + koBART ์š”์•ฝ
7
  """
8
 
9
- import os, sys, subprocess, tempfile
10
  from datetime import datetime
11
 
 
12
  def _in_colab() -> bool:
13
  try:
14
  import google.colab # noqa
@@ -16,48 +17,64 @@ def _in_colab() -> bool:
16
  except Exception:
17
  return False
18
 
19
- def _ensure_packages():
20
- if not _in_colab():
21
- return
 
 
 
 
 
22
  # ffmpeg
23
- try:
24
- subprocess.run(["ffmpeg", "-version"], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
25
- except Exception:
26
- subprocess.run(["apt-get", "update", "-y"], check=False)
27
- subprocess.run(["apt-get", "install", "-y", "ffmpeg"], check=False)
28
- # pip pkgs
29
- def pip_install(pkgs):
30
- subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet"] + pkgs)
31
- for mod, pkg in [("faster_whisper","faster-whisper==1.*"),
32
- ("gradio","gradio==4.*"),
33
- ("transformers","transformers==4.*"),
34
- ("pydub","pydub")]:
 
 
 
 
35
  try:
36
  __import__(mod)
37
  except Exception:
38
- pip_install([pkg])
 
 
 
39
 
40
- _ensure_packages()
41
 
42
- # ===== Imports =====
43
  import gradio as gr
44
  from pydub import AudioSegment
45
  from transformers import pipeline
46
  from faster_whisper import WhisperModel
 
47
 
48
- # ===== Models =====
49
  DEFAULT_WHISPER_SIZE = os.getenv("WHISPER_MODEL_SIZE", "small") # tiny/base/small/medium/large-v3
50
- device = "cuda" if os.path.exists("/proc/driver/nvidia") else "cpu"
51
  compute_type = "float16" if device == "cuda" else "int8"
52
 
53
- # 최초 인스턴스 (필요 시 고급설정에서 변경)
54
  _asr = WhisperModel(DEFAULT_WHISPER_SIZE, device=device, compute_type=compute_type)
55
- _summarizer = pipeline("summarization", model="gogamza/kobart-summarization")
 
56
 
57
  # ===== Utils =====
58
  def convert_to_wav(src_path: str) -> str:
59
  if src_path.lower().endswith(".wav"):
60
  return src_path
 
 
 
61
  sound = AudioSegment.from_file(src_path)
62
  fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
63
  os.close(fd)
@@ -78,6 +95,7 @@ def chunk_text(txt: str, max_chars=850):
78
  return parts
79
 
80
  def summarize_long(text: str) -> str:
 
81
  chunks = chunk_text(text)
82
  partial = []
83
  for c in chunks:
@@ -93,23 +111,29 @@ def save_minutes_to_file(minutes_text: str) -> str:
93
  f.write(minutes_text)
94
  return path
95
 
96
- # ===== Core Logic =====
97
  def transcribe_and_summarize(audio_path, model_size, auto_detect_lang):
 
 
 
 
 
98
  if not audio_path:
99
- return "⚠️ 오디오가 입력되지 않았습니다.", "", "", None, gr.update(visible=True, value="⚠️ 오디오를 업로드하거나 녹음해 주세요.")
 
100
 
101
- # Whisper 모델 변경 요청 시 재로딩 (간단 캐시)
102
  global _asr
103
- if model_size and model_size != DEFAULT_WHISPER_SIZE:
104
- try:
105
  _asr = WhisperModel(model_size, device=device, compute_type=compute_type)
106
- except Exception as e:
107
- return "", "", "", None, gr.update(visible=True, value=f"⚠️ 모델 로드 실패: {e}")
 
108
 
109
  wav_path = None
110
  try:
111
  wav_path = convert_to_wav(audio_path)
112
-
113
  language = None if auto_detect_lang else "ko"
114
  segments, info = _asr.transcribe(
115
  wav_path,
@@ -119,7 +143,8 @@ def transcribe_and_summarize(audio_path, model_size, auto_detect_lang):
119
  )
120
  text = "".join(seg.text for seg in segments).strip()
121
  if not text:
122
- return "⚠️ 인식된 텍스트가 없습니다.", "", "", None, gr.update(visible=True, value="⚠️ 음성 인식 결과가 비어 있습니다.")
 
123
 
124
  summary = summarize_long(text)
125
 
@@ -133,10 +158,12 @@ def transcribe_and_summarize(audio_path, model_size, auto_detect_lang):
133
  {text}
134
  """
135
  file_path = save_minutes_to_file(minutes)
136
- return text, summary, minutes, file_path, gr.update(visible=True, value="✅ 완료! 결과를 확인하세요.")
 
137
 
138
  except Exception as e:
139
- return "", "", "", None, gr.update(visible=True, value=f"⚠️ 처리 중 오류: {e}")
 
140
  finally:
141
  if wav_path and wav_path != audio_path and os.path.exists(wav_path):
142
  try: os.remove(wav_path)
@@ -218,7 +245,6 @@ with gr.Blocks(title="회의록 자동 생성기 (Whisper)", theme=theme, css=CU
218
 
219
  gr.HTML('<div class="footer">© Whisper + KoBART · Designed for Colab & Hugging Face Spaces</div>')
220
 
221
- # Actions
222
  run_button.click(
223
  fn=transcribe_and_summarize,
224
  inputs=[audio_input, model_size, auto_detect],
@@ -237,3 +263,4 @@ else:
237
  demo.launch()
238
 
239
 
 
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ Colab & Hugging Face Spaces 공용 app.py (런타임 설치 보강)
4
+ - PyTorch 누락 시 자동 설치 (Colab/Spaces 공통)
5
+ - Colab: ffmpeg 자동 설치 / Spaces: ffmpeg 없으면 경고 표시
6
+ - Whisper(faster-whisper)로 STT + koBART 요약 + 세련된 Gradio UI
7
  """
8
 
9
+ import os, sys, subprocess, tempfile, shutil
10
  from datetime import datetime
11
 
12
+ # ===== Env Detect =====
13
  def _in_colab() -> bool:
14
  try:
15
  import google.colab # noqa
 
17
  except Exception:
18
  return False
19
 
20
+ def _has_cmd(cmd: str) -> bool:
21
+ return shutil.which(cmd) is not None
22
+
23
+ # ===== Runtime Installer =====
24
+ def _pip_install(pkgs):
25
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet"] + pkgs)
26
+
27
+ def _ensure_runtime():
28
  # ffmpeg
29
+ if not _has_cmd("ffmpeg"):
30
+ if _in_colab():
31
+ subprocess.run(["apt-get", "update", "-y"], check=False)
32
+ subprocess.run(["apt-get", "install", "-y", "ffmpeg"], check=False)
33
+ # Spaces๋Š” apt.txt ์‚ฌ์šฉ ๊ถŒ์žฅ โ†’ ์—ฌ๊ธฐ์„  ๊ฒฝ๊ณ ๋งŒ
34
+
35
+ # python packages
36
+ need = []
37
+ for mod, pkg in [
38
+ ("torch", "torch"),
39
+ ("transformers", "transformers==4.*"),
40
+ ("sentencepiece", "sentencepiece"),
41
+ ("faster_whisper", "faster-whisper==1.*"),
42
+ ("pydub", "pydub"),
43
+ ("gradio", "gradio==4.*"),
44
+ ]:
45
  try:
46
  __import__(mod)
47
  except Exception:
48
+ need.append(pkg)
49
+
50
+ if need:
51
+ _pip_install(need)
52
 
53
+ _ensure_runtime()
54
 
55
+ # ===== Imports (after install) =====
56
  import gradio as gr
57
  from pydub import AudioSegment
58
  from transformers import pipeline
59
  from faster_whisper import WhisperModel
60
+ import torch
61
 
62
+ # ===== Device / Models =====
63
  DEFAULT_WHISPER_SIZE = os.getenv("WHISPER_MODEL_SIZE", "small") # tiny/base/small/medium/large-v3
64
+ device = "cuda" if torch.cuda.is_available() and os.path.exists("/proc/driver/nvidia") else "cpu"
65
  compute_type = "float16" if device == "cuda" else "int8"
66
 
 
67
  _asr = WhisperModel(DEFAULT_WHISPER_SIZE, device=device, compute_type=compute_type)
68
+ _summarizer = pipeline("summarization", model="gogamza/kobart-summarization",
69
+ device=0 if device == "cuda" else -1)
70
 
71
  # ===== Utils =====
72
  def convert_to_wav(src_path: str) -> str:
73
  if src_path.lower().endswith(".wav"):
74
  return src_path
75
+ # ffmpeg 필요
76
+ if not _has_cmd("ffmpeg"):
77
+ raise RuntimeError("ffmpeg๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. (Spaces: apt.txt์— 'ffmpeg' ์ถ”๊ฐ€, Colab: ์ž๋™ ์„ค์น˜๋จ)")
78
  sound = AudioSegment.from_file(src_path)
79
  fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
80
  os.close(fd)
 
95
  return parts
96
 
97
  def summarize_long(text: str) -> str:
98
+ # koBART๊ฐ€ ๊ธด ์ž…๋ ฅ์— ์•ฝํ•˜๋ฏ€๋กœ ๋ถ„ํ•  ์š”์•ฝ + ๋ฉ”ํƒ€ ์š”์•ฝ
99
  chunks = chunk_text(text)
100
  partial = []
101
  for c in chunks:
 
111
  f.write(minutes_text)
112
  return path
113
 
114
+ # ===== Core =====
115
  def transcribe_and_summarize(audio_path, model_size, auto_detect_lang):
116
+ # ffmpeg ์ฒดํฌ (Spaces์—์„œ ์ž์ฃผ ๋น ์ง)
117
+ if not _has_cmd("ffmpeg"):
118
+ return ("", "", "", None,
119
+ gr.update(visible=True, value="โš ๏ธ ffmpeg๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. Spaces์—์„œ๋Š” apt.txt์— 'ffmpeg'๋ฅผ ์ถ”๊ฐ€ํ•˜์„ธ์š”."))
120
+
121
  if not audio_path:
122
+ return ("⚠️ 오디오가 입력되지 않았습니다.", "", "", None,
123
+ gr.update(visible=True, value="⚠️ 오디오를 업로드하거나 녹음해 주세요."))
124
 
125
+ # 모델 크기 변경 시 동적 재로딩
126
  global _asr
127
+ try:
128
+ if model_size and model_size != DEFAULT_WHISPER_SIZE:
129
  _asr = WhisperModel(model_size, device=device, compute_type=compute_type)
130
+ except Exception as e:
131
+ return ("", "", "", None,
132
+ gr.update(visible=True, value=f"⚠️ Whisper 모델 로드 실패: {e}"))
133
 
134
  wav_path = None
135
  try:
136
  wav_path = convert_to_wav(audio_path)
 
137
  language = None if auto_detect_lang else "ko"
138
  segments, info = _asr.transcribe(
139
  wav_path,
 
143
  )
144
  text = "".join(seg.text for seg in segments).strip()
145
  if not text:
146
+ return ("⚠️ 인식된 텍스트가 없습니다.", "", "", None,
147
+ gr.update(visible=True, value="⚠️ 음성 인식 결과가 비어 있습니다."))
148
 
149
  summary = summarize_long(text)
150
 
 
158
  {text}
159
  """
160
  file_path = save_minutes_to_file(minutes)
161
+ return (text, summary, minutes, file_path,
162
+ gr.update(visible=True, value="✅ 완료! 결과를 확인하세요."))
163
 
164
  except Exception as e:
165
+ return ("", "", "", None,
166
+ gr.update(visible=True, value=f"⚠️ 처리 중 오류: {e}"))
167
  finally:
168
  if wav_path and wav_path != audio_path and os.path.exists(wav_path):
169
  try: os.remove(wav_path)
 
245
 
246
  gr.HTML('<div class="footer">© Whisper + KoBART · Designed for Colab & Hugging Face Spaces</div>')
247
 
 
248
  run_button.click(
249
  fn=transcribe_and_summarize,
250
  inputs=[audio_input, model_size, auto_detect],
 
263
  demo.launch()
264
 
265
 
266
+