syamashita committed on
Commit
5cf8e9a
·
verified ·
1 Parent(s): 4c447c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -40
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  import streamlit as st
3
  import tempfile
4
  from pydub import AudioSegment
@@ -6,24 +5,23 @@ from pyannote.audio import Pipeline
6
  from faster_whisper import WhisperModel
7
  from docx import Document
8
  from io import BytesIO
9
- import os
10
  import colorsys
11
 
12
- # Hugging Face アクセストークン
13
- HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN", "hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXX")
14
-
15
- # Streamlit設定
16
  st.set_page_config(page_title="話者分離付き文字起こし", layout="centered")
17
  st.title("🧠 話者分離付き文字起こしアプリ")
18
 
19
- # モデル選択(Whisper
20
  model_size = st.selectbox("Whisperモデルを選択", ["tiny", "base", "small", "medium", "large-v2"], index=2)
21
 
22
- # 音声ファイルアップロード
23
- uploaded_file = st.file_uploader("音声ファイルをアップロード(mp3, wav, m4a)", type=["mp3", "wav", "m4a"])
 
 
 
24
 
 
25
  def generate_color_palette(n):
26
- """人数に応じて色を自動生成"""
27
  colors = []
28
  for i in range(n):
29
  hue = i / n
@@ -34,7 +32,8 @@ def generate_color_palette(n):
34
  colors.append(hex_color)
35
  return colors
36
 
37
- if uploaded_file:
 
38
  st.audio(uploaded_file)
39
 
40
  if st.button("▶️ 文字起こしスタート"):
@@ -42,26 +41,23 @@ if uploaded_file:
42
  progress = st.progress(0)
43
 
44
  try:
45
- # .wavへ変換して一時保存
46
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
47
  audio = AudioSegment.from_file(uploaded_file)
48
- audio.export(tmp_wav.name, format="wav")
49
- audio_path = tmp_wav.name
50
 
51
  progress.progress(20)
52
- status.info("🔎 話者分離中...")
53
- pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=HF_TOKEN)
54
  diarization = pipeline(audio_path)
55
 
56
  progress.progress(50)
57
- status.info("📝 Whisperモデルで文字起こし中...")
58
  model = WhisperModel(model_size, compute_type="int8")
59
  segments, _ = model.transcribe(audio_path, language="ja", vad_filter=True)
60
 
61
- progress.progress(70)
62
- status.info("📄 結果を整形中…")
63
-
64
- # 話者ラベルをマージ
65
  word_blocks = []
66
  for segment in segments:
67
  start = segment.start
@@ -72,40 +68,36 @@ if uploaded_file:
72
  break
73
  word_blocks.append((speaker, segment.text.strip()))
74
 
75
- # 話者色を生成
 
 
 
 
76
  unique_speakers = sorted(set(s for s, _ in word_blocks))
77
  colors = generate_color_palette(len(unique_speakers))
78
  color_map = {spk: col for spk, col in zip(unique_speakers, colors)}
79
 
80
- progress.progress(90)
81
- status.success("✅ 完了!")
82
-
83
- # 表示
84
- st.subheader("🗣️ 話者ごとの文字起こし結果")
85
  for speaker, text in word_blocks:
86
  st.markdown(
87
  f"<div style='background-color:{color_map[speaker]}; padding:8px; border-radius:5px; margin-bottom:6px;'>"
88
- f"<b>{speaker}</b>: {text}"
89
- f"</div>",
90
  unsafe_allow_html=True
91
  )
92
 
93
- # Word出力
94
  doc = Document()
95
  for speaker, text in word_blocks:
96
  doc.add_paragraph(f"{speaker}: {text}")
97
- doc_io = BytesIO()
98
- doc.save(doc_io)
99
- doc_io.seek(0)
100
 
101
- st.download_button(
102
- label="💾 Wordファイルでダウンロード",
103
- data=doc_io,
104
- file_name="transcription.docx",
105
- mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
106
- )
107
 
108
  progress.progress(100)
109
 
110
  except Exception as e:
111
  st.error(f"❌ エラーが発生しました:\n\n{e}")
 
 
 
 
 
1
  import streamlit as st
2
  import tempfile
3
  from pydub import AudioSegment
 
5
  from faster_whisper import WhisperModel
6
  from docx import Document
7
  from io import BytesIO
 
8
  import colorsys
9
 
10
+ # ページ設定
 
 
 
11
  st.set_page_config(page_title="話者分離付き文字起こし", layout="centered")
12
  st.title("🧠 話者分離付き文字起こしアプリ")
13
 
14
+ # Whisperモデル選択
15
  model_size = st.selectbox("Whisperモデルを選択", ["tiny", "base", "small", "medium", "large-v2"], index=2)
16
 
17
+ # Hugging Face トークン入力
18
+ token_input = st.text_input("🔐 Hugging Face アクセストークンを入力", type="password")
19
+
20
+ # 音声アップロード
21
+ uploaded_file = st.file_uploader("🎵 音声ファイルをアップロード(mp3, wav, m4a)", type=["mp3", "wav", "m4a"])
22
 
23
+ # カラーパレット生成
24
  def generate_color_palette(n):
 
25
  colors = []
26
  for i in range(n):
27
  hue = i / n
 
32
  colors.append(hex_color)
33
  return colors
34
 
35
+ # 処理スタート
36
+ if uploaded_file and token_input:
37
  st.audio(uploaded_file)
38
 
39
  if st.button("▶️ 文字起こしスタート"):
 
41
  progress = st.progress(0)
42
 
43
  try:
44
+ # 音声を.wavに変換
45
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
46
  audio = AudioSegment.from_file(uploaded_file)
47
+ audio.export(tmp.name, format="wav")
48
+ audio_path = tmp.name
49
 
50
  progress.progress(20)
51
+ status.info("🔍 話者分離中...")
52
+ pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=token_input)
53
  diarization = pipeline(audio_path)
54
 
55
  progress.progress(50)
56
+ status.info("📝 Whisperで文字起こし中...")
57
  model = WhisperModel(model_size, compute_type="int8")
58
  segments, _ = model.transcribe(audio_path, language="ja", vad_filter=True)
59
 
60
+ # 話者割当
 
 
 
61
  word_blocks = []
62
  for segment in segments:
63
  start = segment.start
 
68
  break
69
  word_blocks.append((speaker, segment.text.strip()))
70
 
71
+ progress.progress(80)
72
+ status.success("✅ 完了!")
73
+
74
+ # 表示と色分け
75
+ st.subheader("🗣️ 話者ごとの文字起こし")
76
  unique_speakers = sorted(set(s for s, _ in word_blocks))
77
  colors = generate_color_palette(len(unique_speakers))
78
  color_map = {spk: col for spk, col in zip(unique_speakers, colors)}
79
 
 
 
 
 
 
80
  for speaker, text in word_blocks:
81
  st.markdown(
82
  f"<div style='background-color:{color_map[speaker]}; padding:8px; border-radius:5px; margin-bottom:6px;'>"
83
+ f"<b>{speaker}</b>: {text}</div>",
 
84
  unsafe_allow_html=True
85
  )
86
 
87
+ # Wordファイル出力
88
  doc = Document()
89
  for speaker, text in word_blocks:
90
  doc.add_paragraph(f"{speaker}: {text}")
91
+ docx_io = BytesIO()
92
+ doc.save(docx_io)
93
+ docx_io.seek(0)
94
 
95
+ st.download_button("💾 Wordファイルでダウンロード", docx_io, file_name="transcription.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")
 
 
 
 
 
96
 
97
  progress.progress(100)
98
 
99
  except Exception as e:
100
  st.error(f"❌ エラーが発生しました:\n\n{e}")
101
+ elif uploaded_file and not token_input:
102
+ st.warning("🔐 Hugging Face のトークンを入力してください。")
103
+