Spaces:

syamashita
/

speaker_transcriber

Runtime error

App Files Community

syamashita committed on Mar 28

Commit

5cf8e9a

verified ·

1 Parent(s): 4c447c4

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -40

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# app.py
 import streamlit as st
 import tempfile
 from pydub import AudioSegment
@@ -6,24 +5,23 @@ from pyannote.audio import Pipeline
 from faster_whisper import WhisperModel
 from docx import Document
 from io import BytesIO
-import os
 import colorsys
-# Hugging Face アクセストークン
-HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN", "hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXX")
-# Streamlit設定
 st.set_page_config(page_title="話者分離付き文字起こし", layout="centered")
 st.title("🧠 話者分離付き文字起こしアプリ")
-# モデル選択（Whisper）
 model_size = st.selectbox("Whisperモデルを選択", ["tiny", "base", "small", "medium", "large-v2"], index=2)
-# 音声ファイルアップロード
-uploaded_file = st.file_uploader("音声ファイルをアップロード（mp3, wav, m4a）", type=["mp3", "wav", "m4a"])
 def generate_color_palette(n):
-    """人数に応じて色を自動生成"""
     colors = []
     for i in range(n):
         hue = i / n
@@ -34,7 +32,8 @@ def generate_color_palette(n):
         colors.append(hex_color)
     return colors
-if uploaded_file:
     st.audio(uploaded_file)
     if st.button("▶️ 文字起こしスタート"):
@@ -42,26 +41,23 @@ if uploaded_file:
         progress = st.progress(0)
         try:
-            # .wavへ変換して一時保存
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
                 audio = AudioSegment.from_file(uploaded_file)
-                audio.export(tmp_wav.name, format="wav")
-                audio_path = tmp_wav.name
             progress.progress(20)
-            status.info("🔎 話者分離中...")
-            pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=HF_TOKEN)
             diarization = pipeline(audio_path)
             progress.progress(50)
-            status.info("📝 Whisperモデルで文字起こし中...")
             model = WhisperModel(model_size, compute_type="int8")
             segments, _ = model.transcribe(audio_path, language="ja", vad_filter=True)
-            progress.progress(70)
-            status.info("📄 結果を整形中…")
-            # 話者ラベルをマージ
             word_blocks = []
             for segment in segments:
                 start = segment.start
@@ -72,40 +68,36 @@ if uploaded_file:
                         break
                 word_blocks.append((speaker, segment.text.strip()))
-            # 話者色を生成
             unique_speakers = sorted(set(s for s, _ in word_blocks))
             colors = generate_color_palette(len(unique_speakers))
             color_map = {spk: col for spk, col in zip(unique_speakers, colors)}
-            progress.progress(90)
-            status.success("✅ 完了！")
-            # 表示
-            st.subheader("🗣️ 話者ごとの文字起こし結果")
             for speaker, text in word_blocks:
                 st.markdown(
                     f"<div style='background-color:{color_map[speaker]}; padding:8px; border-radius:5px; margin-bottom:6px;'>"
-                    f"<b>{speaker}</b>: {text}"
-                    f"</div>",
                     unsafe_allow_html=True
                 )
-            # Word出力
             doc = Document()
             for speaker, text in word_blocks:
                 doc.add_paragraph(f"{speaker}: {text}")
-            doc_io = BytesIO()
-            doc.save(doc_io)
-            doc_io.seek(0)
-            st.download_button(
-                label="💾 Wordファイルでダウンロード",
-                data=doc_io,
-                file_name="transcription.docx",
-                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-            )
             progress.progress(100)
         except Exception as e:
             st.error(f"❌ エラーが発生しました:\n\n{e}")

 import streamlit as st
 import tempfile
 from pydub import AudioSegment
 from faster_whisper import WhisperModel
 from docx import Document
 from io import BytesIO
 import colorsys
+# ページ設定
 st.set_page_config(page_title="話者分離付き文字起こし", layout="centered")
 st.title("🧠 話者分離付き文字起こしアプリ")
+# Whisperモデル選択
 model_size = st.selectbox("Whisperモデルを選択", ["tiny", "base", "small", "medium", "large-v2"], index=2)
+# Hugging Face トークン入力
+token_input = st.text_input("🔐 Hugging Face アクセストークンを入力", type="password")
+# 音声アップロード
+uploaded_file = st.file_uploader("🎵 音声ファイルをアップロード（mp3, wav, m4a）", type=["mp3", "wav", "m4a"])
+# カラーパレット生成
 def generate_color_palette(n):
     colors = []
     for i in range(n):
         hue = i / n
         colors.append(hex_color)
     return colors
+# 処理スタート
+if uploaded_file and token_input:
     st.audio(uploaded_file)
     if st.button("▶️ 文字起こしスタート"):
         progress = st.progress(0)
         try:
+            # 音声を.wavに変換
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                 audio = AudioSegment.from_file(uploaded_file)
+                audio.export(tmp.name, format="wav")
+                audio_path = tmp.name
             progress.progress(20)
+            status.info("🔍 話者分離中...")
+            pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=token_input)
             diarization = pipeline(audio_path)
             progress.progress(50)
+            status.info("📝 Whisperで文字起こし中...")
             model = WhisperModel(model_size, compute_type="int8")
             segments, _ = model.transcribe(audio_path, language="ja", vad_filter=True)
+            # 話者割当
             word_blocks = []
             for segment in segments:
                 start = segment.start
                         break
                 word_blocks.append((speaker, segment.text.strip()))
+            progress.progress(80)
+            status.success("✅ 完了！")
+            # 表示と色分け
+            st.subheader("🗣️ 話者ごとの文字起こし")
             unique_speakers = sorted(set(s for s, _ in word_blocks))
             colors = generate_color_palette(len(unique_speakers))
             color_map = {spk: col for spk, col in zip(unique_speakers, colors)}
             for speaker, text in word_blocks:
                 st.markdown(
                     f"<div style='background-color:{color_map[speaker]}; padding:8px; border-radius:5px; margin-bottom:6px;'>"
+                    f"<b>{speaker}</b>: {text}</div>",
                     unsafe_allow_html=True
                 )
+            # Wordファイル出力
             doc = Document()
             for speaker, text in word_blocks:
                 doc.add_paragraph(f"{speaker}: {text}")
+            docx_io = BytesIO()
+            doc.save(docx_io)
+            docx_io.seek(0)
+            st.download_button("💾 Wordファイルでダウンロード", docx_io, file_name="transcription.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")
             progress.progress(100)
         except Exception as e:
             st.error(f"❌ エラーが発生しました:\n\n{e}")
+elif uploaded_file and not token_input:
+    st.warning("🔐 Hugging Face のトークンを入力してください。")