Spaces:

syamashita
/

speaker_transcriber

Runtime error

App Files Files Community

syamashita committed on Mar 28

Commit

f486b36

verified ·

1 Parent(s): f2ee535

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -8

app.py CHANGED Viewed

@@ -1,41 +1,76 @@
-# app.py
 import streamlit as st
 import tempfile
 from pydub import AudioSegment
 from pyannote.audio import Pipeline
 from faster_whisper import WhisperModel
 from docx import Document
 from io import BytesIO
-# Streamlit設定
 st.set_page_config(page_title="話者分離付き文字起こし", layout="centered")
 st.title("🎤 話者分離付き文字起こしアプリ（Hugging Face対応）")
-# 音声アップロード
-uploaded_file = st.file_uploader("音声ファイルをアップロード（mp3, wav, m4a）", type=["mp3", "wav", "m4a"])
 if uploaded_file:
     st.audio(uploaded_file)
     if st.button("▶️ 文字起こしスタート"):
         status = st.info("準備中…")
         progress = st.progress(0)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             sound = AudioSegment.from_file(uploaded_file)
             sound.export(tmp.name, format="wav")
             audio_path = tmp.name
         progress.progress(20)
         status.info("話者分離中…")
-        diarization = Pipeline.from_pretrained("pyannote/speaker-diarization")(audio_path)
         progress.progress(50)
         status.info("Whisperモデルで文字起こし中…")
         model = WhisperModel("small", compute_type="int8")
         segments, _ = model.transcribe(audio_path, vad_filter=True, language="ja")
         progress.progress(70)
-        # 話者情報を付与
         transcript = ""
         word_blocks = []
         for segment in segments:
@@ -52,7 +87,7 @@ if uploaded_file:
         progress.progress(90)
         status.success("完了！")
-        # 色分け＆表示
         st.subheader("📝 話者ごとの文字起こし結果")
         colors = ["#E6F7FF", "#FFFAE6", "#E6FFEA", "#F9E6FF"]
         speakers = list(sorted(set(s for s, _ in word_blocks)))
@@ -66,7 +101,7 @@ if uploaded_file:
                 unsafe_allow_html=True
             )
-        # Wordファイルとして出力
         doc = Document()
         for speaker, text in word_blocks:
             doc.add_paragraph(f"{speaker}: {text}")

 import streamlit as st
 import tempfile
+import requests
+import os
 from pydub import AudioSegment
 from pyannote.audio import Pipeline
 from faster_whisper import WhisperModel
 from docx import Document
 from io import BytesIO
+# ------------------------------------------
+# ✅ トークン検証関数
+# ------------------------------------------
+def is_token_valid(token: str) -> bool:
+    try:
+        headers = {"Authorization": f"Bearer {token}"}
+        response = requests.get("https://huggingface.co/api/whoami-v2", headers=headers)
+        return response.status_code == 200
+    except:
+        return False
+# ------------------------------------------
+# ✅ Streamlit UI
+# ------------------------------------------
 st.set_page_config(page_title="話者分離付き文字起こし", layout="centered")
 st.title("🎤 話者分離付き文字起こしアプリ（Hugging Face対応）")
+st.markdown("このアプリは、音声ファイルをアップロードすると話者分離と文字起こしを行い、話者ごとに色分けして表示し、Wordファイルでダウンロードできます。")
+# Hugging Face トークンの入力（安全な入力）
+token = st.text_input("🔑 Hugging Face アクセストークンを入力してください", type="password")
+uploaded_file = st.file_uploader("🎵 音声ファイルをアップロード（mp3, wav, m4a）", type=["mp3", "wav", "m4a"])
 if uploaded_file:
     st.audio(uploaded_file)
     if st.button("▶️ 文字起こしスタート"):
+        # トークンチェック
+        if not token or not is_token_valid(token):
+            st.error("❌ 有効な Hugging Face トークンを入力してください。")
+            st.stop()
         status = st.info("準備中…")
         progress = st.progress(0)
+        # 一時ファイルに保存
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
             sound = AudioSegment.from_file(uploaded_file)
             sound.export(tmp.name, format="wav")
             audio_path = tmp.name
         progress.progress(20)
+        # 話者分離
         status.info("話者分離中…")
+        try:
+            pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=token)
+            diarization = pipeline(audio_path)
+        except Exception as e:
+            st.error(f"❌ 話者分離エラー: {e}")
+            st.stop()
         progress.progress(50)
+        # Whisperで文字起こし
         status.info("Whisperモデルで文字起こし中…")
         model = WhisperModel("small", compute_type="int8")
         segments, _ = model.transcribe(audio_path, vad_filter=True, language="ja")
         progress.progress(70)
+        # 話者ごとのテキスト作成
         transcript = ""
         word_blocks = []
         for segment in segments:
         progress.progress(90)
         status.success("完了！")
+        # 表示（色分け）
         st.subheader("📝 話者ごとの文字起こし結果")
         colors = ["#E6F7FF", "#FFFAE6", "#E6FFEA", "#F9E6FF"]
         speakers = list(sorted(set(s for s, _ in word_blocks)))
                 unsafe_allow_html=True
             )
+        # Wordファイル出力
         doc = Document()
         for speaker, text in word_blocks:
             doc.add_paragraph(f"{speaker}: {text}")