syamashita committed
Commit 4c447c4 · verified · 1 parent: f486b36

Update app.py

Files changed (1): app.py (+89 −91)
app.py CHANGED
@@ -1,113 +1,111 @@
+# app.py
 import streamlit as st
 import tempfile
-import requests
-import os
 from pydub import AudioSegment
 from pyannote.audio import Pipeline
 from faster_whisper import WhisperModel
 from docx import Document
 from io import BytesIO
+import os
+import colorsys
 
-# ------------------------------------------
-# Token validation function
-# ------------------------------------------
-def is_token_valid(token: str) -> bool:
-    try:
-        headers = {"Authorization": f"Bearer {token}"}
-        response = requests.get("https://huggingface.co/api/whoami-v2", headers=headers)
-        return response.status_code == 200
-    except:
-        return False
-
-# ------------------------------------------
-# ✅ Streamlit UI
-# ------------------------------------------
-st.set_page_config(page_title="Speaker-Diarized Transcription", layout="centered")
-st.title("🎤 Speaker-Diarized Transcription App (Hugging Face-compatible)")
-
-st.markdown("Upload an audio file and this app performs speaker diarization and transcription, displays the result color-coded by speaker, and lets you download it as a Word file.")
+# Hugging Face access token
+HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN", "hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXX")
 
-# Hugging Face token input (secure input)
-token = st.text_input("🔑 Enter your Hugging Face access token", type="password")
+# Streamlit setup
+st.set_page_config(page_title="Speaker-Diarized Transcription", layout="centered")
+st.title("🧠 Speaker-Diarized Transcription App")
+
+# Model selection (Whisper)
+model_size = st.selectbox("Select a Whisper model", ["tiny", "base", "small", "medium", "large-v2"], index=2)
+
+# Audio file upload
+uploaded_file = st.file_uploader("Upload an audio file (mp3, wav, m4a)", type=["mp3", "wav", "m4a"])
+
+def generate_color_palette(n):
+    """Automatically generate colors based on the number of speakers"""
+    colors = []
+    for i in range(n):
+        hue = i / n
+        lightness = 0.85
+        saturation = 0.6
+        rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
+        hex_color = '#%02x%02x%02x' % tuple(int(c * 255) for c in rgb)
+        colors.append(hex_color)
+    return colors
 
-uploaded_file = st.file_uploader("🎵 Upload an audio file (mp3, wav, m4a)", type=["mp3", "wav", "m4a"])
 if uploaded_file:
     st.audio(uploaded_file)
 
     if st.button("▶️ Start transcription"):
-
-        # Token check
-        if not token or not is_token_valid(token):
-            st.error("❌ Please enter a valid Hugging Face token.")
-            st.stop()
-
         status = st.info("Preparing…")
         progress = st.progress(0)
 
-        # Save to a temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-            sound = AudioSegment.from_file(uploaded_file)
-            sound.export(tmp.name, format="wav")
-            audio_path = tmp.name
-
-        progress.progress(20)
-
-        # Speaker diarization
-        status.info("Diarizing speakers…")
         try:
-            pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=token)
+            # Convert to .wav and store in a temporary file
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
+                audio = AudioSegment.from_file(uploaded_file)
+                audio.export(tmp_wav.name, format="wav")
+                audio_path = tmp_wav.name
+
+            progress.progress(20)
+            status.info("🔎 Diarizing speakers...")
+            pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=HF_TOKEN)
             diarization = pipeline(audio_path)
-        except Exception as e:
-            st.error(f"❌ Diarization error: {e}")
-            st.stop()
-
-        progress.progress(50)
-
-        # Transcribe with Whisper
-        status.info("Transcribing with the Whisper model…")
-        model = WhisperModel("small", compute_type="int8")
-        segments, _ = model.transcribe(audio_path, vad_filter=True, language="ja")
-
-        progress.progress(70)
-
-        # Build per-speaker text
-        transcript = ""
-        word_blocks = []
-        for segment in segments:
-            start = segment.start
-            speaker = "unknown"
-            for turn in diarization.itertracks(yield_label=True):
-                if turn[0].start <= start <= turn[0].end:
-                    speaker = turn[2]
-                    break
-            line = f"[{speaker}] {segment.text.strip()}"
-            word_blocks.append((speaker, segment.text.strip()))
-            transcript += line + "\n"
-
-        progress.progress(90)
-        status.success("Done!")
-
-        # Display (color-coded)
-        st.subheader("📝 Transcription by speaker")
-        colors = ["#E6F7FF", "#FFFAE6", "#E6FFEA", "#F9E6FF"]
-        speakers = list(sorted(set(s for s, _ in word_blocks)))
-        color_map = {s: colors[i % len(colors)] for i, s in enumerate(speakers)}
-
-        for speaker, text in word_blocks:
-            st.markdown(
-                f"<div style='background-color:{color_map[speaker]}; padding:8px; border-radius:5px; margin-bottom:4px;'>"
-                f"<b>{speaker}</b>: {text}"
-                f"</div>",
-                unsafe_allow_html=True
+
+            progress.progress(50)
+            status.info("📝 Transcribing with the Whisper model...")
+            model = WhisperModel(model_size, compute_type="int8")
+            segments, _ = model.transcribe(audio_path, language="ja", vad_filter=True)
+
+            progress.progress(70)
+            status.info("📄 Formatting results…")
+
+            # Merge speaker labels into the segments
+            word_blocks = []
+            for segment in segments:
+                start = segment.start
+                speaker = "unknown"
+                for turn in diarization.itertracks(yield_label=True):
+                    if turn[0].start <= start <= turn[0].end:
+                        speaker = turn[2]
+                        break
+                word_blocks.append((speaker, segment.text.strip()))
+
+            # Generate speaker colors
+            unique_speakers = sorted(set(s for s, _ in word_blocks))
+            colors = generate_color_palette(len(unique_speakers))
+            color_map = {spk: col for spk, col in zip(unique_speakers, colors)}
+
+            progress.progress(90)
+            status.success("✅ Done!")
+
+            # Display
+            st.subheader("🗣️ Transcription by speaker")
+            for speaker, text in word_blocks:
+                st.markdown(
+                    f"<div style='background-color:{color_map[speaker]}; padding:8px; border-radius:5px; margin-bottom:6px;'>"
+                    f"<b>{speaker}</b>: {text}"
+                    f"</div>",
+                    unsafe_allow_html=True
+                )
+
+            # Word export
+            doc = Document()
+            for speaker, text in word_blocks:
+                doc.add_paragraph(f"{speaker}: {text}")
+            doc_io = BytesIO()
+            doc.save(doc_io)
+            doc_io.seek(0)
+
+            st.download_button(
+                label="💾 Download as Word file",
+                data=doc_io,
+                file_name="transcription.docx",
+                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
             )
 
-        # Export to Word
-        doc = Document()
-        for speaker, text in word_blocks:
-            doc.add_paragraph(f"{speaker}: {text}")
-        docx_io = BytesIO()
-        doc.save(docx_io)
-        docx_io.seek(0)
+            progress.progress(100)
 
-        st.download_button("💾 Download as Word file", docx_io, file_name="transcription.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")
-        progress.progress(100)
+        except Exception as e:
+            st.error(f"❌ An error occurred:\n\n{e}")
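Note on the token change: this commit removes the interactive token prompt and the is_token_valid() check, and HF_TOKEN now falls back to a placeholder string whenever the HUGGINGFACE_TOKEN environment variable is unset, so an invalid token only surfaces later as a pyannote download error inside the try block. Below is a minimal sketch of how the removed whoami-v2 check could be kept alongside the new environment-variable approach; get_validated_token is a hypothetical helper, not part of this commit.

import os
import requests
import streamlit as st

def get_validated_token() -> str:
    """Hypothetical helper: read HUGGINGFACE_TOKEN and fail fast if it is missing or invalid."""
    token = os.getenv("HUGGINGFACE_TOKEN", "")
    if token:
        # Same endpoint the removed is_token_valid() used to verify the token
        headers = {"Authorization": f"Bearer {token}"}
        if requests.get("https://huggingface.co/api/whoami-v2", headers=headers, timeout=10).status_code == 200:
            return token
    st.error("❌ Set a valid HUGGINGFACE_TOKEN (for example, as a Space secret).")
    st.stop()

HF_TOKEN = get_validated_token()

On a Hugging Face Space this keeps the token out of the repository while preserving the old version's fail-fast behavior before any model download starts.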