Spaces:

soiz
/

audio-text

Running

App Files Files Community

soiz committed on Nov 22, 2024

Commit

321acf4

verified ·

1 Parent(s): 00d8be3

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -18

app.py CHANGED Viewed

@@ -1,24 +1,53 @@
-import gradio as gr
 from transformers import pipeline
-# Whisperモデルを読み込み
-model_name = "openai/whisper-small"  # 他のサイズも使用可能（tiny, base, largeなど）
 transcriber = pipeline("automatic-speech-recognition", model=model_name)
-def transcribe(audio):
     """
-    音声ファイルを文字起こしする関数
     """
-    result = transcriber(audio)
-    return result["text"]
-# Gradioインターフェース
-with gr.Blocks() as demo:
-    gr.Markdown("### 音声文字起こしデモ")
-    audio_input = gr.Audio(type="filepath", label="音声ファイルをアップロード")
-    text_output = gr.Textbox(label="文字起こし結果")
-    transcribe_button = gr.Button("文字起こし")
-    transcribe_button.click(transcribe, inputs=audio_input, outputs=text_output)
-# アプリケーションを起動
-demo.launch()

+import base64
+import tempfile
+from flask import Flask, request, jsonify
 from transformers import pipeline
+# Flask application object; the route handler below is registered on it.
+app = Flask(__name__)
+# Prepare the Whisper model.
+# The pipeline is built once at module import so each request reuses the
+# same `transcriber` object instead of constructing a new one.
+model_name = "openai/whisper-small"
 transcriber = pipeline("automatic-speech-recognition", model=model_name)
def decode_audio(data_url):
    """
    Decode a base64 data URL and save its payload to a temporary file.

    Args:
        data_url: String of the form ``<header>,<base64 payload>``, for
            example ``data:audio/wav;base64,UklGR...``. Only the part after
            the first comma is decoded; the header is not inspected.

    Returns:
        Path of a newly created temporary file (suffix ``.wav``) holding the
        decoded bytes. The file is created with ``delete=False``, so the
        caller is responsible for removing it.

    Raises:
        TypeError: If ``data_url`` is not a string.
        ValueError: If ``data_url`` has no ``,`` separator or the payload is
            not valid base64.
    """
    import os  # function-scope import: only needed to clean up a failed write

    if not isinstance(data_url, str):
        raise TypeError("dataURL must be a string")

    # partition() never raises on a missing separator, so the malformed case
    # can be reported with a clear message instead of an unpacking error.
    _header, separator, encoded = data_url.partition(",")
    if not separator:
        raise ValueError("Malformed dataURL: expected '<header>,<base64 data>'")

    try:
        audio_data = base64.b64decode(encoded)
    except ValueError as exc:  # binascii.Error is a ValueError subclass
        raise ValueError("Malformed dataURL: payload is not valid base64") from exc

    temp_audio_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        # The with-block closes (and flushes) the file before the path is returned.
        with temp_audio_file:
            temp_audio_file.write(audio_data)
    except Exception:
        # delete=False means nothing else will remove a half-written file.
        os.unlink(temp_audio_file.name)
        raise
    return temp_audio_file.name
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """
    Transcribe audio sent as a data URL in a POST request.

    The request body is expected to be a JSON object with a "dataURL" key
    holding the audio as a base64 data URL.

    Returns:
        A JSON response: ``{"transcription": <text>}`` on success;
        ``{"error": ...}`` with status 400 when the body is not a JSON object
        or has no "dataURL"; ``{"error": ...}`` with status 500 when decoding
        or transcription fails.
    """
    import os  # function-scope import: the file's import block has no `os`

    # silent=True returns None for a missing/invalid JSON body instead of
    # raising, so that case gets the same 400 as a missing key.
    data = request.get_json(silent=True)
    data_url = data.get("dataURL") if isinstance(data, dict) else None
    if not data_url:
        return jsonify({"error": "Missing 'dataURL' in request"}), 400

    audio_file_path = None
    try:
        # Decode the audio data into a temporary file.
        audio_file_path = decode_audio(data_url)
        # Transcribe with Whisper.
        result = transcriber(audio_file_path)
        return jsonify({"transcription": result["text"]})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # decode_audio creates the file with delete=False, so remove it here
        # on both the success and the failure path.
        if audio_file_path is not None:
            try:
                os.remove(audio_file_path)
            except OSError:
                # Best-effort cleanup; a failed delete must not replace the response.
                pass
if __name__ == '__main__':
    import os

    # The Werkzeug interactive debugger lets anyone who can reach the server
    # execute arbitrary code, so debug mode is opt-in (FLASK_DEBUG=1) rather
    # than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled)