Spaces:

Futuresony
/

Speech-recognition

Sleeping

Futuresony committed on Feb 10

Commit

6c3b85f

verified ·

1 Parent(s): 4f467e8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,25 +1,20 @@
 import gradio as gr
-from transformers import pipeline
-import soundfile as sf
 from huggingface_hub import InferenceClient
-# Initialize Facebook MMS ASR model
-asr_model = pipeline("automatic-speech-recognition", model="facebook/mms-1b-all")
-# Initialize Facebook MMS TTS model
-tts_model = pipeline("text-to-speech", model="facebook/mms-tts")
-# Initialize the Chat Model (Gemma-2-9B or Futuresony.gguf)
 chat_client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")  # Change if needed
 def asr_chat_tts(audio):
     """
-    1. Convert Speech to Text (ASR)
-    2. Process text through Chat Model (LLM)
-    3. Convert response to Speech (TTS)
     """
-    # Step 1: Transcribe speech using Facebook MMS ASR
-    transcription = asr_model(audio)["text"]
     # Step 2: Process text through the chat model
     messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
@@ -30,12 +25,11 @@ def asr_chat_tts(audio):
         token = msg.choices[0].delta.content
         response += token
-    # Step 3: Convert response to speech using Facebook MMS TTS
-    speech = tts_model(response)
-    output_file = "generated_speech.wav"
-    sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])
-    return transcription, response, output_file
 # Gradio Interface
 with gr.Blocks() as demo:
@@ -54,4 +48,3 @@ with gr.Blocks() as demo:
 # Run the App
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+import subprocess
+import os
 from huggingface_hub import InferenceClient
+# Initialize Chatbot Model (Futuresony.gguf)
 chat_client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")  # Change if needed
 def asr_chat_tts(audio):
     """
+    1. Convert Speech to Text using asr.py
+    2. Process text through Chat Model (Futuresony.gguf)
+    3. Convert response to Speech using tts.py
     """
+    # Step 1: Run ASR (Speech-to-Text)
+    asr_output = subprocess.run(["python3", "asr.py", audio], capture_output=True, text=True)
+    transcription = asr_output.stdout.strip()
     # Step 2: Process text through the chat model
     messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
         token = msg.choices[0].delta.content
         response += token
+    # Step 3: Run TTS (Text-to-Speech)
+    tts_output_file = "generated_speech.wav"
+    subprocess.run(["python3", "tts.py", response, tts_output_file])
+    return transcription, response, tts_output_file
 # Gradio Interface
 with gr.Blocks() as demo:
 # Run the App
 if __name__ == "__main__":
     demo.launch()