Futuresony committed
Commit 6c3b85f · verified · 1 Parent(s): 4f467e8

Update app.py

Files changed (1)
  1. app.py +13 -20
app.py CHANGED
@@ -1,25 +1,20 @@
 import gradio as gr
-from transformers import pipeline
-import soundfile as sf
+import subprocess
+import os
 from huggingface_hub import InferenceClient
 
-# Initialize Facebook MMS ASR model
-asr_model = pipeline("automatic-speech-recognition", model="facebook/mms-1b-all")
-
-# Initialize Facebook MMS TTS model
-tts_model = pipeline("text-to-speech", model="facebook/mms-tts")
-
-# Initialize the Chat Model (Gemma-2-9B or Futuresony.gguf)
+# Initialize Chatbot Model (Futuresony.gguf)
 chat_client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")  # Change if needed
 
 def asr_chat_tts(audio):
     """
-    1. Convert Speech to Text (ASR)
-    2. Process text through Chat Model (LLM)
-    3. Convert response to Speech (TTS)
+    1. Convert Speech to Text using asr.py
+    2. Process text through Chat Model (Futuresony.gguf)
+    3. Convert response to Speech using tts.py
     """
-    # Step 1: Transcribe speech using Facebook MMS ASR
-    transcription = asr_model(audio)["text"]
+    # Step 1: Run ASR (Speech-to-Text)
+    asr_output = subprocess.run(["python3", "asr.py", audio], capture_output=True, text=True)
+    transcription = asr_output.stdout.strip()
 
     # Step 2: Process text through the chat model
     messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
@@ -30,12 +25,11 @@ def asr_chat_tts(audio):
         token = msg.choices[0].delta.content
         response += token
 
-    # Step 3: Convert response to speech using Facebook MMS TTS
-    speech = tts_model(response)
-    output_file = "generated_speech.wav"
-    sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])
+    # Step 3: Run TTS (Text-to-Speech)
+    tts_output_file = "generated_speech.wav"
+    subprocess.run(["python3", "tts.py", response, tts_output_file])
 
-    return transcription, response, output_file
+    return transcription, response, tts_output_file
 
 # Gradio Interface
 with gr.Blocks() as demo:
@@ -54,4 +48,3 @@ with gr.Blocks() as demo:
 # Run the App
 if __name__ == "__main__":
     demo.launch()
-
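Note: the updated app.py shells out to asr.py and tts.py, but neither script is part of this commit. Below is a minimal sketch of what those wrappers could look like, assuming they wrap the same Facebook MMS ASR and TTS pipelines that the removed inline code used; the argument order and the print-to-stdout contract are assumptions inferred from the subprocess calls in the diff.

```python
# asr.py -- hypothetical wrapper, not part of this commit.
# Assumes the same facebook/mms-1b-all pipeline the removed inline code used.
import sys
from transformers import pipeline

def main():
    audio_path = sys.argv[1]  # audio file path passed by app.py via subprocess
    asr = pipeline("automatic-speech-recognition", model="facebook/mms-1b-all")
    # Print only the transcription so app.py can read it cleanly from stdout
    print(asr(audio_path)["text"])

if __name__ == "__main__":
    main()
```

```python
# tts.py -- hypothetical wrapper, not part of this commit.
# The removed inline code referenced "facebook/mms-tts"; a concrete per-language
# checkpoint such as facebook/mms-tts-eng is used here as an assumed stand-in.
import sys
import soundfile as sf
from transformers import pipeline

def main():
    text, output_path = sys.argv[1], sys.argv[2]
    tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    speech = tts(text)
    # Squeeze to a 1-D array so soundfile writes mono audio
    sf.write(output_path, speech["audio"].squeeze(), samplerate=speech["sampling_rate"])

if __name__ == "__main__":
    main()
```

Keeping asr.py's stdout limited to the transcription matters because app.py takes asr_output.stdout.strip() as the user text; any extra logging printed by the wrapper would leak into the prompt.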
 
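The lines between building messages and accumulating token fall outside the hunks above, so the exact chat call is not shown in this diff. Below is a self-contained sketch of a streaming chat_completion request that would yield the msg.choices[0].delta.content chunks the shown lines accumulate; the user message and max_tokens value are illustrative assumptions.

```python
from huggingface_hub import InferenceClient

chat_client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")  # Change if needed

messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    # In app.py this would be the transcription returned by asr.py
    {"role": "user", "content": "Hello, who are you?"},
]

# Stream tokens and accumulate them into the final response,
# mirroring the token/response lines shown in the diff
response = ""
for msg in chat_client.chat_completion(messages, max_tokens=512, stream=True):
    token = msg.choices[0].delta.content
    if token:  # the final stream chunk can carry no content
        response += token

print(response)
```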