Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,45 +1,31 @@
|
|
1 |
import gradio as gr
|
2 |
-
from asr import
|
3 |
-
from lid import detect_language #
|
4 |
-
from tts import
|
5 |
-
from transformers import pipeline
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
# Function to process input
|
11 |
-
def process_input(input_text=None, audio=None):
|
12 |
-
if audio: # If audio is provided, convert it to text
|
13 |
-
input_text = transcribe_audio(audio)
|
14 |
-
|
15 |
-
if not input_text:
|
16 |
-
return "No input provided", None
|
17 |
|
18 |
-
# Detect language
|
19 |
-
|
20 |
|
21 |
-
# Generate
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
28 |
|
29 |
-
#
|
30 |
interface = gr.Interface(
|
31 |
-
fn=
|
32 |
-
inputs=
|
33 |
-
|
34 |
-
|
35 |
-
],
|
36 |
-
outputs=[
|
37 |
-
gr.Textbox(label="Generated Text"),
|
38 |
-
gr.Audio(label="Generated Speech")
|
39 |
-
],
|
40 |
-
title="Speech-to-Text AI Chat",
|
41 |
-
description="Input text or record audio, and the AI will respond with generated text and speech."
|
42 |
)
|
43 |
|
44 |
-
|
45 |
-
interface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
+
from asr import transcribe_auto # Import your ASR function
|
3 |
+
from lid import detect_language # Import your Language Detection function
|
4 |
+
from tts import generate_audio # Import your TTS function
|
|
|
5 |
|
6 |
+
def process_audio(audio_data):
    """Transcribe audio, detect its language, and synthesize a spoken reply.

    Parameters
    ----------
    audio_data :
        Raw audio delivered by the Gradio microphone component
        (presumably a ``(sample_rate, numpy_array)`` tuple given
        ``type="numpy"`` — TODO confirm against the asr/lid/tts helpers).

    Returns
    -------
    tuple
        ``(generated_text, speech_output)`` — the text response and the
        synthesized speech produced by ``generate_audio``.
    """
    # Guard: with live=True Gradio may invoke this callback before any
    # audio has been recorded. Mirror the previous version's "no input"
    # behavior instead of crashing inside transcribe_auto(None).
    if audio_data is None:
        return "No input provided", None

    # Step 1: Perform ASR (Audio-to-Text)
    transcription = transcribe_auto(audio_data)

    # Step 2: Detect language directly from the audio
    language = detect_language(audio_data)

    # Step 3: Generate Text Response based on ASR result
    # (placeholder — replace with real model inference logic)
    generated_text = f"Detected Language: {language}\n\nTranscription: {transcription}"

    # Step 4: Convert generated text into speech using TTS
    speech_output = generate_audio(generated_text)

    return generated_text, speech_output
|
21 |
|
22 |
+
# Wire up the Gradio UI: microphone audio in, (text, speech) out.
# NOTE(review): gr.Audio(source=...) is the Gradio 3.x API; 4.x renamed
# the parameter to sources=[...] — confirm the pinned gradio version.
_mic_input = gr.Audio(source="microphone", type="numpy")  # record or upload audio
_result_components = [
    gr.Textbox(label="Generated Text"),
    gr.Audio(label="Generated Speech"),
]

interface = gr.Interface(
    fn=process_audio,
    inputs=_mic_input,
    outputs=_result_components,
    live=True,
)

if __name__ == "__main__":
    interface.launch()
|