# Hugging Face Space: Futuresony/Speech-recognition (status: Sleeping)
# File size: 1,488 Bytes
# a93487d

import inspect

import gradio as gr
from transformers import pipeline

from asr import transcribe_audio  # Your ASR function
from lid import detect_language  # Your Language Identification function
from tts import text_to_speech  # Your TTS function

# Shared text-generation pipeline, constructed once when the module is loaded
# and reused by process_input for every request.
# NOTE(review): the model id ends in ".gguf" -- confirm it is a valid Hub repo
# id for this loader; adjust the task/model to match the actual model type.
text_generator = pipeline(
    task="text-generation",
    model="Futuresony/12_10_2024.gguf",
)

# Function to process input
def process_input(input_text=None, audio=None):
    """Generate a text reply and its spoken version from typed or recorded input.

    Args:
        input_text: Text typed by the user. Ignored when ``audio`` is given.
        audio: Recorded audio to transcribe with ``transcribe_audio``. When
            truthy, its transcription replaces ``input_text``.

    Returns:
        A ``(generated_text, generated_audio)`` tuple, where the audio is
        whatever ``text_to_speech`` returns for the generated text. When there
        is no usable input (nothing provided, or only whitespace), returns
        ``("No input provided", None)``.
    """
    if audio:  # Audio takes precedence over typed text.
        input_text = transcribe_audio(audio)

    # Treat whitespace-only text like missing input so blank prompts are not
    # sent to the language detector or the model.
    if not input_text or (isinstance(input_text, str) and not input_text.strip()):
        return "No input provided", None

    # The language is detected on the input and reused for the spoken reply.
    lang = detect_language(input_text)

    # max_new_tokens bounds only the generated continuation. The previous
    # max_length=100 also counted the prompt tokens, so a long prompt left no
    # room for any output.
    generations = text_generator(input_text, max_new_tokens=100, do_sample=True)
    output_text = generations[0]['generated_text']

    # Convert output text to speech
    output_audio = text_to_speech(output_text, lang)

    return output_text, output_audio

# Create Gradio interface
# Gradio 4 renamed gr.Audio's `source` keyword to `sources` (a list of allowed
# sources). Pick whichever keyword the installed version declares so that
# microphone recording is configured correctly on either major version.
if "sources" in inspect.signature(gr.Audio.__init__).parameters:
    _microphone_kwargs = {"sources": ["microphone"]}
else:
    _microphone_kwargs = {"source": "microphone"}

interface = gr.Interface(
    fn=process_input,
    # Order matches process_input(input_text, audio).
    inputs=[
        gr.Textbox(label="Enter Text", placeholder="Type here..."),
        # type="filepath" hands the recording to process_input as a file path.
        gr.Audio(type="filepath", label="Record Audio", **_microphone_kwargs),
    ],
    # Order matches the (output_text, output_audio) tuple process_input returns.
    outputs=[
        gr.Textbox(label="Generated Text"),
        gr.Audio(label="Generated Speech"),
    ],
    title="Speech-to-Text AI Chat",
    description="Input text or record audio, and the AI will respond with generated text and speech.",
)

# Run the demo
interface.launch()