Futuresony's picture
Rename app.py(bad) to app.py
a93487d verified
raw
history blame
1.49 kB
import gradio as gr
from asr import transcribe_audio # Your ASR function
from lid import detect_language # Your Language Identification function
from tts import text_to_speech # Your TTS function
from transformers import pipeline
# Text-generation pipeline, created once at module import and shared by all requests.
# NOTE(review): the repository name ends in ".gguf" -- confirm the checkpoint
# loads through `pipeline` without additional arguments.
text_generator = pipeline(
    task="text-generation",
    model="Futuresony/12_10_2024.gguf",
)
def process_input(input_text=None, audio=None):
    """Generate a text reply and its spoken version from typed or recorded input.

    When ``audio`` is supplied it takes precedence: it is transcribed with
    ``transcribe_audio`` and the transcript replaces ``input_text``.

    Args:
        input_text: Prompt typed by the user, or ``None``.
        audio: Recording handed over by the audio input component, or ``None``.

    Returns:
        A ``(text, audio)`` tuple. When there is no usable input (nothing
        given, empty, or whitespace only) this is
        ``("No input provided", None)``. Otherwise it is the generated text
        together with whatever ``text_to_speech`` returns for that text.
    """
    if audio:
        input_text = transcribe_audio(audio)

    # Whitespace-only text carries no prompt; reject it up front instead of
    # running language detection, generation and TTS on it.
    if not input_text or (isinstance(input_text, str) and not input_text.strip()):
        return "No input provided", None

    lang = detect_language(input_text)

    # max_new_tokens bounds only the continuation. max_length would also count
    # the prompt tokens, so a long prompt/transcript could leave no room for
    # any generated text.
    generations = text_generator(input_text, max_new_tokens=100, do_sample=True)
    output_text = generations[0]["generated_text"]

    # Speak the reply in the language detected from the input.
    output_audio = text_to_speech(output_text, lang)
    return output_text, output_audio
# Gradio UI: a text box and a microphone recorder feed process_input, whose
# two return values fill the text and audio outputs.
# NOTE(review): `source=` on gr.Audio is version-dependent (newer Gradio
# releases use `sources=[...]`) -- confirm against the installed version.
input_components = [
    gr.Textbox(label="Enter Text", placeholder="Type here..."),
    gr.Audio(source="microphone", type="filepath", label="Record Audio"),
]
output_components = [
    gr.Textbox(label="Generated Text"),
    gr.Audio(label="Generated Speech"),
]
interface = gr.Interface(
    fn=process_input,
    inputs=input_components,
    outputs=output_components,
    title="Speech-to-Text AI Chat",
    description="Input text or record audio, and the AI will respond with generated text and speech.",
)

# Launch the demo.
interface.launch()