# Hugging Face Spaces file view (Space status: Sleeping)
# File size: 1,488 Bytes
# Revision a93487d
import gradio as gr
from asr import transcribe_audio # Your ASR function
from lid import detect_language # Your Language Identification function
from tts import text_to_speech # Your TTS function
from transformers import pipeline
# Text-generation pipeline, created once when the module is loaded and reused
# by process_input. Change the task/model arguments to match your model type.
text_generator = pipeline(
    task="text-generation",
    model="Futuresony/12_10_2024.gguf",
)
# Function to process input
def process_input(input_text=None, audio=None):
    """Generate a text reply and its spoken version from typed or recorded input.

    When ``audio`` is supplied it takes precedence: it is transcribed with
    ``transcribe_audio`` and the transcript replaces ``input_text``.

    Args:
        input_text: Prompt typed by the user, or ``None``.
        audio: Recording to transcribe, or ``None``. The Gradio interface in
            this file passes it as a file path (``type="filepath"``).

    Returns:
        A ``(generated_text, generated_audio)`` tuple. When there is nothing
        to process (no text, whitespace-only text, or an empty transcript)
        the tuple is ``("No input provided", None)``.
    """
    if audio:
        # A recording overrides whatever was typed in the text box.
        input_text = transcribe_audio(audio)

    # Whitespace-only text is as unusable a prompt as missing text; reject it
    # here instead of sending it through language detection, the model and TTS.
    is_blank = isinstance(input_text, str) and not input_text.strip()
    if not input_text or is_blank:
        return "No input provided", None

    lang = detect_language(input_text)

    # max_new_tokens limits only the generated continuation. The previous
    # max_length=100 also counted the prompt's tokens, so a long prompt or
    # transcript left little or no room for a reply.
    generations = text_generator(input_text, max_new_tokens=100, do_sample=True)
    output_text = generations[0]["generated_text"]

    # Speak the reply in the language detected from the input.
    output_audio = text_to_speech(output_text, lang)
    return output_text, output_audio
# Input widgets, in the order process_input receives them: typed text first,
# then a microphone recording handed over as a file path.
# NOTE(review): `source=` is the older Gradio keyword; newer releases appear to
# expect `sources=[...]` instead — confirm against the installed Gradio version.
input_components = [
    gr.Textbox(label="Enter Text", placeholder="Type here..."),
    gr.Audio(source="microphone", type="filepath", label="Record Audio"),
]

# Output widgets, matching the (text, audio) pair process_input returns.
output_components = [
    gr.Textbox(label="Generated Text"),
    gr.Audio(label="Generated Speech"),
]

# Assemble the demo UI around process_input.
interface = gr.Interface(
    fn=process_input,
    inputs=input_components,
    outputs=output_components,
    title="Speech-to-Text AI Chat",
    description="Input text or record audio, and the AI will respond with generated text and speech.",
)

# Launch the demo.
interface.launch()
|