Spaces:

Futuresony
/

Speech-recognition

Sleeping

Speech-recognition / app.py

Rename app.py(bad) to app.py

a93487d verified 4 months ago

1.49 kB

	import gradio as gr
	from asr import transcribe_audio # Your ASR function
	from lid import detect_language # Your Language Identification function
	from tts import text_to_speech # Your TTS function
	from transformers import pipeline

	# Hub identifier of the checkpoint used for text generation; swap this for
	# another model id if a different model should drive the replies.
	# NOTE(review): the id ends in ".gguf" -- if the repo holds a GGUF checkpoint,
	# transformers may need an explicit `gguf_file=` argument; confirm it loads.
	MODEL_ID = "Futuresony/12_10_2024.gguf"

	# Build the generation pipeline once at import time so every request reuses it.
	text_generator = pipeline(task="text-generation", model=MODEL_ID)

	# Function to process input
	def process_input(input_text=None, audio=None):
	    """Generate a text reply, and its spoken form, from typed or recorded input.

	    Args:
	        input_text: Prompt typed by the user, or None.
	        audio: Recorded audio as supplied by the caller, or None. When it is
	            truthy it is transcribed with ``transcribe_audio`` and the
	            transcription replaces ``input_text``.

	    Returns:
	        A ``(generated_text, generated_audio)`` tuple, where
	        ``generated_audio`` is whatever ``text_to_speech`` returns. When there
	        is no usable input (nothing given, an empty transcription, or
	        whitespace-only text) the tuple is ``("No input provided", None)``.
	    """
	    # Audio takes precedence over typed text when both are supplied.
	    if audio:
	        input_text = transcribe_audio(audio)

	    # Reject missing input, and also whitespace-only text: it carries no prompt
	    # and would otherwise be pushed through detection, generation and TTS.
	    is_blank_text = isinstance(input_text, str) and not input_text.strip()
	    if not input_text or is_blank_text:
	        return "No input provided", None

	    # The detected language is only used to pick the TTS voice below.
	    lang = detect_language(input_text)

	    # Bound the number of *new* tokens rather than the total length:
	    # `max_length` also counts the prompt, so a long prompt would leave
	    # little or no room for the reply.
	    generations = text_generator(input_text, max_new_tokens=100, do_sample=True)
	    output_text = generations[0]["generated_text"]

	    # Speak the generated reply in the language of the input.
	    output_audio = text_to_speech(output_text, lang)

	    return output_text, output_audio

	# Input widgets, in the same order as process_input's parameters:
	# typed text first, then a microphone recording handed over as a file path.
	# NOTE(review): `source=` is the Gradio 3.x keyword; newer Gradio releases
	# expect `sources=[...]` -- confirm against the pinned Gradio version.
	input_components = [
	    gr.Textbox(label="Enter Text", placeholder="Type here..."),
	    gr.Audio(source="microphone", type="filepath", label="Record Audio"),
	]

	# Output widgets, matching the (text, audio) tuple process_input returns.
	output_components = [
	    gr.Textbox(label="Generated Text"),
	    gr.Audio(label="Generated Speech"),
	]

	# Wire the handler and the widgets into a single Gradio interface.
	interface = gr.Interface(
	    fn=process_input,
	    inputs=input_components,
	    outputs=output_components,
	    title="Speech-to-Text AI Chat",
	    description="Input text or record audio, and the AI will respond with generated text and speech.",
	)

	# Start serving the demo.
	interface.launch()