# AI-Teller / app.py
# NOTE(review): the following metadata was Hugging Face Spaces file-viewer
# residue pasted into the source ("Warlord-K's picture / Update app.py /
# f8ecbb8 verified") — kept here as a comment so the file parses.
# Standard library
import asyncio
import base64
import tempfile
from io import BytesIO

# Third-party
import fal_client
import gradio as gr
import httpx
from PIL import Image
# System prompt for the vision LLM. It fixes the tone and length of the
# compliment; the required output language is appended per request in
# process_image, so this constant stays language-agnostic.
SYSTEM_PROMPT = """
You are a kind assistant that compliments people's appearances and acknowledges their hard work for their families or the work they do as a part of the Ultratech Cement Family (if they are a man).
Look at the image and create a short, sincere compliment about:
1. Something positive about how they look (facial features, smile, eyes, etc.)
2. Acknowledge the hard work they likely do for their family
Be warm, genuine, and uplifting without being overly flowery or artificial.
Keep the response to 2-3 sentences maximum. Do not say anything apart from the compliment.
"""
async def process_image(image, language):
    """Generate a spoken compliment for the person in *image*.

    Pipeline: upload the photo to fal.ai -> vision LLM writes a short
    compliment in *language* -> Kokoro TTS speaks it -> the audio is
    downloaded to a local temp file for Gradio to play.

    Parameters
    ----------
    image : str
        Filepath of the uploaded photo (Gradio ``type="filepath"``).
    language : str
        "English" or "Hindi"; selects both the LLM output language and
        the TTS model/voice.

    Returns
    -------
    tuple[str, str | None]
        ``(compliment_text, audio_path)`` on success; ``(compliment_text,
        None)`` when the audio download fails; ``(error_message, None)``
        when any API call raises.
    """
    # Upload so the vision model can fetch the image by URL.
    # NOTE(review): upload_file is a blocking call inside an async function —
    # acceptable for a single-user demo, but it stalls the event loop.
    img_url = fal_client.upload_file(image)

    # Prompt for the vision model (runtime string unchanged; dropped the
    # needless f-prefix — the original had no placeholders).
    prompt = "Please look at this person and give them a sincere compliment about their appearance and acknowledge their hard work for their family and work for ultratech cement (If they are a man)."

    try:
        # Vision LLM: image -> compliment text in the requested language.
        result = await fal_client.async_client.run(
            "fal-ai/any-llm-open",
            arguments={
                "prompt": prompt,
                "system_prompt": SYSTEM_PROMPT + f"Make sure to only respond in the {language} language.",
                "model": "google/gemini-2.0-flash-001",
                "image_url": img_url,
            },
        )
        compliment_text = result["output"]

        # Select the TTS model and voice for the output language.
        if language == "Hindi":
            tts_model, voice = "fal-ai/kokoro/hindi", "hf_alpha"
        else:
            tts_model, voice = "fal-ai/kokoro/american-english", "af_heart"

        # TTS: compliment text -> hosted audio URL.
        tts_result = await fal_client.async_client.run(
            tts_model,
            arguments={
                "prompt": compliment_text,
                "voice": voice,
            },
        )
        audio_url = tts_result["audio"]["url"]

        # Download the audio so Gradio can serve it from a local path.
        async with httpx.AsyncClient() as client:
            response = await client.get(audio_url)
        if response.status_code != 200:
            # Preserve original behavior: still show the text, no audio.
            return compliment_text, None

        # BUG FIX: the original wrote to a fixed "temp_audio.wav", which
        # concurrent Gradio requests clobber; use a unique temp file.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            f.write(response.content)
        return compliment_text, f.name
    except Exception as e:
        # Boundary handler: surface the failure in the UI rather than crash.
        return f"Error: {str(e)}", None
def process_image_sync(image, language):
    """Blocking bridge so Gradio's synchronous callback can drive the
    async fal.ai pipeline; returns whatever process_image returns."""
    coro = process_image(image, language)
    return asyncio.run(coro)
# ---- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Face Reader")
    gr.Markdown("Upload a photo of someone, and the app will generate a prediction about them using AI")

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            photo = gr.Image(type="filepath", label="Upload Photo")
            lang = gr.Radio(["English", "Hindi"], label="Output Language", value="English")
            go_btn = gr.Button("Generate Prediction")
        # Right column: outputs.
        with gr.Column():
            reply_box = gr.Textbox(label="AI Response")
            speech_out = gr.Audio(label="AI Prediction", type="filepath")

    # Wire the button through the sync bridge around the async pipeline.
    go_btn.click(
        fn=process_image_sync,
        inputs=[photo, lang],
        outputs=[reply_box, speech_out],
    )

# Launch only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()