|
import asyncio
import base64
import tempfile
from io import BytesIO

import fal_client
import gradio as gr
import httpx
from PIL import Image
|
|
|
|
|
# System prompt sent with the vision-LLM request in process_image, which
# appends a "respond only in <language>" instruction to the end of it.
# The text is runtime behaviour: editing it changes what the model is asked.
SYSTEM_PROMPT = """
You are a kind assistant that compliments people's appearances and acknowledges their hard work for their families or the work they do as a part of the Ultratech Cement Family (if they are a man).
Look at the image and create a short, sincere compliment about:
1. Something positive about how they look (facial features, smile, eyes, etc.)
2. Acknowledge the hard work they likely do for their family
Be warm, genuine, and uplifting without being overly flowery or artificial.
Keep the response to 2-3 sentences maximum. Do not say anything apart from the compliment.
"""
|
|
|
async def process_image(image, language):
    """Generate a written and spoken compliment for the person in a photo.

    The photo is uploaded through ``fal_client``, a vision LLM writes a short
    compliment in the requested language, a Kokoro text-to-speech model turns
    that text into audio, and the audio is downloaded to a local file.

    Args:
        image: Filesystem path of the photo, or a falsy value (``None`` /
            empty string) when no photo was supplied.
        language: Name of the output language. ``"Hindi"`` selects the Hindi
            TTS model and voice; any other value uses American English.

    Returns:
        A ``(text, audio_path)`` tuple. ``audio_path`` is ``None`` when the
        audio download did not return HTTP 200 or when any step failed; in
        the failure case ``text`` is an ``"Error: ..."`` message instead of
        a compliment.
    """
    # Nothing to upload: report it in the same shape as every other failure
    # instead of letting the upload call raise on a missing path.
    if not image:
        return "Error: No image provided.", None

    prompt = "Please look at this person and give them a sincere compliment about their appearance and acknowledge their hard work for their family and work for ultratech cement (If they are a man)."

    try:
        # Use the async client so the upload does not block the event loop,
        # and keep it inside the try so upload failures are reported too.
        img_url = await fal_client.async_client.upload_file(image)

        result = await fal_client.async_client.run(
            "fal-ai/any-llm-open",
            arguments={
                "prompt": prompt,
                "system_prompt": SYSTEM_PROMPT + f"Make sure to only respond in the {language} language.",
                "model": "google/gemini-2.0-flash-001",
                "image_url": img_url,
            },
        )
        compliment_text = result["output"]

        # Pick the TTS model/voice pair matching the requested language.
        if language == "Hindi":
            tts_model, voice = "fal-ai/kokoro/hindi", "hf_alpha"
        else:
            tts_model, voice = "fal-ai/kokoro/american-english", "af_heart"

        tts_result = await fal_client.async_client.run(
            tts_model,
            arguments={
                "prompt": compliment_text,
                "voice": voice,
            },
        )
        audio_url = tts_result["audio"]["url"]

        async with httpx.AsyncClient() as client:
            response = await client.get(audio_url)

        # The text is still useful even if the audio could not be fetched.
        if response.status_code != 200:
            return compliment_text, None

        # A unique file per request: a fixed filename would let concurrent
        # requests overwrite each other's audio. delete=False keeps the file
        # on disk after this function returns so the caller can serve it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as audio_file:
            audio_file.write(response.content)

        return compliment_text, audio_file.name

    except Exception as e:
        # Top-level boundary for the UI handler: surface the failure as text
        # rather than raising.
        return f"Error: {e}", None
|
|
|
def process_image_sync(image, language):
    """Run ``process_image`` to completion and return its result.

    Synchronous entry point for callers that cannot ``await``: the coroutine
    is driven with ``asyncio.run``.

    Args:
        image: Forwarded unchanged to ``process_image``.
        language: Forwarded unchanged to ``process_image``.

    Returns:
        Whatever ``process_image`` returns.
    """
    pending = process_image(image, language)
    return asyncio.run(pending)
|
|
|
# Gradio front end: a photo upload and language choice on the left, the
# generated text and audio on the right.
with gr.Blocks() as demo:
    gr.Markdown(value="# Face Reader")
    gr.Markdown(
        value="Upload a photo of someone, and the app will generate a prediction about them using AI"
    )

    with gr.Row():
        # Input column.
        with gr.Column():
            # type="filepath" hands the handler a path on disk.
            image_input = gr.Image(label="Upload Photo", type="filepath")
            language_selector = gr.Radio(
                choices=["English", "Hindi"],
                value="English",
                label="Output Language",
            )
            submit_button = gr.Button(value="Generate Prediction")

        # Output column.
        with gr.Column():
            text_output = gr.Textbox(label="AI Response")
            audio_output = gr.Audio(type="filepath", label="AI Prediction")

    # Wire the button: (image path, language) -> (text, audio file path).
    submit_button.click(
        process_image_sync,
        [image_input, language_selector],
        [text_output, audio_output],
    )


if __name__ == "__main__":
    demo.launch()