Spaces:
Runtime error
Runtime error
File size: 4,869 Bytes
cf60fe5 ceb97ed cf60fe5 ceb97ed cf60fe5 ceb97ed 49fcce4 cf60fe5 6aaf1fe cf60fe5 49fcce4 cf60fe5 ef97e86 ceb97ed 49fcce4 cf60fe5 ceb97ed 49fcce4 ceb97ed 49fcce4 ceb97ed 49fcce4 ceb97ed 49fcce4 ceb97ed 49fcce4 cf60fe5 ceb97ed 49fcce4 ceb97ed 49fcce4 fa84648 ceb97ed fa84648 ceb97ed 49fcce4 cf60fe5 ceb97ed cf60fe5 49fcce4 cf60fe5 49fcce4 cf60fe5 ceb97ed cf60fe5 49fcce4 cf60fe5 49fcce4 cf60fe5 49fcce4 cf60fe5 ceb97ed cf60fe5 49fcce4 cf60fe5 49fcce4 cf60fe5 9c51fc1 fa84648 cf60fe5 074aec9 cf60fe5 fa84648 cf60fe5 49fcce4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import whisper
import gradio as gr
from groq import Groq
from deep_translator import GoogleTranslator
from diffusers import StableDiffusionPipeline
import os
import torch
import openai
from huggingface_hub import InferenceApi
from PIL import Image
import requests
import io
import time
# Set up Groq API key
api_key = os.getenv("g_key")
client = Groq(api_key=api_key)
# Hugging Face API details for image generation
H_key = os.getenv("h_key")
API_URL = "https://api-inference.huggingface.co/models/Artples/LAI-ImageGeneration-vSDXL-2"
headers = {"Authorization": f"Bearer {H_key}"}
# Function for querying image generation with retries
def query_image_generation(payload, max_retries=5):
for attempt in range(max_retries):
response = requests.post(API_URL, headers=headers, json=payload)
if response.status_code == 503:
print(f"Model is still loading, retrying... Attempt {attempt + 1}/{max_retries}")
estimated_time = min(response.json().get("estimated_time", 60), 60)
time.sleep(estimated_time)
continue
if response.status_code != 200:
print(f"Error: Received status code {response.status_code}")
print(f"Response: {response.text}")
return None
return response.content
print(f"Failed to generate image after {max_retries} attempts.")
return None
# Function for generating an image from text
def generate_image(prompt):
image_bytes = query_image_generation({"inputs": prompt})
if image_bytes is None:
return None
try:
image = Image.open(io.BytesIO(image_bytes)) # Opening the image from bytes
return image
except Exception as e:
print(f"Error: {e}")
return None
# Updated function for text generation using the new API structure
def generate_creative_text(prompt):
chat_completion = client.chat.completions.create(
messages=[
{"role": "user", "content":prompt}
],
model="llama-3.2-90b-text-preview"
)
chatbot_response = chat_completion.choices[0].message.content
return chatbot_response
def process_audio(audio_path, image_option, creative_text_option):
if audio_path is None:
return "Please upload an audio file.", None, None, None
# Step 1: Transcribe audio
try:
with open(audio_path, "rb") as file:
transcription = client.audio.transcriptions.create(
file=(os.path.basename(audio_path), file.read()),
model="whisper-large-v3",
language="ta",
response_format="verbose_json",
)
tamil_text = transcription.text
except Exception as e:
return f"An error occurred during transcription: {str(e)}", None, None, None
# Step 2: Translate Tamil to English
try:
translator = GoogleTranslator(source='ta', target='en')
translation = translator.translate(tamil_text)
except Exception as e:
return tamil_text, f"An error occurred during translation: {str(e)}", None, None
# Step 3: Generate creative text (if selected)
creative_text = None
if creative_text_option == "Generate Creative Text":
creative_text = generate_creative_text(translation)
# Step 4: Generate image (if selected)
image = None
if image_option == "Generate Image":
image = generate_image(translation)
if image is None:
return tamil_text, translation, creative_text, f"An error occurred during image generation"
return tamil_text, translation, creative_text, image
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Base()) as iface:
gr.Markdown("# Multimodal Application for Vernacular Language Translation and Image Synthesis")
with gr.Row():
with gr.Column():
audio_input = gr.Audio(type="filepath", label="Upload Audio File")
image_option = gr.Dropdown(["Generate Image", "Skip Image"], label="Image Generation", value="Generate Image")
creative_text_option = gr.Dropdown(["Generate Creative Text", "Skip Creative Text"], label="Creative Text Generation", value="Generate Creative Text")
submit_button = gr.Button("Click here to generate")
with gr.Column():
tamil_text_output = gr.Textbox(label="Tamil Transcription")
translation_output = gr.Textbox(label="English Translation")
creative_text_output = gr.Textbox(label="Creative Text")
image_output = gr.Image(label="Generated Image")
submit_button.click(
fn=process_audio,
inputs=[audio_input, image_option, creative_text_option],
outputs=[tamil_text_output, translation_output, creative_text_output, image_output]
)
# Launch the interface
iface.launch() |