Spaces:

anbarasanGT
/

tamil-audio-translator-image-generator

Runtime error

App Files Files Community

anbarasanGT committed on Oct 24, 2024

Commit

ceb97ed

verified ·

1 Parent(s): 3271c7e

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -68

app.py CHANGED Viewed

@@ -1,98 +1,127 @@
-# prompt: i need to add feature in Load GPT-Neo for creative text generation in english when the Translate Tamil to English at this Audio Transcription, Translation, and Image Generation
 import whisper
 import gradio as gr
 from groq import Groq
 from deep_translator import GoogleTranslator
-from diffusers import StableDiffusionPipeline
-import os
 import torch
-import openai
-api_key ="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"
 client = Groq(api_key=api_key)
-model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
-pipe = StableDiffusionPipeline.from_pretrained(model_id1, torch_dtype=torch.float16, use_safetensors=True)
-pipe = pipe.to("cuda")
-# Updated function for text generation using the new API structure
-def generate_creative_text(prompt):
-    chat_completion = client.chat.completions.create(
-                messages=[
-                    {"role": "user", "content":prompt}
-                ],
-                model="llama-3.2-90b-text-preview"
-            )
-    chatbot_response = chat_completion.choices[0].message.content
-    return chatbot_response
-def process_audio(audio_path, image_option, creative_text_option):
-    if audio_path is None:
-        return "Please upload an audio file.", None, None, None
-    # Step 1: Transcribe audio
-    try:
-        with open(audio_path, "rb") as file:
-            transcription = client.audio.transcriptions.create(
-                file=(os.path.basename(audio_path), file.read()),
-                model="whisper-large-v3",
-                language="ta",
-                response_format="verbose_json",
-            )
-        tamil_text = transcription.text
-    except Exception as e:
-        return f"An error occurred during transcription: {str(e)}", None, None, None
-    # Step 2: Translate Tamil to English
     try:
-        translator = GoogleTranslator(source='ta', target='en')
-        translation = translator.translate(tamil_text)
     except Exception as e:
-        return tamil_text, f"An error occurred during translation: {str(e)}", None, None
-    # Step 3: Generate creative text (if selected)
-    creative_text = None
-    if creative_text_option == "Generate Creative Text":
-        creative_text = generate_creative_text(translation)
-    # Step 4: Generate image (if selected)
-    image = None
-    if image_option == "Generate Image":
         try:
-            model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
-            pipe = StableDiffusionPipeline.from_pretrained(model_id1, torch_dtype=torch.float16, use_safetensors=True)
-            pipe = pipe.to("cuda")
-            image = pipe(translation).images[0]
         except Exception as e:
-            return tamil_text, translation, creative_text, f"An error occurred during image generation: {str(e)}"
-    return tamil_text, translation, creative_text, image
-# Create Gradio interface
-with gr.Blocks(theme=gr.themes.Base()) as iface:
-    gr.Markdown("# Audio Transcription, Translation, Image & Creative Text Generation")
     with gr.Row():
-        with gr.Column():
-            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
-            image_option = gr.Dropdown(["Generate Image", "Skip Image"], label="Image Generation", value="Generate Image")
-            creative_text_option = gr.Dropdown(["Generate Creative Text", "Skip Creative Text"], label="Creative Text Generation", value="Generate Creative Text")
-            submit_button = gr.Button("Process Audio")
-        with gr.Column():
-            tamil_text_output = gr.Textbox(label="Tamil Transcription")
-            translation_output = gr.Textbox(label="English Translation")
-            creative_text_output = gr.Textbox(label="Creative Text")
             image_output = gr.Image(label="Generated Image")
-    submit_button.click(
-        fn=process_audio,
-        inputs=[audio_input, image_option, creative_text_option],
-        outputs=[tamil_text_output, translation_output, creative_text_output, image_output]
-    )
-# Launch the interface
 iface.launch()

+import gradio as gr
+import os
+from deep_translator import GoogleTranslator
+from PIL import Image
+import requests
+import io
+import time
 import whisper
 import gradio as gr
 from groq import Groq
 from deep_translator import GoogleTranslator
 import torch
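+# Hugging Face and Groq credentials are read from the 'hf_key' and 'grog_key' environment variables (the bare os.environ[...] lookups raise KeyError if one is unset)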
+os.environ['hf_key']
+key = os.getenv('hf_key')
+API_URL = "https://api-inference.huggingface.co/models/Artples/LAI-ImageGeneration-vSDXL-2"
+headers = {"Authorization": f"Bearer {key}"}
+os.environ['grog_key']
+api_key = os.getenv('grog_key')
 client = Groq(api_key=api_key)
+def query_image_generation(payload, max_retries=5):
+    for attempt in range(max_retries):
+        response = requests.post(API_URL, headers=headers, json=payload)
+        if response.status_code == 503:
+            print(f"Model is still loading, retrying... Attempt {attempt + 1}/{max_retries}")
+            estimated_time = min(response.json().get("estimated_time", 60), 60)
+            time.sleep(estimated_time)
+            continue
+        if response.status_code != 200:
+            print(f"Error: Received status code {response.status_code}")
+            print(f"Response: {response.text}")
+            return None
+        return response.content
+    print(f"Failed to generate image after {max_retries} attempts.")
+    return None
+def generate_image(prompt):
+    image_bytes = query_image_generation({"inputs": prompt})
+    if image_bytes is None:
+        return None
     try:
+        image = Image.open(io.BytesIO(image_bytes))  # Opening the image from bytes
+        return image
     except Exception as e:
+        print(f"Error: {e}")
+        return None
+def process_audio_or_text(input_text, audio_path, generate_image_flag):
+    tamil_text, translation, image = None, None, None
+    if audio_path:  # Prefer audio input
+        try:
+            with open(audio_path, "rb") as file:
+                transcription = client.audio.transcriptions.create(
+                    file=(os.path.basename(audio_path), file.read()),
+                    model="whisper-large-v3",
+                    language="ta",
+                    response_format="verbose_json",
+                )
+            tamil_text = transcription.text
+        except Exception as e:
+            return f"An error occurred during transcription: {str(e)}", None, None
         try:
+            translator = GoogleTranslator(source='ta', target='en')
+            translation = translator.translate(tamil_text)
         except Exception as e:
+            return tamil_text, f"An error occurred during translation: {str(e)}", None
+    elif input_text:  # No audio input, so use text input
+        translation = input_text
+    # Generate chatbot response
+    try:
+        chat_completion = client.chat.completions.create(
+                messages=[{"role": "user", "content": translation}],
+                model="llama-3.2-90b-text-preview"
+            )
+        chatbot_response = chat_completion.choices[0].message.content
+    except Exception as e:
+        return None, f"An error occurred during chatbot interaction: {str(e)}", None
+    if generate_image_flag:  # Generate image if the checkbox is checked
+        image = generate_image(translation)
+    return tamil_text, chatbot_response, image  # Return both chatbot response and image (if generated)
+with gr.Blocks() as iface:
+    gr.Markdown("# AI Chatbot and Image Generation App")
     with gr.Row():
+        with gr.Column(scale=1):  # Left side (Inputs and Buttons)
+            user_input = gr.Textbox(label="Enter Tamil text", placeholder="Type your message here...")
+            audio_input = gr.Audio(type="filepath", label=" Or upload audio (for Image Generation)")
+            image_generation_checkbox = gr.Checkbox(label="Generate Image", value=False)
+            # Buttons
+            submit_btn = gr.Button("Submit")
+            clear_btn = gr.Button("Clear")
+        with gr.Column(scale=1):  # Right side (Outputs)
+            text_output_1 = gr.Textbox(label="Tamil Transcription / Chatbot Response", interactive=False)
+            text_output_2 = gr.Textbox(label="English Translation", interactive=False)
             image_output = gr.Image(label="Generated Image")
+    # Connect the buttons to the functions
+    submit_btn.click(fn=process_audio_or_text,
+                     inputs=[user_input, audio_input, image_generation_checkbox],
+                     outputs=[text_output_1, text_output_2, image_output])
+    clear_btn.click(lambda: ("", None, False, "", "", None),
+                    inputs=[],
+                    outputs=[user_input, audio_input, image_generation_checkbox, text_output_1, text_output_2, image_output])
 iface.launch()