anbarasanGT committed on
Commit
5060492
·
verified ·
1 Parent(s): 303942c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -0
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import gradio as gr
3
+ from groq import Groq
4
+ from deep_translator import GoogleTranslator
5
+ from diffusers import StableDiffusionPipeline
6
+ import os
7
+ import torch
8
+ import openai
9
+ from huggingface_hub import InferenceApi
10
+ from PIL import Image
11
+ import requests
12
+ import io
13
+ import time
14
+
15
# Groq client, authenticated with the key read from the "groq_key" environment variable
api_key = os.environ.get("groq_key")
client = Groq(api_key=api_key)

# Hugging Face Inference API endpoint and bearer token used for image generation
H_key = os.environ.get("hf_key")
API_URL = "https://api-inference.huggingface.co/models/Artples/LAI-ImageGeneration-vSDXL-2"
headers = {"Authorization": "Bearer {}".format(H_key)}
23
+
24
+
25
+ # Function for querying image generation with retries
26
def query_image_generation(payload, max_retries=5, timeout=120):
    """POST ``payload`` to the image-generation endpoint, retrying on HTTP 503.

    Args:
        payload: JSON-serialisable request body sent to ``API_URL``.
        max_retries: Maximum number of POST attempts.
        timeout: Seconds to wait for each HTTP response; without it a stalled
            connection would block the caller indefinitely.

    Returns:
        The raw response body (bytes) on HTTP 200. ``None`` when the request
        itself fails, when the endpoint answers with a status other than
        200/503, or when every attempt received a 503.
    """
    for attempt in range(max_retries):
        try:
            response = requests.post(
                API_URL, headers=headers, json=payload, timeout=timeout
            )
        except requests.RequestException as exc:
            # Report transport failures the same way as HTTP errors: log and
            # return None instead of letting the exception escape.
            print(f"Error: request to the image generation API failed: {exc}")
            return None

        if response.status_code == 503:
            print(f"Model is still loading, retrying... Attempt {attempt + 1}/{max_retries}")
            if attempt + 1 < max_retries:
                # The 503 body may carry an "estimated_time" hint; fall back to
                # 60 seconds when the body is not JSON or the hint is unusable.
                wait_seconds = 60.0
                try:
                    wait_seconds = float(response.json().get("estimated_time", 60))
                except (ValueError, TypeError, AttributeError):
                    pass
                # Clamp to [0, 60] so a bad hint can neither stall nor crash sleep().
                time.sleep(max(0.0, min(wait_seconds, 60.0)))
            # No sleep after the final attempt: nothing is left to retry.
            continue

        if response.status_code != 200:
            print(f"Error: Received status code {response.status_code}")
            print(f"Response: {response.text}")
            return None

        return response.content

    print(f"Failed to generate image after {max_retries} attempts.")
    return None
45
+
46
+ # Function for generating an image from text
47
def generate_image(prompt):
    """Generate an image for ``prompt`` and return it as a PIL image.

    The prompt is sent to ``query_image_generation`` as ``{"inputs": prompt}``.
    Returns ``None`` when no bytes come back or when the bytes cannot be
    opened as an image (the error is printed).
    """
    raw_bytes = query_image_generation({"inputs": prompt})

    if raw_bytes is not None:
        try:
            # Wrap the bytes in a file-like buffer so PIL can open them
            return Image.open(io.BytesIO(raw_bytes))
        except Exception as e:
            print(f"Error: {e}")

    return None
59
+
60
+
61
+ # Updated function for text generation using the new API structure
62
def generate_creative_text(prompt):
    """Send ``prompt`` as a single user message through the module-level ``client``.

    Returns the message content of the first choice in the chat completion.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        messages=[user_message],
        model="llama-3.2-90b-text-preview",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
71
+
72
+
73
def process_audio(audio_path, image_option, creative_text_option):
    """Transcribe Tamil audio, translate it to English, and optionally generate extras.

    Args:
        audio_path: Path of the audio file to transcribe, or ``None``.
        image_option: Image generation runs only when this equals
            ``"Generate Image"``.
        creative_text_option: Creative text generation runs only when this
            equals ``"Generate Creative Text"``.

    Returns:
        A 4-tuple ``(tamil_text, translation, creative_text, image)``. When a
        step fails, its slot holds an error message and later slots are
        ``None``, except that the image slot is always an image or ``None``
        and never a message string.
    """
    if audio_path is None:
        return "Please upload an audio file.", None, None, None

    # Step 1: Transcribe audio
    try:
        with open(audio_path, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), file.read()),
                model="whisper-large-v3",
                language="ta",
                response_format="verbose_json",
            )
        tamil_text = transcription.text
    except Exception as e:
        return f"An error occurred during transcription: {str(e)}", None, None, None

    # Step 2: Translate Tamil to English
    try:
        translator = GoogleTranslator(source='ta', target='en')
        translation = translator.translate(tamil_text)
    except Exception as e:
        return tamil_text, f"An error occurred during translation: {str(e)}", None, None

    # Step 3: Generate creative text (if selected)
    creative_text = None
    if creative_text_option == "Generate Creative Text":
        try:
            creative_text = generate_creative_text(translation)
        except Exception as e:
            # Report the failure in the creative-text slot, as the other steps
            # do, so the transcription and translation are still shown.
            creative_text = f"An error occurred during creative text generation: {str(e)}"

    # Step 4: Generate image (if selected)
    image = None
    if image_option == "Generate Image":
        image = generate_image(translation)
        if image is None:
            # The fourth slot is bound to an image component, so a message
            # string there would be treated as a file path. Keep it None and
            # surface the failure separately.
            message = "An error occurred during image generation"
            print(message)
            notify = getattr(gr, "Warning", None)  # absent on older Gradio releases
            if notify is not None:
                notify(message)

    return tamil_text, translation, creative_text, image
110
+
111
+
112
# Build the Gradio UI: controls in the first column, results in the second
with gr.Blocks(theme=gr.themes.Base()) as iface:
    gr.Markdown("# Audio Transcription, Translation, Image & Creative Text Generation")

    with gr.Row():
        # Input column: audio upload, the two generation toggles, and the trigger button
        with gr.Column():
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            image_option = gr.Dropdown(
                ["Generate Image", "Skip Image"],
                value="Generate Image",
                label="Image Generation",
            )
            creative_text_option = gr.Dropdown(
                ["Generate Creative Text", "Skip Creative Text"],
                value="Generate Creative Text",
                label="Creative Text Generation",
            )
            submit_button = gr.Button("Process Audio")

        # Output column: one component per element of process_audio's return tuple
        with gr.Column():
            tamil_text_output = gr.Textbox(label="Tamil Transcription")
            translation_output = gr.Textbox(label="English Translation")
            creative_text_output = gr.Textbox(label="Creative Text")
            image_output = gr.Image(label="Generated Image")

    # Wire the button to the processing pipeline
    submit_button.click(
        process_audio,
        [audio_input, image_option, creative_text_option],
        [tamil_text_output, translation_output, creative_text_output, image_output],
    )

# Launch the interface
iface.launch()