Abbas0786 committed on
Commit
37717e6
·
verified ·
1 Parent(s): e576eab

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ from gtts import gTTS
5
+ import io
6
+ from groq import Groq
7
+ import time
8
+
9
# Read the Groq API key from the environment so the secret never lives in
# source control (on a hosted deployment, configure it as a secret/variable).
# NOTE(review): a literal key was previously committed on this line; it
# should be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set in environment variables.")

# Initialize the Groq client
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper model once at import time so every request reuses it.
# NOTE(review): confirm the "base" checkpoint transcribes Urdu well enough;
# otherwise choose a larger model.
model = whisper.load_model("base")
19
+
20
def process_audio(file_path):
    """Answer a spoken Urdu prompt with Urdu text and synthesized speech.

    The recording is transcribed with the module-level Whisper ``model``,
    the transcript is sent as a single user message through the Groq
    ``client``, and the reply is converted to an MP3 with gTTS.

    Args:
        file_path: Path to the audio file to process. May be ``None`` or
            empty when no audio has been supplied.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of an MP3 file containing the
        spoken reply. On failure ``text`` is a human-readable message and
        ``audio_path`` is ``None``.
    """
    import tempfile

    # Nothing to transcribe: report it plainly instead of surfacing a
    # low-level exception message from the audio loader.
    if not file_path:
        return "No audio was provided.", None

    try:
        # Load the audio file and transcribe it as Urdu
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio, language="ur")
        text = result["text"].strip()
        if not text:
            return "No speech was detected in the audio.", None

        # Generate a response using Groq
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Ensure this model can handle Urdu
        )
        response_message = (chat_completion.choices[0].message.content or "").strip()
        if not response_message:
            # gTTS refuses empty text, so stop here with a clear message.
            return "The model returned an empty response.", None

        # Convert the response text to Urdu speech and stream it straight
        # into a uniquely named file. A timestamp-based name could collide
        # when two requests finish within the same second.
        tts = gTTS(response_message, lang="ur")
        with tempfile.NamedTemporaryFile(
            prefix="response_", suffix=".mp3", delete=False
        ) as audio_file:
            tts.write_to_fp(audio_file)

        # Return the response text and the path to the saved audio file
        return response_message, audio_file.name

    except Exception as e:
        # UI boundary: show the failure to the user rather than crashing
        # the request handler.
        return f"An error occurred: {e}", None
56
+
57
# Build the UI pieces first, then wire them to process_audio.
# The input component hands the handler a path on disk (type="filepath").
_audio_input = gr.Audio(type="filepath")
_text_output = gr.Textbox(label="Response Text (Urdu)")
_speech_output = gr.Audio(label="Response Audio (Urdu)")

# live=True re-runs the handler whenever the input changes; set it to False
# if real-time updates are not needed.
iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=[_text_output, _speech_output],
    live=True,
)

iface.launch()