dindizz committed on
Commit
0393fbc
·
verified ·
1 Parent(s): 5f3262f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import openai
4
+ import speech_recognition as sr
5
+ from gtts import gTTS
6
+ from deep_translator import GoogleTranslator
7
+
8
def translate_text(text, target_lang):
    """Translate *text* into *target_lang* using Google Translate.

    The translator is created with ``source='auto'``, so the input
    language is not specified by the caller.

    Args:
        text: The text to translate.
        target_lang: Language code of the desired output (e.g. ``"en"``).

    Returns:
        The value returned by ``GoogleTranslator.translate`` for *text*.
    """
    return GoogleTranslator(source='auto', target=target_lang).translate(text)
11
+
12
def get_llm_response(prompt):
    """Send *prompt* to the OpenAI chat model and return the reply text.

    Works with both the legacy (``openai<1.0``) and the current
    (``openai>=1.0``) client libraries.

    Args:
        prompt: The user message to send to the model.

    Returns:
        The assistant's reply with surrounding whitespace removed, or an
        empty string when the API returns no choices or no text content.
    """
    request = {
        "model": "gpt-4-0125-preview",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that provides informative and concise responses."},
            {"role": "user", "content": prompt},
        ],
        "max_tokens": 150,
    }

    # openai>=1.0 removed ``openai.ChatCompletion`` in favour of
    # ``openai.chat.completions``. The ``OpenAI`` client class only exists in
    # the 1.x line, so its presence distinguishes the two APIs.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(**request)
    else:
        response = openai.ChatCompletion.create(**request)

    if not response.choices:
        return ""

    # ``content`` may be None (e.g. a reply without text), so guard before
    # calling ``.strip()``.
    content = response.choices[0].message.content
    return (content or "").strip()
22
+
23
def process_voice_or_text(input_audio, input_text, output_lang):
    """Answer a spoken or typed query and return text plus synthesized audio.

    When *input_audio* is given it is transcribed and the transcript replaces
    *input_text*. The query is translated to English, sent to the LLM, and
    the reply is translated into *output_lang* and converted to speech.

    Args:
        input_audio: Path to an audio file, or ``None`` when no audio is given.
        input_text: Typed query; only used when *input_audio* is ``None``.
        output_lang: Output language code, either ``"ta"`` or ``"en"``.

    Returns:
        A 5-tuple ``(original_input, english_query, llm_response,
        final_response, audio_file)``. On failure the first element holds an
        error message, the next three are empty strings and the last is
        ``None``.
    """
    if output_lang not in ["ta", "en"]:
        return "Invalid output language selected. Please choose either Tamil ('ta') or English ('en').", "", "", "", None

    if input_audio is not None:
        # Process audio input
        recognizer = sr.Recognizer()
        try:
            with sr.AudioFile(input_audio) as source:
                audio = recognizer.record(source)
        except ValueError as e:
            # AudioFile raises ValueError for formats it cannot decode;
            # report it like the other recognition failures.
            return f"Could not read the audio input: {e}", "", "", "", None

        try:
            # NOTE(review): recognize_google documents ``language`` as a
            # single language tag; confirm that this comma-separated value
            # behaves as intended.
            input_text = recognizer.recognize_google(audio, language="ta-IN,en-IN")
        except sr.UnknownValueError:
            return "Could not understand the audio input.", "", "", "", None
        except sr.RequestError as e:
            return f"Speech recognition error: {e}", "", "", "", None

    # Treat None and whitespace-only input as "nothing provided" so no
    # translation or LLM request is made for an empty query.
    input_text = (input_text or "").strip()
    if not input_text:
        return "Please provide a valid input.", "", "", "", None

    # Translate input to English
    english_query = translate_text(input_text, "en")

    # Get response from LLM
    llm_response = get_llm_response(english_query)

    # Translate LLM response to desired output language
    final_response = translate_text(llm_response, output_lang)

    # Generate audio output; skipped when there is nothing to speak because
    # the speech synthesizer rejects empty text.
    audio_file = text_to_speech(final_response, output_lang) if final_response else None

    return input_text, english_query, llm_response, final_response, audio_file
55
+
56
def text_to_speech(response, lang):
    """Synthesize *response* as speech and save it as ``response.mp3``.

    Args:
        response: The text to speak.
        lang: Requested language code; ``"ta"`` selects Tamil, any other
            value selects English.

    Returns:
        The path of the saved MP3 file (always ``"response.mp3"``).
    """
    # Anything other than Tamil falls back to English.
    if lang == "ta":
        voice = "ta"
    else:
        voice = "en"

    output_path = "response.mp3"
    gTTS(text=response, lang=voice).save(output_path)
    return output_path
62
+
63
# Gradio interface
# Input components, in the order of process_voice_or_text's parameters:
# audio file path, typed text, output language.
_input_components = [
    gr.Audio(type="filepath", label="Voice Input (Tamil or English)"),
    gr.Textbox(label="Text Input (Tamil or English)", placeholder="Type your input here..."),
    gr.Radio(["ta", "en"], label="Output Language", value="en"),
]

# Output components, in the order of process_voice_or_text's 5-tuple result.
_output_components = [
    gr.Textbox(label="Original Input"),
    gr.Textbox(label="Translated English Query"),
    gr.Textbox(label="LLM Response (English)"),
    gr.Textbox(label="Final Response (Tamil/English)"),
    gr.Audio(type="filepath", label="Audio Output (Tamil/English)"),
]

iface = gr.Interface(
    fn=process_voice_or_text,
    inputs=_input_components,
    outputs=_output_components,
    live=True,
    title="Nisha - Tamil-English Voice Assistant",
    description="Speak or type in Tamil or English, and get responses in your preferred language as text or audio!",
)

# Start the web app when this file is executed.
iface.launch()