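"""Nisha: a Tamil-English voice assistant built with Gradio.

Pipeline: speech or text input -> translate the query to English -> ask the LLM ->
translate the answer back to the chosen language -> synthesize audio with gTTS.
"""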
import os

import gradio as gr
import speech_recognition as sr
from deep_translator import GoogleTranslator
from gtts import gTTS
from openai import OpenAI

# The OpenAI client reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def translate_text(text, target_lang):
    """Translate text to the target language, auto-detecting the source language."""
    translator = GoogleTranslator(source='auto', target=target_lang)
    return translator.translate(text)

def get_llm_response(prompt):
    """Ask the chat model for a short, informative answer to the English query."""
    response = client.chat.completions.create(
        model="gpt-4o-mini-2024-07-18",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that provides informative and concise responses."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    return response.choices[0].message.content.strip()

def process_voice_or_text(input_audio, input_text, output_lang):
    """Handle a voice or text query and return the transcription, translations, and audio."""
    if output_lang not in ["ta", "en"]:
        return "Invalid output language selected. Please choose either Tamil ('ta') or English ('en').", "", "", "", None

    if input_audio is not None:
        # Transcribe the audio input with the Google Web Speech API.
        recognizer = sr.Recognizer()
        with sr.AudioFile(input_audio) as source:
            audio = recognizer.record(source)
        try:
            # recognize_google expects a single BCP-47 tag, so Tamil (India) is used here.
            input_text = recognizer.recognize_google(audio, language="ta-IN")
        except sr.UnknownValueError:
            return "Could not understand the audio input.", "", "", "", None
        except sr.RequestError as e:
            return f"Speech recognition error: {e}", "", "", "", None

    if not input_text:
        return "Please provide a valid input.", "", "", "", None

    # Translate input to English
    english_query = translate_text(input_text, "en")

    # Get response from LLM
    llm_response = get_llm_response(english_query)

    # Translate LLM response to desired output language
    final_response = translate_text(llm_response, output_lang)

    # Generate audio output
    audio_file = text_to_speech(final_response, output_lang)

    return input_text, english_query, llm_response, final_response, audio_file

def text_to_speech(response, lang):
    """Synthesize the response with gTTS and save it as an MP3 file."""
    tts = gTTS(text=response, lang=lang)
    audio_file = "response.mp3"
    tts.save(audio_file)
    return audio_file

# Gradio interface
iface = gr.Interface(
    fn=process_voice_or_text,
    inputs=[
        gr.Audio(type="filepath", label="Voice Input (Tamil or English)"),
        gr.Textbox(label="Text Input (Tamil or English)", placeholder="Type your input here..."),
        gr.Radio(["ta", "en"], label="Output Language", value="en")
    ],
    outputs=[
        gr.Textbox(label="Original Input"),
        gr.Textbox(label="Translated English Query"),
        gr.Textbox(label="LLM Response (English)"),
        gr.Textbox(label="Final Response (Tamil/English)"),
        gr.Audio(type="filepath", label="Audio Output (Tamil/English)")
    ],
    live=True,
    title="Nisha - Tamil-English Voice Assistant",
    description="Speak or type in Tamil or English, and get responses in your preferred language as text or audio!"
)

iface.launch()