dindizz committed on
Commit
b10c901
·
verified ·
1 Parent(s): 45dc8d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -44
app.py CHANGED
@@ -1,39 +1,14 @@
1
  import gradio as gr
2
- import torch
3
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
- import soundfile as sf
5
  import openai
6
  from deep_translator import GoogleTranslator
7
  import os
8
 
9
- # Load the Wav2Vec2 model and processor for Tamil
10
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-xlsr-53-tamil")
11
- processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-xlsr-53-tamil")
12
-
13
  # Initialize OpenAI API
14
  openai.api_key = os.environ.get("OPENAI_API_KEY")
15
 
16
- def transcribe_audio(audio_path):
17
- # Load audio file
18
- audio_input, sample_rate = sf.read(audio_path)
19
-
20
- # Resample if necessary
21
- if sample_rate != 16000:
22
- print(f"Resampling from {sample_rate} to 16000 Hz")
23
- audio_input = librosa.resample(audio_input, sample_rate, 16000)
24
-
25
- # Preprocess the audio
26
- input_values = processor(audio_input, return_tensors="pt", sampling_rate=16000).input_values
27
-
28
- # Perform inference
29
- with torch.no_grad():
30
- logits = model(input_values).logits
31
-
32
- # Decode the output
33
- predicted_ids = torch.argmax(logits, dim=-1)
34
- transcription = processor.batch_decode(predicted_ids)[0]
35
-
36
- return transcription
37
 
38
  def get_llm_response(prompt):
39
  response = openai.Completion.create(
@@ -43,16 +18,9 @@ def get_llm_response(prompt):
43
  )
44
  return response.choices[0].text.strip()
45
 
46
- def translate_text(text, target_lang):
47
- translator = GoogleTranslator(source='auto', target=target_lang)
48
- return translator.translate(text)
49
-
50
- def process_voice_input(audio_path, output_lang):
51
- # Transcribe audio
52
- transcription = transcribe_audio(audio_path)
53
-
54
- # Translate transcription to English
55
- english_query = translate_text(transcription, "en")
56
 
57
  # Get response from LLM
58
  llm_response = get_llm_response(english_query)
@@ -60,23 +28,23 @@ def process_voice_input(audio_path, output_lang):
60
  # Translate LLM response to desired output language
61
  final_response = translate_text(llm_response, output_lang)
62
 
63
- return transcription, english_query, llm_response, final_response
64
 
65
  # Gradio interface
66
  iface = gr.Interface(
67
- fn=process_voice_input,
68
  inputs=[
69
- gr.Audio(source="microphone", type="filepath"),
70
  gr.Radio(["ta", "en"], label="Output Language")
71
  ],
72
  outputs=[
73
- gr.Textbox(label="Transcribed Tamil"),
74
  gr.Textbox(label="Translated English Query"),
75
  gr.Textbox(label="LLM Response (English)"),
76
  gr.Textbox(label="Final Response (Tamil/English)")
77
  ],
78
- title="Tamil Voice Assistant",
79
- description="Speak in Tamil, get responses in Tamil or English!"
80
  )
81
 
82
  iface.launch()
 
1
  import gradio as gr
 
 
 
2
  import openai
3
  from deep_translator import GoogleTranslator
4
  import os
5
 
 
 
 
 
6
  # Initialize OpenAI API
7
  openai.api_key = os.environ.get("OPENAI_API_KEY")
8
 
9
+ def translate_text(text, target_lang):
10
+ translator = GoogleTranslator(source='auto', target=target_lang)
11
+ return translator.translate(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def get_llm_response(prompt):
14
  response = openai.Completion.create(
 
18
  )
19
  return response.choices[0].text.strip()
20
 
21
+ def process_input(input_text, output_lang):
22
+ # Translate input to English
23
+ english_query = translate_text(input_text, "en")
 
 
 
 
 
 
 
24
 
25
  # Get response from LLM
26
  llm_response = get_llm_response(english_query)
 
28
  # Translate LLM response to desired output language
29
  final_response = translate_text(llm_response, output_lang)
30
 
31
+ return input_text, english_query, llm_response, final_response
32
 
33
  # Gradio interface
34
  iface = gr.Interface(
35
+ fn=process_input,
36
  inputs=[
37
+ gr.Textbox(label="Input (Tamil or English)"),
38
  gr.Radio(["ta", "en"], label="Output Language")
39
  ],
40
  outputs=[
41
+ gr.Textbox(label="Original Input"),
42
  gr.Textbox(label="Translated English Query"),
43
  gr.Textbox(label="LLM Response (English)"),
44
  gr.Textbox(label="Final Response (Tamil/English)")
45
  ],
46
+ title="Tamil-English Assistant",
47
+ description="Enter text in Tamil or English, get responses in Tamil or English!"
48
  )
49
 
50
  iface.launch()