Futuresony committed on
Commit
fb3c8c4
·
verified ·
1 Parent(s): ac0a905

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -37
app.py CHANGED
@@ -1,45 +1,31 @@
1
  import gradio as gr
2
- from asr import transcribe_audio # Your ASR function
3
- from lid import detect_language # Your Language Identification function
4
- from tts import text_to_speech # Your TTS function
5
- from transformers import pipeline
6
 
7
- # Load the text generation model (adjust this based on your model type)
8
- text_generator = pipeline("text-generation", model="Futuresony/12_10_2024.gguf")
9
-
10
- # Function to process input
11
- def process_input(input_text=None, audio=None):
12
- if audio: # If audio is provided, convert it to text
13
- input_text = transcribe_audio(audio)
14
-
15
- if not input_text:
16
- return "No input provided", None
17
 
18
- # Detect language
19
- lang = detect_language(input_text)
20
 
21
- # Generate text using the model
22
- output_text = text_generator(input_text, max_length=100, do_sample=True)[0]['generated_text']
23
-
24
- # Convert output text to speech
25
- output_audio = text_to_speech(output_text, lang)
26
-
27
- return output_text, output_audio
 
28
 
29
- # Create Gradio interface
30
  interface = gr.Interface(
31
- fn=process_input,
32
- inputs=[
33
- gr.Textbox(label="Enter Text", placeholder="Type here..."),
34
- gr.Audio(source="microphone", type="filepath", label="Record Audio")
35
- ],
36
- outputs=[
37
- gr.Textbox(label="Generated Text"),
38
- gr.Audio(label="Generated Speech")
39
- ],
40
- title="Speech-to-Text AI Chat",
41
- description="Input text or record audio, and the AI will respond with generated text and speech."
42
  )
43
 
44
- # Run the demo
45
- interface.launch()
 
1
  import gradio as gr
2
+ from asr import transcribe_auto # Import your ASR function
3
+ from lid import detect_language # Import your Language Detection function
4
+ from tts import generate_audio # Import your TTS function
 
5
 
6
def process_audio(audio_data):
    """ASR -> LID -> placeholder generation -> TTS pipeline for one recording.

    Args:
        audio_data: Raw audio as delivered by the Gradio microphone input
            (numpy payload — see the `gr.Audio(type="numpy")` component).

    Returns:
        A ``(text, audio)`` pair: the composed text response and the
        synthesized speech for it.
    """
    # Speech-to-text via the project ASR helper.
    text = transcribe_auto(audio_data)

    # Language identification on the same audio input.
    # NOTE(review): the previous revision ran detection on the transcribed
    # text — confirm `detect_language` accepts raw audio.
    lang = detect_language(audio_data)

    # Placeholder "generation": echo the detection + transcription.
    # Swap in real model inference here later.
    reply = f"Detected Language: {lang}\n\nTranscription: {text}"

    # Text-to-speech on the composed reply.
    reply_audio = generate_audio(reply)

    return reply, reply_audio
21
 
22
# Gradio UI: one microphone recording in, generated text + speech out.
# NOTE(review): Gradio 4.x renamed `gr.Audio(source=...)` to `sources=[...]`;
# confirm the pinned gradio version still accepts `source`.
interface = gr.Interface(
    fn=process_audio,
    # Microphone component delivering numpy audio to `process_audio`.
    inputs=gr.Audio(source="microphone", type="numpy"),
    outputs=[
        gr.Textbox(label="Generated Text"),
        gr.Audio(label="Generated Speech"),
    ],
    # Re-run the pipeline live as input changes.
    live=True,
)
29
 
30
if __name__ == "__main__":
    # Start the Gradio server only when executed as a script, not on import.
    interface.launch()