Pragnakal committed on
Commit 0f9209d · verified · 1 Parent(s): 0f744d9

Update app.py

Files changed (1)
  1. app.py +21 -26
app.py CHANGED
@@ -1,36 +1,31 @@
 import gradio as gr
-import speech_recognition as sr
+from pyannote.audio import Pipeline
+from pyannote.core import Segment
+from pyannote.audio import Audio
 from pydub import AudioSegment
 
-# Function to transcribe audio to text
-def transcribe_audio(audio_input):
-    recognizer = sr.Recognizer()
-    audio_file = sr.AudioFile(audio_input)
-    with audio_file as source:
-        audio_data = recognizer.record(source)
-    try:
-        text = recognizer.recognize_google(audio_data)
-    except sr.UnknownValueError:
-        text = "Sorry, I couldn't understand the audio."
-    except sr.RequestError:
-        text = "Sorry, there was a problem with the request."
-    return text
-
-# Function to generate a response (you'll need to implement this)
-def generate_response(user_input):
-    # Placeholder for the text response generation and TTS part
-    text_response = f"Responding as Tommy Vercetti: {user_input}"
-    # Generate audio path based on text_response
-    output_path = "response.wav"  # Placeholder path
-    # Implement TTS and save to output_path
-    return text_response, output_path
+api_k = os.getenv("API_KEY")
+# Initialize the pyannote pipeline for speaker diarization
+pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=api_k)
 
 # Function to process the audio input and return both text and audio response
 def respond(audio_input):
-    print(audio_input)
-    user_input = transcribe_audio(audio_input)
-    text_response, output_path = generate_response(user_input)
+    # Save the audio input to a file
+    audio_file = audio_input
+    audio_input.save(audio_file)
+
+    # Perform diarization on the whole file
+    diarization = pipeline(audio_file)
+
+    # Example: Extract and process a specific segment
+    excerpt = Segment(start=2.0, end=5.0)
+    waveform, sample_rate = Audio().crop(audio_file, excerpt)
+
+    # Further processing (placeholder for actual implementation)
+    text_response = f"Processed segment from {excerpt.start} to {excerpt.end} seconds."
+    output_path = "response.wav"  # Placeholder path
 
+    # Generate an audio file as the response (placeholder for TTS implementation)
     audio = AudioSegment.from_wav(output_path)
     duration = len(audio) / 1000
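The committed version, taken on its own, leaves a few pieces implicit: os.getenv is called but the os module is never imported, audio_input.save(audio_file) assumes the Gradio component hands over an object with a save method, and the diarization result is computed but never read. Below is a minimal sketch of how the pieces could fit together, not the committed implementation. It assumes an added import of os, a gr.Audio(type="filepath") input so that audio_input is already a path on disk, an API_KEY environment variable holding a Hugging Face token for the gated pyannote model, and it summarises the diarization output with itertracks(yield_label=True). The TTS step and response.wav remain placeholders exactly as in the commit, and the gr.Interface wiring at the end is hypothetical.

# app.py sketch (assumptions noted above; not the committed file)
import os

import gradio as gr
from pyannote.audio import Pipeline, Audio
from pyannote.core import Segment
from pydub import AudioSegment

# Hugging Face access token for the gated pyannote model (assumed env var)
api_k = os.getenv("API_KEY")

# Initialize the pyannote speaker-diarization pipeline once at startup
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1", use_auth_token=api_k
)


# Process the audio input and return both a text and an audio response
def respond(audio_input):
    # With gr.Audio(type="filepath"), audio_input is already a path on disk,
    # so no explicit save step is needed
    audio_file = audio_input

    # Perform diarization on the whole file
    diarization = pipeline(audio_file)

    # Summarise who spoke when (assumed use of the diarization result)
    turns = [
        f"{speaker}: {turn.start:.1f}s to {turn.end:.1f}s"
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ]
    text_response = "\n".join(turns) or "No speech detected."

    # Extract a specific excerpt of the waveform, as in the committed code
    excerpt = Segment(start=2.0, end=5.0)
    waveform, sample_rate = Audio().crop(audio_file, excerpt)

    # Placeholder for TTS: a real implementation would synthesise speech
    # and write it to output_path before reading it back with pydub
    output_path = "response.wav"
    audio = AudioSegment.from_wav(output_path)
    duration = len(audio) / 1000  # length of the reply in seconds

    return text_response, output_path


# Hypothetical Gradio wiring matching respond()'s inputs and outputs
demo = gr.Interface(
    fn=respond,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs=[gr.Textbox(label="Diarization"), gr.Audio(label="Response")],
)

if __name__ == "__main__":
    demo.launch()

If the Space runs an older Gradio 3.x, the audio input would be declared with source="microphone" (singular) instead of sources=[...]; the rest of the flow is unchanged.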