kabita-choudhary committed on
Commit
b733b99
·
1 Parent(s): e82a7b1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pyannote.audio import Pipeline
3
+ pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization",use_auth_token="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")
4
+ def diarization():
5
+ diarization = pipeline("result.wav")
6
+ speakertime=[]
7
+ # print the result
8
+ for turn, _, speaker in diarization.itertracks(yield_label=True):
9
+ details=[turn.start,turn.end,speaker]
10
+ speakertime.append(details)
11
+ #print(turn.start)
12
+ #print(speaker)
13
+ print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}")
14
+ #print(speakertime)
15
+ df = pd.DataFrame(speakertime,columns=['start', 'end','speaker'])
16
+ text=[]
17
+ for i in range (df.start.count()):
18
+ text.append(generatetext("result.wav",df.start[i], df.end[i]))
19
+ df['text']=text
20
+ with open('my_file.txt', 'w') as my_file:
21
+ for i in range (df.start.count()):
22
+ my_file.write(df.speaker[i]+": " +df.text[i] + '\n')
23
+ print(open("my_file.txt","r").read())
24
+
25
+
26
+ def generatetext(filename,starttime,endtime):
27
+
28
+ t1 = starttime * 1000 # works in milliseconds
29
+ t2 = endtime * 1000
30
+
31
+ newAudio = AudioSegment.from_wav(filename)
32
+ a = newAudio[t1:t2]
33
+ a.export('audio.wav', format="wav")
34
+ text1 = whisper('audio.wav')
35
+ return text1.get("text")
36
+
37
+ block = gr.Blocks()
38
+ with block:
39
+ with gr.Group():
40
+ with gr.Box():
41
+ with gr.Row().style():
42
+
43
+ inp_audio = gr.Audio(
44
+ label="Input Audio",
45
+ type="filepath",
46
+ mirror_webcam = False
47
+ )
48
+ outputdialogs = gr.Textbox()
49
+ btn = gr.Button("Generate Text")
50
+ btn.click(diarisation, inputs=[inp_audio], outputdialogs=[op],api_name="view_api")
51
+ block.launch(enable_queue = True,debug=True)