bluenevus committed on
Commit
a18a113
·
verified ·
1 Parent(s): 85f1edd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torchaudio
3
+ import torchaudio.transforms as T
4
+ from transformers import pipeline
5
+ import requests
6
+ from pydub import AudioSegment
7
+ from pydub.silence import split_on_silence
8
+ import io
9
+ import os
10
+
11
# Build the speech-recognition pipeline once at import time; the transcription
# code in this module reuses this single loaded model for every call.
transcription_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
13
+
14
def download_audio_from_url(url, timeout=60):
    """Download the resource at *url* and return its raw bytes.

    Args:
        url: Address of the media file to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` can block indefinitely on an
            unresponsive host.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on an error status instead of handing an HTML error page to
    # the audio decoder further down the line.
    response.raise_for_status()
    return response.content
18
+
19
# Sample rate (Hz) the segments are resampled to before recognition; the
# wav2vec2-base-960h checkpoint loaded by this module works on 16 kHz speech.
_TARGET_SAMPLE_RATE = 16000


def _segment_to_asr_input(segment):
    """Convert a pydub ``AudioSegment`` into an input dict for the ASR pipeline.

    The segment is downmixed to mono and resampled, written to a private
    temporary WAV file, and read back with ``torchaudio``. The result is a
    ``{"raw": 1-D float array, "sampling_rate": int}`` dict.
    """
    import tempfile

    normalized = segment.set_channels(1).set_frame_rate(_TARGET_SAMPLE_RATE)

    # A per-call temporary directory avoids the filename collisions that a
    # fixed "temp_*.wav" path causes when several requests run at once, and
    # guarantees cleanup even if loading fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "segment.wav")
        # export() returns the open file handle; close it so the file can be
        # re-read and the directory removed on every platform.
        normalized.export(wav_path, format="wav").close()
        waveform, sample_rate = torchaudio.load(wav_path)

    # torchaudio yields a (channels, frames) tensor, while the pipeline wants
    # a single-channel 1-D array.
    samples = waveform.mean(dim=0).numpy()
    return {"raw": samples, "sampling_rate": sample_rate}


def transcribe_audio(audio_bytes):
    """Transcribe encoded audio and return the text grouped into paragraphs.

    The audio is split on silence (at least 500 ms below -40 dBFS) and each
    non-silent chunk is transcribed separately. Consecutive chunks that
    produce text are joined with a space into one paragraph.

    NOTE: a paragraph break is emitted only when a chunk yields an empty
    transcript, not at every silence boundary.

    Args:
        audio_bytes: Encoded audio/video data in any container that pydub
            (via ffmpeg) can decode.

    Returns:
        Paragraphs separated by a blank line, or ``""`` when no chunk
        produced any text.
    """
    audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
    chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)

    paragraphs = []
    pending = []  # chunk transcripts collected for the current paragraph
    for chunk in chunks:
        result = transcription_pipeline(
            _segment_to_asr_input(chunk), chunk_length_s=30
        )
        text = result["text"]
        if text:
            pending.append(text)
        elif pending:
            paragraphs.append(" ".join(pending))
            pending = []

    if pending:
        paragraphs.append(" ".join(pending))

    return "\n\n".join(paragraphs)
57
+
58
def transcribe_video(url):
    """Fetch the media found at *url* and return its formatted transcript."""
    return transcribe_audio(download_audio_from_url(url))
62
+
63
def download_transcript(transcript):
    """Write *transcript* to a ``transcript.txt`` file and return its path.

    The file component that receives the result needs a path to an existing
    file, so the text is written to disk first.

    Args:
        transcript: Transcript text; ``None`` is treated as empty text.

    Returns:
        A 2-tuple holding the same file path twice, matching the two output
        slots this callback is wired to.
    """
    import tempfile

    # A fresh directory per call keeps the friendly download name while
    # preventing concurrent users from overwriting each other's file.
    # NOTE: the directory is left in the system temp location for the OS to
    # reclaim; it must outlive this call so the file can be served.
    out_dir = tempfile.mkdtemp(prefix="transcript_")
    path = os.path.join(out_dir, "transcript.txt")
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(transcript or "")
    return path, path
65
+
66
# Assemble the Gradio UI: a URL box with a button that fills in the
# transcript, plus a second button that offers the transcript as a file.
with gr.Blocks(title="Video Transcription") as demo:
    gr.Markdown("# Video Transcription")

    # Widgets, in the order they appear on the page.
    video_url = gr.Textbox(label="Video URL")
    transcribe_button = gr.Button("Transcribe")
    transcript_output = gr.Textbox(label="Transcript", lines=20)
    download_button = gr.Button("Download Transcript")
    download_link = gr.File(label="Download Transcript")

    # URL -> transcript text.
    transcribe_button.click(
        fn=transcribe_video,
        inputs=video_url,
        outputs=transcript_output,
    )
    # download_transcript returns two values, so the file component is
    # listed once per returned value.
    download_button.click(
        fn=download_transcript,
        inputs=transcript_output,
        outputs=[download_link, download_link],
    )

demo.launch()