jaisun2004 committed on
Commit
1d6eeba
·
verified ·
1 Parent(s): 291bb00

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import subprocess
4
+ from transformers import pipeline
5
+ from langdetect import detect
6
+
7
def download_audio(youtube_url):
    """Download the audio track of a video as an MP3 file using ``yt-dlp``.

    The audio is written to ``audio.mp3`` in the current working directory.
    Any file already present at that path is deleted before the download.

    Args:
        youtube_url: URL of the video to download. Surrounding whitespace
            is ignored.

    Returns:
        The path of the downloaded file (``"audio.mp3"``).

    Raises:
        ValueError: If ``youtube_url`` is empty or only whitespace.
        subprocess.CalledProcessError: If ``yt-dlp`` exits with a non-zero
            status.
        OSError: If the ``yt-dlp`` executable cannot be started or the stale
            output file cannot be removed.
    """
    url = (youtube_url or "").strip()
    if not url:
        # Fail fast with a readable message instead of a yt-dlp usage error.
        raise ValueError("No YouTube URL provided")

    output_file = "audio.mp3"
    # Remove old file if exists so a failed run never returns stale audio.
    if os.path.exists(output_file):
        os.remove(output_file)

    cmd = [
        "yt-dlp", "-x", "--audio-format", "mp3", "-o", output_file,
        # "--" ends option parsing: the URL comes from the user and must never
        # be interpreted as a yt-dlp command-line option.
        "--", url,
    ]
    subprocess.run(cmd, check=True)
    return output_file
17
+
18
def process_youtube(youtube_url):
    """Transcribe, translate to English, and summarize a YouTube video.

    Steps: download the audio, transcribe it with Whisper, detect the
    transcript language, produce an English translation when the detected
    language is not English, and summarize the English text with BART.

    Args:
        youtube_url: URL of the video to process.

    Returns:
        A 4-tuple of strings
        ``(language, transcript, english_transcript, summary)``.
        If the download or the transcription fails, the first element holds
        the error message and the remaining three are empty strings.
        Translation and summarization failures are reported inside their own
        element instead of aborting the whole request.
    """
    try:
        audio_path = download_audio(youtube_url)
    except Exception as e:
        return "Error downloading audio: " + str(e), "", "", ""

    try:
        try:
            # chunk_length_s lets the pipeline process recordings longer than
            # Whisper's 30-second window by splitting them into chunks.
            asr = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-large",
                chunk_length_s=30,
            )
            transcript = asr(audio_path)["text"]
        except Exception as e:
            return "Error in transcription: " + str(e), "", "", ""

        try:
            detected_lang = detect(transcript)
        except Exception:
            # Language detection is best-effort (e.g. it fails on empty text).
            detected_lang = "unknown"
        lang_map = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}
        # Codes without a friendly name are shown as-is.
        lang_text = lang_map.get(detected_lang, detected_lang)

        transcript_en = transcript
        translation_failed = False
        if detected_lang != "en":
            try:
                # Reuse the already-loaded model. The Whisper task is selected
                # through generate_kwargs; passing task="translate" to
                # pipeline() collides with its positional `task` argument.
                result_translate = asr(
                    audio_path, generate_kwargs={"task": "translate"}
                )
                transcript_en = result_translate["text"]
            except Exception as e:
                transcript_en = f"Error translating: {e}"
                translation_failed = True

        if translation_failed:
            # Do not summarize the error message itself.
            summary_text = "Error summarizing: no English transcript available"
        else:
            try:
                summarizer = pipeline(
                    "summarization", model="facebook/bart-large-cnn"
                )
                # truncation=True keeps long transcripts within the model's
                # maximum input length instead of raising.
                summary = summarizer(
                    transcript_en,
                    max_length=100,
                    min_length=30,
                    do_sample=False,
                    truncation=True,
                )
                summary_text = summary[0]["summary_text"]
            except Exception as e:
                summary_text = f"Error summarizing: {e}"

        return lang_text, transcript, transcript_en, summary_text
    finally:
        # Always remove the downloaded audio, including on early error returns.
        if os.path.exists(audio_path):
            os.remove(audio_path)
56
+
57
# Single-page UI: one URL field, one button, four read-only result boxes.
with gr.Blocks() as demo:
    gr.Markdown("## YouTube Transcript, Translation & Summary (Powered by Whisper + Hugging Face)")

    # Input controls.
    url_input = gr.Textbox(label="YouTube URL")
    btn = gr.Button("Process")

    # Result boxes, created in the order they appear on the page.
    lang_out = gr.Textbox(label="Detected Language")
    transcript_out = gr.Textbox(label="Original Transcript")
    transcript_en_out = gr.Textbox(label="English Transcript (if translated)")
    summary_out = gr.Textbox(label="Summary")

    # The order here must match the tuple returned by process_youtube.
    result_boxes = [lang_out, transcript_out, transcript_en_out, summary_out]
    btn.click(fn=process_youtube, inputs=[url_input], outputs=result_boxes)

# Start the web server for the app.
demo.launch()