jaisun2004 committed on
Commit 8be9a26 · verified · 1 Parent(s): 44a5cc4

Upload 5 files

Files changed (6)
  1. .gitattributes +1 -0
  2. DejaVuSans.ttf +3 -0
  3. README.md +25 -6
  4. app.py +180 -0
  5. gitattributes +36 -0
  6. requirements.txt +7 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ DejaVuSans.ttf filter=lfs diff=lfs merge=lfs -text
DejaVuSans.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7da195a74c55bef988d0d48f9508bd5d849425c1770dba5d7bfc6ce9ed848954
+ size 757076
README.md CHANGED
@@ -1,13 +1,32 @@
  ---
- title: Audiototext
- emoji:
- colorFrom: pink
- colorTo: gray
+ title: YouTube Transcript & Summary
+ emoji: 🎧
+ colorFrom: blue
+ colorTo: green
  sdk: gradio
  sdk_version: 5.31.0
  app_file: app.py
  pinned: false
- short_description: mfilterit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # YouTube Transcript, Translation & Summary (Whisper + Hugging Face)
+
+ This Space extracts audio from a YouTube video, detects the language, transcribes the speech with OpenAI Whisper, translates it to English if needed, and provides a summary.
+
+ **How to use:**
+ 1. Paste a YouTube URL in the box.
+ 2. Click "Process".
+ 3. View the detected language, full transcript, English translation, and summary.
+
+ **Tech stack:** Gradio, Hugging Face Transformers, OpenAI Whisper, Facebook BART, yt-dlp
+
+ ---
+
+ ## Requirements
+ - All dependencies are listed in requirements.txt.
+ - yt-dlp is included as a pip dependency.
+
+ ---
+
+ ## Author
+ Built for quick demos and prototyping by Jagan (template by ChatGPT).
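Note: the README lists yt-dlp for fetching YouTube audio, while the app.py in this commit works from a directly uploaded audio file. As a rough, non-authoritative sketch of what the download step could look like using yt-dlp's Python API (the helper name, options, and output path below are illustrative assumptions, not part of this commit):

```python
# Hypothetical helper, not part of this commit: fetch a video's audio track
# with yt-dlp so the resulting file can be passed to the Whisper step.
from yt_dlp import YoutubeDL

def download_audio(url, out_template="/tmp/yt_audio.%(ext)s"):
    opts = {
        "format": "bestaudio/best",   # best available audio-only stream
        "outtmpl": out_template,      # yt-dlp output filename template
        "noplaylist": True,           # single video only
    }
    with YoutubeDL(opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # prepare_filename resolves the template to the file actually written
        return ydl.prepare_filename(info)
```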
app.py ADDED
@@ -0,0 +1,180 @@
+ import gradio as gr
+ import openai
+ from langdetect import detect
+ from transformers import pipeline
+ from keybert import KeyBERT
+ from fpdf import FPDF
+ import os
+ import re
+ import unicodedata
+
+ # --- SETUP ---
+ openai.api_key = os.getenv("OPENAI_API_KEY")  # Set in HF Space Secrets
+
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+ kw_model = KeyBERT()
+ FONT_PATH = "DejaVuSans.ttf"  # Must be uploaded to Space root!
+
+ BRANDS = [
+     "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "ICICI",
+     "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
+     "Motilal", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber"
+ ]
+
+ def extract_brands(text):
+     found = [brand for brand in BRANDS if brand.lower() in text.lower()]
+     return found if found else ["None detected"]
+
+ def extract_topics(text, top_n=5):
+     keywords = kw_model.extract_keywords(text, top_n=top_n, stop_words='english')
+     topics = [kw for kw, score in keywords]
+     return topics if topics else ["None extracted"]
+
+ def make_bullets(summary):
+     sentences = summary.replace("\n", " ").split('. ')
+     bullets = [f"- {s.strip()}" for s in sentences if s.strip()]
+     return "\n".join(bullets)
+
+ def make_str(val):
+     try:
+         if val is None:
+             return ""
+         if isinstance(val, (bool, int, float)):
+             return str(val)
+         if isinstance(val, list):
+             return "\n".join([make_str(v) for v in val])
+         if isinstance(val, dict):
+             return str(val)
+         return str(val)
+     except Exception:
+         return ""
+
+ def very_safe_multicell(pdf, text, w=0, h=8, maxlen=50):
+     """Force-break lines so no line/word exceeds maxlen chars, avoiding fpdf2 crash."""
+     if not isinstance(text, str):
+         text = str(text)
+     # Remove unprintable chars (e.g. control characters)
+     text = "".join(ch for ch in text if unicodedata.category(ch)[0] != "C")
+     # Step 1: break long words
+     def break_long_words(t):
+         lines = []
+         for paragraph in t.split('\n'):
+             for word in paragraph.split(' '):
+                 while len(word) > maxlen:
+                     lines.append(word[:maxlen])
+                     word = word[maxlen:]
+                 lines.append(word)
+             lines.append('')
+         return '\n'.join(lines)
+     text = break_long_words(text)
+     # Step 2: ensure no line is too long (wrap at maxlen)
+     wrapped = []
+     for line in text.splitlines():
+         while len(line) > maxlen:
+             wrapped.append(line[:maxlen])
+             line = line[maxlen:]
+         wrapped.append(line)
+     safe_text = '\n'.join(wrapped)
+     pdf.multi_cell(w, h, safe_text)
+
+ def create_pdf_report(language, transcript_en, brands, topics, key_takeaways):
+     pdf = FPDF()
+     pdf.set_auto_page_break(auto=True, margin=10)
+     pdf.set_margins(left=10, top=10, right=10)
+     pdf.add_font("DejaVu", style="", fname=FONT_PATH, uni=True)
+     pdf.add_font("DejaVu", style="B", fname=FONT_PATH, uni=True)
+     pdf.add_page()
+     pdf.set_font("DejaVu", "B", 16)
+     pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
+     pdf.set_font("DejaVu", size=12)
+     pdf.ln(5)
+     pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
+     pdf.ln(5)
+     pdf.set_font("DejaVu", "B", 12)
+     pdf.cell(0, 10, "English Transcript:", ln=True)
+     pdf.set_font("DejaVu", size=12)
+     very_safe_multicell(pdf, transcript_en or "", maxlen=50)
+     pdf.ln(3)
+     pdf.set_font("DejaVu", "B", 12)
+     pdf.cell(0, 10, "Brands Detected:", ln=True)
+     pdf.set_font("DejaVu", size=12)
+     very_safe_multicell(pdf, ", ".join(brands), maxlen=50)
+     pdf.set_font("DejaVu", "B", 12)
+     pdf.cell(0, 10, "Key Topics:", ln=True)
+     pdf.set_font("DejaVu", size=12)
+     very_safe_multicell(pdf, ", ".join(topics), maxlen=50)
+     pdf.set_font("DejaVu", "B", 12)
+     pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
+     pdf.set_font("DejaVu", size=10)
+     for takeaway in key_takeaways.split('\n'):
+         very_safe_multicell(pdf, takeaway, maxlen=50)
+     pdf_file = "/tmp/analysis_report.pdf"
+     pdf.output(pdf_file)
+     return pdf_file
+
+ def process_audio(audio_path):
+     if not audio_path or not isinstance(audio_path, str):
+         return ("No audio file provided.", "", "", "", "", "", None)
+     # Transcribe in the original language with the OpenAI Whisper API
+     try:
+         with open(audio_path, "rb") as audio_file:
+             transcript = openai.audio.transcriptions.create(
+                 model="whisper-1",
+                 file=audio_file,
+                 response_format="text"
+             )
+         transcript = make_str(transcript).strip()
+     except Exception as e:
+         return (f"Error in transcription: {e}", "", "", "", "", "", None)
+     # Detect the transcript language
+     try:
+         detected_lang = detect(transcript)
+         lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
+     except Exception:
+         detected_lang = "unknown"  # keep the name defined for the comparison below
+         lang_text = "unknown"
+     # Translate to English if the audio is not already English
+     transcript_en = transcript
+     if detected_lang != "en":
+         try:
+             with open(audio_path, "rb") as audio_file:
+                 transcript_en = openai.audio.translations.create(
+                     model="whisper-1",
+                     file=audio_file,
+                     response_format="text"
+                 )
+             transcript_en = make_str(transcript_en).strip()
+         except Exception as e:
+             transcript_en = f"Error translating: {e}"
+     # Summarize, then derive brands, topics, bullets and the PDF report
+     try:
+         summary_obj = summarizer(transcript_en, max_length=100, min_length=30, do_sample=False)
+         summary = summary_obj[0]["summary_text"] if isinstance(summary_obj, list) and "summary_text" in summary_obj[0] else make_str(summary_obj)
+     except Exception as e:
+         summary = f"Error summarizing: {e}"
+     brands = extract_brands(transcript_en)
+     topics = extract_topics(transcript_en)
+     key_takeaways = make_bullets(summary)
+     pdf_file = create_pdf_report(lang_text, transcript_en, brands, topics, key_takeaways)
+     return (
+         lang_text,
+         transcript,
+         transcript_en,
+         ", ".join(brands),
+         ", ".join(topics),
+         key_takeaways,
+         pdf_file
+     )
+
+ iface = gr.Interface(
+     fn=process_audio,
+     inputs=gr.Audio(type="filepath", label="Upload MP3/WAV Audio"),
+     outputs=[
+         gr.Textbox(label="Detected Language"),
+         gr.Textbox(label="Original Transcript"),
+         gr.Textbox(label="English Transcript (if translated)"),
+         gr.Textbox(label="Brands Detected"),
+         gr.Textbox(label="Key Topics"),
+         gr.Textbox(label="Bulleted Key Takeaways"),
+         gr.File(label="Download PDF Report")
+     ],
+     title="Audio Transcript, Brand & Topic Analysis (OpenAI Whisper + Unicode PDF Download)",
+     description="Upload your audio file (MP3/WAV). Get transcript, summary, brand & topic detection, and download PDF. Unicode (Indian language/emoji) supported."
+ )
+
+ iface.launch()
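The Gradio interface is a thin wrapper around process_audio, so the pipeline can also be exercised without the UI. A minimal, hypothetical smoke test (sample.mp3 is a placeholder path; it assumes OPENAI_API_KEY is exported and that the trailing iface.launch() call is commented out while testing, since it is not behind a __main__ guard and would otherwise block the import):

```python
# Hypothetical local smoke test, not part of this commit.
# Requires OPENAI_API_KEY in the environment and DejaVuSans.ttf next to app.py.
from app import process_audio  # assumes iface.launch() is disabled for testing

lang, original, english, brands, topics, bullets, pdf_path = process_audio("sample.mp3")
print("Detected language:", lang)
print("Brands:", brands)
print("Topics:", topics)
print("Report saved to:", pdf_path)
```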
gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ DejaVuSans.ttf filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ openai
+ gradio>=4.44.1
+ langdetect
+ transformers
+ torch
+ fpdf2
+ keybert