# Hugging Face Space status when this file was captured: Sleeping
import os
import re
import tempfile
import textwrap
import unicodedata

import gradio as gr
import openai
from fpdf import FPDF
from keybert import KeyBERT
from langdetect import detect
from transformers import pipeline
# --- SETUP ---
# The OpenAI key is read from the environment (set it in the HF Space secrets).
openai.api_key = os.getenv("OPENAI_API_KEY")

# Both models are created once at import time and reused by the functions below.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
kw_model = KeyBERT()

# DejaVuSans.ttf must be uploaded next to this file. Anchoring the path to the
# module's own directory keeps PDF generation working even when the process is
# started from a different working directory.
FONT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "DejaVuSans.ttf")
# Brand names recognised by extract_brands(); results keep this order.
BRANDS = [
    "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "ICICI",
    "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
    "Motilal", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber",
]


def extract_brands(text):
    """Return the brands from ``BRANDS`` that are mentioned in *text*.

    Matching is case-insensitive and restricted to whole words, so a brand is
    not reported merely because its letters occur inside another word
    (``"OLA"`` inside ``"Coca-Cola"``, ``"Meta"`` inside ``"metadata"``).

    Args:
        text: The text to scan. ``None`` or an empty string is treated as
            containing no brands.

    Returns:
        The matching brand names in ``BRANDS`` order, or ``["None detected"]``
        when nothing matches.
    """
    if not text:
        return ["None detected"]
    found = [
        brand
        for brand in BRANDS
        # Look-arounds instead of \b so names containing punctuation
        # ("Coca-Cola") are still bounded correctly on both sides.
        if re.search(rf"(?<!\w){re.escape(brand)}(?!\w)", text, re.IGNORECASE)
    ]
    return found or ["None detected"]
def extract_topics(text, top_n=5):
    """Return up to *top_n* keywords extracted from *text* with KeyBERT.

    Args:
        text: The text to analyse.
        top_n: Maximum number of keywords requested from the model.

    Returns:
        A list of keyword strings, or ``["None extracted"]`` when *text* is
        blank or the model yields nothing.
    """
    # Nothing to embed for blank input, so skip the model call entirely.
    if not text or not text.strip():
        return ["None extracted"]
    keywords = kw_model.extract_keywords(text, top_n=top_n, stop_words="english")
    # Each entry is a (keyword, score) pair; only the keyword is reported.
    topics = [keyword for keyword, _score in keywords]
    return topics or ["None extracted"]
def make_bullets(summary):
    """Turn a summary paragraph into one ``- `` bullet per sentence.

    Sentences are split after ``.``, ``!`` or ``?`` followed by whitespace,
    and each bullet keeps its own terminator, so all bullets are punctuated
    consistently.

    Args:
        summary: The summary text; newlines are treated as spaces.

    Returns:
        The bullets joined with newlines, or ``""`` for empty input.
    """
    if not summary:
        return ""
    flattened = summary.replace("\n", " ")
    # The look-behind splits *after* the terminator so it stays on the sentence.
    sentences = re.split(r"(?<=[.!?])\s+", flattened)
    return "\n".join(f"- {s.strip()}" for s in sentences if s.strip())
def make_str(val):
    """Best-effort conversion of *val* to a string.

    ``None`` becomes ``""``, a list becomes its items converted recursively
    and joined with newlines, and anything else uses its ``str()`` form.
    Any exception raised during conversion yields ``""``.
    """
    try:
        if val is None:
            return ""
        if isinstance(val, list):
            return "\n".join(make_str(item) for item in val)
        # Scalars, dicts and every other object share the plain str() form.
        return str(val)
    except Exception:
        return ""
def very_safe_multicell(pdf, text, w=0, h=8, maxlen=50):
    """Write *text* via ``pdf.multi_cell`` after wrapping it to *maxlen* columns.

    The text is pre-wrapped so that no line, and no single word, is longer
    than *maxlen* characters before it reaches the PDF library. Words stay
    together on a line where they fit, and explicit line breaks in *text*
    are preserved.

    Args:
        pdf: Object exposing ``multi_cell(w, h, text)``.
        text: Content to write; non-strings are converted with ``str()``.
        w: Cell width passed through to ``multi_cell``.
        h: Line height passed through to ``multi_cell``.
        maxlen: Maximum number of characters per output line.

    Raises:
        ValueError: If *maxlen* is smaller than 1.
    """
    if maxlen < 1:
        raise ValueError("maxlen must be a positive integer")
    if not isinstance(text, str):
        text = str(text)

    # Normalise line endings and tabs first: they are control characters too,
    # and would otherwise be deleted below, gluing adjacent words together.
    text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\t", " ")
    # Drop the remaining unprintable characters (Unicode categories C*).
    text = "".join(
        ch for ch in text if ch == "\n" or unicodedata.category(ch)[0] != "C"
    )

    wrapped = []
    for paragraph in text.split("\n"):
        # textwrap fills each line with whole words and splits only words that
        # are longer than maxlen. A blank paragraph yields no lines, so keep
        # an empty string to preserve the blank line.
        wrapped.extend(textwrap.wrap(paragraph, width=maxlen) or [""])

    pdf.multi_cell(w, h, "\n".join(wrapped))
def _write_section(pdf, heading, paragraphs, body_size=12):
    """Render a 12 pt bold *heading* followed by each wrapped body paragraph."""
    pdf.set_x(pdf.l_margin)
    pdf.set_font("DejaVu", "B", 12)
    pdf.cell(0, 10, heading, ln=True)
    pdf.set_font("DejaVu", size=body_size)
    for paragraph in paragraphs:
        # fpdf2's multi_cell leaves the cursor at the right edge of the cell by
        # default, so a following full-width cell would have no room. Start
        # every paragraph back at the left margin.
        pdf.set_x(pdf.l_margin)
        very_safe_multicell(pdf, paragraph, maxlen=50)
    pdf.set_x(pdf.l_margin)


def create_pdf_report(language, transcript_en, brands, topics, key_takeaways):
    """Build the analysis report as a PDF and return the path of the file.

    Args:
        language: Detected language, shown in the report header.
        transcript_en: English transcript text (``None`` is treated as empty).
        brands: Iterable of detected brand names.
        topics: Iterable of extracted topics.
        key_takeaways: Newline-separated bullet lines.

    Returns:
        Path of a newly created PDF file. Each call writes its own temporary
        file, so concurrent requests cannot overwrite each other's report.
        The file is not deleted here.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=10)
    pdf.set_margins(left=10, top=10, right=10)
    # The same font file is registered for both styles, so "bold" headings
    # are drawn with the regular face.
    pdf.add_font("DejaVu", style="", fname=FONT_PATH, uni=True)
    pdf.add_font("DejaVu", style="B", fname=FONT_PATH, uni=True)
    pdf.add_page()

    pdf.set_font("DejaVu", "B", 16)
    pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
    pdf.set_font("DejaVu", size=12)
    pdf.ln(5)
    pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
    pdf.ln(5)

    _write_section(pdf, "English Transcript:", [transcript_en or ""])
    pdf.ln(3)
    _write_section(pdf, "Brands Detected:", [", ".join(map(str, brands or []))])
    _write_section(pdf, "Key Topics:", [", ".join(map(str, topics or []))])
    _write_section(
        pdf,
        "Summary (Bulleted):",
        (key_takeaways or "").split("\n"),
        body_size=10,
    )

    # Reserve a unique file name; delete=False keeps it for the caller.
    with tempfile.NamedTemporaryFile(
        prefix="analysis_report_", suffix=".pdf", delete=False
    ) as tmp:
        pdf_file = tmp.name
    pdf.output(pdf_file)
    return pdf_file
def process_audio(audio_path):
    """Transcribe an audio file and analyse the transcript.

    Steps: transcribe with Whisper, detect the language, translate to English
    when the transcript is not detected as English, summarise, extract brands
    and topics, and build the PDF report.

    Args:
        audio_path: Filesystem path of the uploaded audio file.

    Returns:
        A 7-tuple ``(language, transcript, english_transcript, brands,
        topics, key_takeaways, pdf_path)``. On a transcription or translation
        failure the error message is placed in the first or third slot
        respectively, the remaining analysis slots are empty strings, and
        ``pdf_path`` is ``None``.
    """
    if not audio_path or not isinstance(audio_path, str):
        return ("No audio file provided.", "", "", "", "", "", None)

    try:
        with open(audio_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        transcript = make_str(transcript).strip()
    except Exception as e:
        return (f"Error in transcription: {e}", "", "", "", "", "", None)

    language_names = {"en": "English", "hi": "Hindi", "ta": "Tamil"}
    try:
        detected_lang = detect(transcript)
    except Exception:
        # Keep the name bound when detection fails; an unknown language is
        # then handled like a non-English one below.
        detected_lang = None
    if detected_lang:
        lang_text = language_names.get(detected_lang, detected_lang)
    else:
        lang_text = "unknown"

    transcript_en = transcript
    if detected_lang != "en":
        try:
            with open(audio_path, "rb") as audio_file:
                transcript_en = openai.audio.translations.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            transcript_en = make_str(transcript_en).strip()
        except Exception as e:
            # Stop here: summarising or brand-scanning an error message would
            # only produce misleading analysis and a meaningless report.
            return (lang_text, transcript, f"Error translating: {e}", "", "", "", None)

    try:
        summary_obj = summarizer(transcript_en, max_length=100, min_length=30, do_sample=False)
        first = summary_obj[0] if isinstance(summary_obj, list) and summary_obj else None
        if isinstance(first, dict) and "summary_text" in first:
            summary = first["summary_text"]
        else:
            summary = make_str(summary_obj)
    except Exception as e:
        summary = f"Error summarizing: {e}"

    brands = extract_brands(transcript_en)
    topics = extract_topics(transcript_en)
    key_takeaways = make_bullets(summary)
    pdf_file = create_pdf_report(lang_text, transcript_en, brands, topics, key_takeaways)
    return (
        lang_text,
        transcript,
        transcript_en,
        ", ".join(brands),
        ", ".join(topics),
        key_takeaways,
        pdf_file,
    )
# Gradio UI: one audio upload in; six text fields and the PDF download out,
# in the same order as the tuple returned by process_audio.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload MP3/WAV Audio"),
    outputs=[
        *(
            gr.Textbox(label=caption)
            for caption in (
                "Detected Language",
                "Original Transcript",
                "English Transcript (if translated)",
                "Brands Detected",
                "Key Topics",
                "Bulleted Key Takeaways",
            )
        ),
        gr.File(label="Download PDF Report"),
    ],
    title="Audio Transcript, Brand & Topic Analysis (OpenAI Whisper + Unicode PDF Download)",
    description="Upload your audio file (MP3/WAV). Get transcript, summary, brand & topic detection, and download PDF. Unicode (Indian language/emoji) supported.",
)

# Start the web server as soon as the module is executed.
iface.launch()