"""Gradio app: transcribe audio with OpenAI Whisper and build an analysis report.

Pipeline: transcription -> language detection -> optional translation to
English -> summarization -> brand / topic extraction -> PDF export.
"""
import logging
import os
import re
import textwrap
import unicodedata

import gradio as gr
import openai
from fpdf import FPDF
from keybert import KeyBERT
from langdetect import detect
from transformers import pipeline

logger = logging.getLogger(__name__)

# --- SETUP ---
openai.api_key = os.getenv("OPENAI_API_KEY")  # Set in HF Space Secrets
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
kw_model = KeyBERT()
FONT_PATH = "DejaVuSans.ttf"  # Must be uploaded to Space root!

BRANDS = [
    "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung",
    "Nike", "ICICI", "Meta", "Facebook", "Instagram", "YouTube", "Netflix",
    "Reliance", "Tata", "Airtel", "Jio", "Motilal", "Wipro", "Paytm", "Zomato",
    "Swiggy", "OLA", "Uber"
]

# Whole-word, case-insensitive matchers. Plain substring matching reports
# "OLA" for every "Coca-Cola" and "Apple" for "pineapple".
_BRAND_PATTERNS = [
    (brand, re.compile(rf"(?<!\w){re.escape(brand)}(?!\w)", re.IGNORECASE))
    for brand in BRANDS
]


def extract_brands(text):
    """Return the brands from BRANDS mentioned in *text* as whole words.

    Matching is case-insensitive. Returns ``["None detected"]`` when nothing
    matches (or *text* is empty/None).
    """
    text = text or ""
    found = [brand for brand, pattern in _BRAND_PATTERNS if pattern.search(text)]
    return found if found else ["None detected"]


def extract_topics(text, top_n=5):
    """Return up to *top_n* KeyBERT keywords for *text*.

    Returns ``["None extracted"]`` when *text* is blank or no keyword is found.
    """
    if not text or not text.strip():
        # Nothing to analyse; skip the model call entirely.
        return ["None extracted"]
    keywords = kw_model.extract_keywords(text, top_n=top_n, stop_words='english')
    topics = [kw for kw, _score in keywords]
    return topics if topics else ["None extracted"]


def make_bullets(summary):
    """Turn *summary* into one ``- `` bullet per ``'. '``-separated sentence."""
    sentences = summary.replace("\n", " ").split('. ')
    return "\n".join(f"- {s.strip()}" for s in sentences if s.strip())


def make_str(val):
    """Best-effort conversion of *val* to text.

    ``None`` becomes ``""``, lists are converted element-wise and joined with
    newlines, everything else goes through ``str()``. Never raises: a failing
    conversion yields ``""``.
    """
    try:
        if val is None:
            return ""
        if isinstance(val, list):
            return "\n".join(make_str(v) for v in val)
        return str(val)
    except Exception:
        # Deliberate best-effort: a broken __str__ must not abort the request.
        return ""


def _strip_control_chars(text):
    """Drop Unicode category-C characters from *text*, keeping line structure.

    Newlines are preserved (removing them would fuse adjacent lines into one
    word); tabs and carriage returns become spaces.
    """
    cleaned = []
    for ch in text:
        if ch == "\n":
            cleaned.append(ch)
        elif ch in "\t\r":
            cleaned.append(" ")
        elif unicodedata.category(ch)[0] != "C":
            cleaned.append(ch)
    return "".join(cleaned)


def _wrap_text(text, maxlen):
    """Re-wrap *text* so no line (or single word) exceeds *maxlen* characters."""
    wrapped = []
    for paragraph in text.split("\n"):
        lines = textwrap.wrap(paragraph, width=maxlen, break_long_words=True)
        # textwrap returns [] for a blank paragraph; keep it as an empty line.
        wrapped.extend(lines or [""])
    return "\n".join(wrapped)


def very_safe_multicell(pdf, text, w=0, h=8, maxlen=50):
    """Force-break lines so no line/word exceeds maxlen chars, avoiding fpdf2 crash.

    Args:
        pdf: FPDF document to write into.
        text: Text to render; non-strings are converted with ``str()``.
        w: Cell width passed to ``multi_cell`` (0 = up to the right margin).
        h: Line height passed to ``multi_cell``.
        maxlen: Maximum number of characters per rendered line (minimum 1).
    """
    if not isinstance(text, str):
        text = str(text)
    maxlen = max(1, int(maxlen))
    safe_text = _wrap_text(_strip_control_chars(text), maxlen)
    # Recent fpdf2 leaves the cursor at the right edge after multi_cell, so a
    # following multi_cell(0, ...) has zero width and raises. Always start
    # from, and return to, the left margin.
    pdf.set_x(pdf.l_margin)
    pdf.multi_cell(w, h, safe_text)
    pdf.set_x(pdf.l_margin)


def _pdf_heading(pdf, title):
    """Write a bold 12pt section heading, then switch back to the regular font."""
    pdf.set_font("DejaVu", "B", 12)
    pdf.cell(0, 10, title, ln=True)
    pdf.set_font("DejaVu", size=12)


def create_pdf_report(language, transcript_en, brands, topics, key_takeaways):
    """Render the analysis results to a PDF and return the path of the file.

    Args:
        language: Display name of the detected language.
        transcript_en: English transcript text.
        brands: List of detected brand names.
        topics: List of extracted topic keywords.
        key_takeaways: Newline-separated bullet lines.

    Returns:
        Path of the written PDF (``/tmp/analysis_report.pdf``).
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=10)
    pdf.set_margins(left=10, top=10, right=10)
    # The same TTF is registered for both styles so "B" requests resolve.
    pdf.add_font("DejaVu", style="", fname=FONT_PATH, uni=True)
    pdf.add_font("DejaVu", style="B", fname=FONT_PATH, uni=True)
    pdf.add_page()

    pdf.set_font("DejaVu", "B", 16)
    pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
    pdf.set_font("DejaVu", size=12)
    pdf.ln(5)
    pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
    pdf.ln(5)

    _pdf_heading(pdf, "English Transcript:")
    very_safe_multicell(pdf, transcript_en or "", maxlen=50)
    pdf.ln(3)

    _pdf_heading(pdf, "Brands Detected:")
    very_safe_multicell(pdf, ", ".join(brands), maxlen=50)

    _pdf_heading(pdf, "Key Topics:")
    very_safe_multicell(pdf, ", ".join(topics), maxlen=50)

    _pdf_heading(pdf, "Summary (Bulleted):")
    pdf.set_font("DejaVu", size=10)
    for takeaway in (key_takeaways or "").split('\n'):
        very_safe_multicell(pdf, takeaway, maxlen=50)

    pdf_file = "/tmp/analysis_report.pdf"
    pdf.output(pdf_file)
    return pdf_file


def process_audio(audio_path):
    """Transcribe, analyse and report on the audio file at *audio_path*.

    Returns:
        A 7-tuple matching the Gradio outputs: (language, original transcript,
        English transcript, brands, topics, bulleted takeaways, PDF path or
        None). On a missing file or transcription failure the first element
        carries the message and the remaining fields are empty.
    """
    if not audio_path or not isinstance(audio_path, str):
        return ("No audio file provided.", "", "", "", "", "", None)

    try:
        with open(audio_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )
        transcript = make_str(transcript).strip()
    except Exception as e:
        return (f"Error in transcription: {e}", "", "", "", "", "", None)

    # Initialise first: detect() raises on empty/ambiguous text, and the
    # translation check below must still have a value to compare.
    detected_lang = None
    try:
        detected_lang = detect(transcript)
        lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
    except Exception:
        lang_text = "unknown"

    transcript_en = transcript
    # An undetected language is treated as non-English and sent for translation.
    if detected_lang != "en":
        try:
            with open(audio_path, "rb") as audio_file:
                transcript_en = openai.audio.translations.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text"
                )
            transcript_en = make_str(transcript_en).strip()
        except Exception as e:
            transcript_en = f"Error translating: {e}"

    try:
        # truncation=True keeps transcripts longer than the model's input
        # limit from failing; only the leading part is summarized.
        summary_obj = summarizer(
            transcript_en, max_length=100, min_length=30, do_sample=False, truncation=True
        )
        if isinstance(summary_obj, list) and "summary_text" in summary_obj[0]:
            summary = summary_obj[0]["summary_text"]
        else:
            summary = make_str(summary_obj)
    except Exception as e:
        summary = f"Error summarizing: {e}"

    brands = extract_brands(transcript_en)
    topics = extract_topics(transcript_en)
    key_takeaways = make_bullets(summary)

    try:
        pdf_file = create_pdf_report(lang_text, transcript_en, brands, topics, key_takeaways)
    except Exception:
        # A PDF failure (e.g. missing font file) must not throw away the
        # transcript and analysis already computed; log it and skip the file.
        logger.exception("PDF report generation failed")
        pdf_file = None

    return (
        lang_text,
        transcript,
        transcript_en,
        ", ".join(brands),
        ", ".join(topics),
        key_takeaways,
        pdf_file
    )


iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload MP3/WAV Audio"),
    outputs=[
        gr.Textbox(label="Detected Language"),
        gr.Textbox(label="Original Transcript"),
        gr.Textbox(label="English Transcript (if translated)"),
        gr.Textbox(label="Brands Detected"),
        gr.Textbox(label="Key Topics"),
        gr.Textbox(label="Bulleted Key Takeaways"),
        gr.File(label="Download PDF Report")
    ],
    title="Audio Transcript, Brand & Topic Analysis (OpenAI Whisper + Unicode PDF Download)",
    description="Upload your audio file (MP3/WAV). Get transcript, summary, brand & topic detection, and download PDF. Unicode (Indian language/emoji) supported."
)

iface.launch()