"""Gradio app: transcribe audio with OpenAI Whisper and analyse the text.

For an uploaded audio file the app produces the detected language, the
original transcript, an English transcript, the brands and key topics found
in the English text, a bulleted summary, and a downloadable PDF report.
"""

import os
import tempfile

import gradio as gr
import openai
from fpdf import FPDF
from keybert import KeyBERT
from langdetect import detect
from transformers import pipeline

openai.api_key = os.getenv("OPENAI_API_KEY")  # Set this in HF Space secrets

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
kw_model = KeyBERT()

# Sample brand list for detection (customize as needed)
BRANDS = [
    "Zerodha",
    "Motilal",
    "ICICI",
    "HDFC",
    "ShareKhan",
    "IND Money",
    "Samsung",
    "Nike",
    "Adidas",
]

# Display names for the language codes the app labels explicitly; any other
# detected code is shown as-is.
LANGUAGE_NAMES = {"en": "English", "hi": "Hindi", "ta": "Tamil"}


def extract_brands(text):
    """Return the entries of BRANDS that occur in *text* (case-insensitive).

    Matching is a plain substring test. Returns ``["None detected"]`` when
    no brand is found.
    """
    lowered = text.lower()  # lower-case once instead of once per brand
    found = [brand for brand in BRANDS if brand.lower() in lowered]
    return found if found else ["None detected"]


def extract_topics(text, top_n=5):
    """Return up to *top_n* keywords extracted from *text* by KeyBERT.

    Returns ``["None extracted"]`` when the model yields no keywords.
    """
    keywords = kw_model.extract_keywords(text, top_n=top_n, stop_words="english")
    topics = [keyword for keyword, _score in keywords]
    return topics if topics else ["None extracted"]


def make_bullets(summary):
    """Turn *summary* into one ``- ``-prefixed bullet line per sentence.

    Sentences are split on the literal ``". "`` separator after newlines are
    flattened to spaces; blank fragments are dropped.
    """
    sentences = summary.replace("\n", " ").split(". ")
    return "\n".join(f"- {s.strip()}" for s in sentences if s.strip())


def make_str(val):
    """Best-effort conversion of *val* to a string.

    ``None`` becomes ``""``, a list becomes its items converted recursively
    and joined with newlines, and anything else goes through ``str()``. If
    the conversion itself raises, ``""`` is returned.
    """
    try:
        if val is None:
            return ""
        if isinstance(val, list):
            return "\n".join(make_str(item) for item in val)
        return str(val)
    except Exception:
        return ""


def _pdf_safe(text):
    """Return *text* with characters outside Latin-1 replaced by ``?``.

    The report uses the built-in "Arial" core font, which can only encode
    Latin-1; without this, non-Latin transcripts (e.g. Hindi or Tamil) make
    PDF generation fail.
    """
    return make_str(text).encode("latin-1", "replace").decode("latin-1")


def create_pdf_report(language, transcript, transcript_en, summary, brands, topics, key_takeaways):
    """Write the analysis to a PDF file and return the file's path.

    Args:
        language: Display name (or code) of the detected language.
        transcript: Transcript in the original language.
        transcript_en: English transcript.
        summary: Raw summary text. Accepted for interface compatibility but
            not written to the report; the bulleted form is used instead.
        brands: List of detected brand names.
        topics: List of extracted topic keywords.
        key_takeaways: Newline-separated bullet lines.

    Returns:
        Path of the generated PDF. Each call writes to its own temporary
        file so concurrent requests cannot overwrite each other's report.
    """
    pdf = FPDF()
    pdf.add_page()

    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")

    pdf.set_font("Arial", size=12)
    pdf.ln(5)
    pdf.cell(0, 10, _pdf_safe(f"Detected Language: {language}"), ln=True)
    pdf.ln(5)
    pdf.multi_cell(0, 8, _pdf_safe("Original Transcript:\n" + transcript))
    pdf.ln(3)
    pdf.multi_cell(0, 8, _pdf_safe("English Transcript:\n" + transcript_en))
    pdf.ln(3)

    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Brands Detected:", ln=True)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 8, _pdf_safe(", ".join(brands)))

    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Key Topics:", ln=True)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 8, _pdf_safe(", ".join(topics)))

    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
    pdf.set_font("Arial", size=12)
    for takeaway in key_takeaways.split("\n"):
        pdf.multi_cell(0, 8, _pdf_safe(takeaway))

    # Reserve a unique file name, close the handle, then let FPDF write to it.
    with tempfile.NamedTemporaryFile(
        prefix="analysis_report_", suffix=".pdf", delete=False
    ) as tmp:
        pdf_file = tmp.name
    pdf.output(pdf_file)
    return pdf_file


def _error_result(message):
    """Build a result tuple that shows *message* in the first output.

    The tuple has one entry per output component of the interface (six
    textboxes and the file download), so error paths match the success path.
    """
    return (message, "", "", "", "", "", None)


def _detect_language(text):
    """Return ``(code, display_name)`` for *text*.

    When detection fails the result is ``(None, "unknown")``.
    """
    try:
        code = detect(text)
    except Exception:
        return None, "unknown"
    return code, LANGUAGE_NAMES.get(code, code)


def _translate_to_english(audio_path):
    """Return Whisper's English translation of the audio at *audio_path*.

    On failure an ``"Error translating: ..."`` message is returned instead
    of raising.
    """
    try:
        with open(audio_path, "rb") as audio_file:
            translated = openai.audio.translations.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        return make_str(translated).strip()
    except Exception as e:
        return f"Error translating: {e}"


def _summarize(text):
    """Return a short summary of *text*, or an error message on failure."""
    try:
        # truncation=True clips over-long input to the model's limit instead
        # of failing on longer transcripts.
        summary_obj = summarizer(
            text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        if (
            isinstance(summary_obj, list)
            and summary_obj
            and "summary_text" in summary_obj[0]
        ):
            return summary_obj[0]["summary_text"]
        return make_str(summary_obj)
    except Exception as e:
        return f"Error summarizing: {e}"


def process_audio(audio_path):
    """Transcribe and analyse the audio file at *audio_path*.

    Returns a 7-tuple matching the interface outputs: detected language,
    original transcript, English transcript, brands (comma-separated),
    topics (comma-separated), bulleted key takeaways, and the PDF report
    path. When no file is given or transcription fails, the first element
    carries the error message, the text fields are empty and the PDF entry
    is ``None``.
    """
    if not audio_path or not isinstance(audio_path, str):
        return _error_result("No audio file provided.")

    try:
        with open(audio_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        transcript = make_str(transcript).strip()
    except Exception as e:
        return _error_result(f"Error in transcription: {e}")

    detected_lang, lang_text = _detect_language(transcript)

    # An undetected language (None) is treated like any non-English one:
    # ask Whisper for an English translation.
    transcript_en = transcript
    if detected_lang != "en":
        transcript_en = _translate_to_english(audio_path)

    summary = _summarize(transcript_en)

    # Brands, topics, bullets
    brands = extract_brands(transcript_en)
    topics = extract_topics(transcript_en)
    key_takeaways = make_bullets(summary)

    # PDF file generation
    pdf_file = create_pdf_report(
        lang_text, transcript, transcript_en, summary, brands, topics, key_takeaways
    )

    return (
        lang_text,
        transcript,
        transcript_en,
        ", ".join(brands),
        ", ".join(topics),
        key_takeaways,
        pdf_file,
    )


iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload MP3/WAV Audio"),
    outputs=[
        gr.Textbox(label="Detected Language"),
        gr.Textbox(label="Original Transcript"),
        gr.Textbox(label="English Transcript (if translated)"),
        gr.Textbox(label="Brands Detected"),
        gr.Textbox(label="Key Topics"),
        gr.Textbox(label="Bulleted Key Takeaways"),
        gr.File(label="Download PDF Report"),
    ],
    title="Audio Transcript, Brand & Topic Analysis (OpenAI Whisper + PDF Download)",
    description="Upload your audio file (MP3/WAV). Get full transcript, summary, brand and topic detection, and download results as PDF.",
)

iface.launch()