Spaces:

jaisun2004
/

Audiototext

Sleeping

App Files Community

jaisun2004 committed on May 28

Commit

cd96b52

verified ·

1 Parent(s): 8be9a26

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -80

app.py CHANGED Viewed

@@ -3,22 +3,21 @@ import openai
 from langdetect import detect
 from transformers import pipeline
 from keybert import KeyBERT
-from fpdf import FPDF
 import os
-import re
-import unicodedata
 # --- SETUP ---
 openai.api_key = os.getenv("OPENAI_API_KEY")  # Set in HF Space Secrets
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 kw_model = KeyBERT()
-FONT_PATH = "DejaVuSans.ttf"  # Must be uploaded to Space root!
 BRANDS = [
-    "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "ICICI",
-    "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
-    "Motilal", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber"
 ]
 def extract_brands(text):
@@ -49,72 +48,9 @@ def make_str(val):
     except Exception:
         return ""
-def very_safe_multicell(pdf, text, w=0, h=8, maxlen=50):
-    """Force-break lines so no line/word exceeds maxlen chars, avoiding fpdf2 crash."""
-    if not isinstance(text, str):
-        text = str(text)
-    # Remove unprintable chars (e.g. control characters)
-    text = "".join(ch for ch in text if unicodedata.category(ch)[0] != "C")
-    # Step 1: break long words
-    def break_long_words(t):
-        lines = []
-        for paragraph in t.split('\n'):
-            for word in paragraph.split(' '):
-                while len(word) > maxlen:
-                    lines.append(word[:maxlen])
-                    word = word[maxlen:]
-                lines.append(word)
-            lines.append('')
-        return '\n'.join(lines)
-    text = break_long_words(text)
-    # Step 2: ensure no line is too long (wrap at maxlen)
-    wrapped = []
-    for line in text.splitlines():
-        while len(line) > maxlen:
-            wrapped.append(line[:maxlen])
-            line = line[maxlen:]
-        wrapped.append(line)
-    safe_text = '\n'.join(wrapped)
-    pdf.multi_cell(w, h, safe_text)
-def create_pdf_report(language, transcript_en, brands, topics, key_takeaways):
-    pdf = FPDF()
-    pdf.set_auto_page_break(auto=True, margin=10)
-    pdf.set_margins(left=10, top=10, right=10)
-    pdf.add_font("DejaVu", style="", fname=FONT_PATH, uni=True)
-    pdf.add_font("DejaVu", style="B", fname=FONT_PATH, uni=True)
-    pdf.add_page()
-    pdf.set_font("DejaVu", "B", 16)
-    pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
-    pdf.set_font("DejaVu", size=12)
-    pdf.ln(5)
-    pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
-    pdf.ln(5)
-    pdf.set_font("DejaVu", "B", 12)
-    pdf.cell(0, 10, "English Transcript:", ln=True)
-    pdf.set_font("DejaVu", size=12)
-    very_safe_multicell(pdf, transcript_en or "", maxlen=50)
-    pdf.ln(3)
-    pdf.set_font("DejaVu", "B", 12)
-    pdf.cell(0, 10, "Brands Detected:", ln=True)
-    pdf.set_font("DejaVu", size=12)
-    very_safe_multicell(pdf, ", ".join(brands), maxlen=50)
-    pdf.set_font("DejaVu", "B", 12)
-    pdf.cell(0, 10, "Key Topics:", ln=True)
-    pdf.set_font("DejaVu", size=12)
-    very_safe_multicell(pdf, ", ".join(topics), maxlen=50)
-    pdf.set_font("DejaVu", "B", 12)
-    pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
-    pdf.set_font("DejaVu", size=10)
-    for takeaway in key_takeaways.split('\n'):
-        very_safe_multicell(pdf, takeaway, maxlen=50)
-    pdf_file = "/tmp/analysis_report.pdf"
-    pdf.output(pdf_file)
-    return pdf_file
 def process_audio(audio_path):
     if not audio_path or not isinstance(audio_path, str):
-        return ("No audio file provided.", "", "", "", "", "", None)
     try:
         with open(audio_path, "rb") as audio_file:
             transcript = openai.audio.transcriptions.create(
@@ -124,7 +60,7 @@ def process_audio(audio_path):
             )
         transcript = make_str(transcript).strip()
     except Exception as e:
-        return (f"Error in transcription: {e}", "", "", "", "", "", None)
     try:
         detected_lang = detect(transcript)
         lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
@@ -150,15 +86,13 @@ def process_audio(audio_path):
     brands = extract_brands(transcript_en)
     topics = extract_topics(transcript_en)
     key_takeaways = make_bullets(summary)
-    pdf_file = create_pdf_report(lang_text, transcript_en, brands, topics, key_takeaways)
     return (
         lang_text,
         transcript,
         transcript_en,
         ", ".join(brands),
         ", ".join(topics),
-        key_takeaways,
-        pdf_file
     )
 iface = gr.Interface(
@@ -168,13 +102,12 @@ iface = gr.Interface(
         gr.Textbox(label="Detected Language"),
         gr.Textbox(label="Original Transcript"),
         gr.Textbox(label="English Transcript (if translated)"),
-        gr.Textbox(label="Brands Detected"),
         gr.Textbox(label="Key Topics"),
-        gr.Textbox(label="Bulleted Key Takeaways"),
-        gr.File(label="Download PDF Report")
     ],
-    title="Audio Transcript, Brand & Topic Analysis (OpenAI Whisper + Unicode PDF Download)",
-    description="Upload your audio file (MP3/WAV). Get transcript, summary, brand & topic detection, and download PDF. Unicode (Indian language/emoji) supported."
 )
 iface.launch()

 from langdetect import detect
 from transformers import pipeline
 from keybert import KeyBERT
 import os
 # --- SETUP ---
 openai.api_key = os.getenv("OPENAI_API_KEY")  # Set in HF Space Secrets
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 kw_model = KeyBERT()
+# Key Indian brokerages, investment apps, and fintech brands
 BRANDS = [
+    "Zerodha", "Upstox", "Groww", "Angel One", "Motilal Oswal", "Sharekhan", "5paisa", "ICICI Direct",
+    "HDFC Securities", "Kotak Securities", "Axis Direct", "IIFL", "Paytm Money", "Edelweiss", "Geojit",
+    "Fyers", "Alice Blue", "mStock", "Stockal", "Kuvera", "Smallcase", "Jupiter", "Fi", "INDmoney",
+    "PhonePe", "Paytm", "Google Pay", "BHIM", "MobiKwik", "Cred", "Niyo", "Razorpay", "ETMoney",
+    "Bajaj Finserv", "SBI Securities", "YES Securities", "IDFC FIRST", "CAMS", "Karvy", "LIC", "ICICI Prudential"
 ]
 def extract_brands(text):
     except Exception:
         return ""
 def process_audio(audio_path):
     if not audio_path or not isinstance(audio_path, str):
+        return ("No audio file provided.", "", "", "", "", "")
     try:
         with open(audio_path, "rb") as audio_file:
             transcript = openai.audio.transcriptions.create(
             )
         transcript = make_str(transcript).strip()
     except Exception as e:
+        return (f"Error in transcription: {e}", "", "", "", "", "")
     try:
         detected_lang = detect(transcript)
         lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
     brands = extract_brands(transcript_en)
     topics = extract_topics(transcript_en)
     key_takeaways = make_bullets(summary)
     return (
         lang_text,
         transcript,
         transcript_en,
         ", ".join(brands),
         ", ".join(topics),
+        key_takeaways
     )
 iface = gr.Interface(
         gr.Textbox(label="Detected Language"),
         gr.Textbox(label="Original Transcript"),
         gr.Textbox(label="English Transcript (if translated)"),
+        gr.Textbox(label="Indian Brokerages & Fintech Brands Detected"),
         gr.Textbox(label="Key Topics"),
+        gr.Textbox(label="Bulleted Key Takeaways")
     ],
+    title="Audio Brand & Topic Analysis for Indian Finance Apps",
+    description="Upload your audio file (MP3/WAV). Get transcript, summary, *Indian brokerage & fintech brand detection*, key topics, and a bulleted summary. Powered by OpenAI Whisper and BART."
 )
 iface.launch()