Spaces:

jaisun2004
/

YoutubeTranscript

Sleeping

App Files Community

jaisun2004 committed on May 28

Commit

9b53571

verified ·

1 Parent(s): 2ff90cd

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -11

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ from keybert import KeyBERT
 from fpdf import FPDF
 import os
 import re
 # --- SETUP ---
 openai.api_key = os.getenv("OPENAI_API_KEY")  # Set in HF Space Secrets
@@ -17,7 +18,7 @@ FONT_PATH = "DejaVuSans.ttf"  # Must be uploaded to Space root!
 BRANDS = [
     "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "Adidas",
     "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
-    "Infosys", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber","Zerodha","Motilal","ICICI","HDFC","grow", "Ind Money"
 ]
 def extract_brands(text):
@@ -48,11 +49,13 @@ def make_str(val):
     except Exception:
         return ""
-def very_safe_multicell(pdf, text, w=0, h=8, maxlen=80):
     """Force-break lines so no line/word exceeds maxlen chars, avoiding fpdf2 crash."""
     if not isinstance(text, str):
         text = str(text)
-    # Step 1: break any long 'words'
     def break_long_words(t):
         lines = []
         for paragraph in t.split('\n'):
@@ -64,7 +67,7 @@ def very_safe_multicell(pdf, text, w=0, h=8, maxlen=80):
             lines.append('')
         return '\n'.join(lines)
     text = break_long_words(text)
-    # Step 2: ensure no line is too long (wrap at maxlen, regardless of word boundaries)
     wrapped = []
     for line in text.splitlines():
         while len(line) > maxlen:
@@ -76,32 +79,34 @@ def very_safe_multicell(pdf, text, w=0, h=8, maxlen=80):
 def create_pdf_report(language, transcript, transcript_en, summary, brands, topics, key_takeaways):
     pdf = FPDF()
-    pdf.add_page()
     pdf.add_font("DejaVu", style="", fname=FONT_PATH, uni=True)
     pdf.add_font("DejaVu", style="B", fname=FONT_PATH, uni=True)
     pdf.set_font("DejaVu", "B", 16)
     pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
     pdf.set_font("DejaVu", size=12)
     pdf.ln(5)
     pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
     pdf.ln(5)
-    very_safe_multicell(pdf, "Original Transcript:\n" + (transcript or ""))
     pdf.ln(3)
-    very_safe_multicell(pdf, "English Transcript:\n" + (transcript_en or ""))
     pdf.ln(3)
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Brands Detected:", ln=True)
     pdf.set_font("DejaVu", size=12)
-    very_safe_multicell(pdf, ", ".join(brands))
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Key Topics:", ln=True)
     pdf.set_font("DejaVu", size=12)
-    very_safe_multicell(pdf, ", ".join(topics))
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
-    pdf.set_font("DejaVu", size=12)
     for takeaway in key_takeaways.split('\n'):
-        very_safe_multicell(pdf, takeaway)
     pdf_file = "/tmp/analysis_report.pdf"
     pdf.output(pdf_file)
     return pdf_file

 from fpdf import FPDF
 import os
 import re
+import unicodedata
 # --- SETUP ---
 openai.api_key = os.getenv("OPENAI_API_KEY")  # Set in HF Space Secrets
 BRANDS = [
     "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "Adidas",
     "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
+    "Infosys", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber"
 ]
 def extract_brands(text):
     except Exception:
         return ""
+def very_safe_multicell(pdf, text, w=0, h=8, maxlen=50):
     """Force-break lines so no line/word exceeds maxlen chars, avoiding fpdf2 crash."""
     if not isinstance(text, str):
         text = str(text)
+    # Remove unprintable chars (e.g. control characters)
+    text = "".join(ch for ch in text if unicodedata.category(ch)[0] != "C")
+    # Step 1: break long words
     def break_long_words(t):
         lines = []
         for paragraph in t.split('\n'):
             lines.append('')
         return '\n'.join(lines)
     text = break_long_words(text)
+    # Step 2: ensure no line is too long (wrap at maxlen)
     wrapped = []
     for line in text.splitlines():
         while len(line) > maxlen:
 def create_pdf_report(language, transcript, transcript_en, summary, brands, topics, key_takeaways):
     pdf = FPDF()
+    pdf.set_auto_page_break(auto=True, margin=10)
+    pdf.set_margins(left=10, top=10, right=10)
     pdf.add_font("DejaVu", style="", fname=FONT_PATH, uni=True)
     pdf.add_font("DejaVu", style="B", fname=FONT_PATH, uni=True)
+    pdf.add_page()
     pdf.set_font("DejaVu", "B", 16)
     pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
     pdf.set_font("DejaVu", size=12)
     pdf.ln(5)
     pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
     pdf.ln(5)
+    very_safe_multicell(pdf, "Original Transcript:\n" + (transcript or ""), maxlen=50)
     pdf.ln(3)
+    very_safe_multicell(pdf, "English Transcript:\n" + (transcript_en or ""), maxlen=50)
     pdf.ln(3)
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Brands Detected:", ln=True)
     pdf.set_font("DejaVu", size=12)
+    very_safe_multicell(pdf, ", ".join(brands), maxlen=50)
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Key Topics:", ln=True)
     pdf.set_font("DejaVu", size=12)
+    very_safe_multicell(pdf, ", ".join(topics), maxlen=50)
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
+    pdf.set_font("DejaVu", size=10)
     for takeaway in key_takeaways.split('\n'):
+        very_safe_multicell(pdf, takeaway, maxlen=50)
     pdf_file = "/tmp/analysis_report.pdf"
     pdf.output(pdf_file)
     return pdf_file