Spaces:

jaisun2004
/

YoutubeTranscript

Sleeping

App Files Files Community

jaisun2004 committed on May 28

Commit

2ff90cd

verified ·

1 Parent(s): 1bda6d9

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -14

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ FONT_PATH = "DejaVuSans.ttf"  # Must be uploaded to Space root!
 BRANDS = [
     "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "Adidas",
     "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
-    "Infosys", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber"
 ]
 def extract_brands(text):
@@ -48,17 +48,31 @@ def make_str(val):
     except Exception:
         return ""
-def safe_multicell(pdf, text, w=0, h=8):
-    """Safely adds text to PDF, handling super long words."""
     if not isinstance(text, str):
         text = str(text)
-    # Split very long words (>80 chars) to avoid fpdf2 crash
-    def break_long_words(t, maxlen=80):
-        return re.sub(r'(\S{%d,})' % maxlen,
-                      lambda m: ' '.join([m.group(0)[i:i+maxlen] for i in range(0, len(m.group(0)), maxlen)]),
-                      t)
     text = break_long_words(text)
-    pdf.multi_cell(w, h, text)
 def create_pdf_report(language, transcript, transcript_en, summary, brands, topics, key_takeaways):
     pdf = FPDF()
@@ -71,23 +85,23 @@ def create_pdf_report(language, transcript, transcript_en, summary, brands, topi
     pdf.ln(5)
     pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
     pdf.ln(5)
-    safe_multicell(pdf, "Original Transcript:\n" + (transcript or ""))
     pdf.ln(3)
-    safe_multicell(pdf, "English Transcript:\n" + (transcript_en or ""))
     pdf.ln(3)
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Brands Detected:", ln=True)
     pdf.set_font("DejaVu", size=12)
-    safe_multicell(pdf, ", ".join(brands))
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Key Topics:", ln=True)
     pdf.set_font("DejaVu", size=12)
-    safe_multicell(pdf, ", ".join(topics))
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
     pdf.set_font("DejaVu", size=12)
     for takeaway in key_takeaways.split('\n'):
-        safe_multicell(pdf, takeaway)
     pdf_file = "/tmp/analysis_report.pdf"
     pdf.output(pdf_file)
     return pdf_file

 BRANDS = [
     "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "Adidas",
     "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
+    "Infosys", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber","Zerodha","Motilal","ICICI","HDFC","grow", "Ind Money"
 ]
 def extract_brands(text):
     except Exception:
         return ""
def very_safe_multicell(pdf, text, w=0, h=8, maxlen=80):
    """Write *text* to *pdf* with every line capped at *maxlen* characters.

    Each newline-separated paragraph of *text* is wrapped on word boundaries
    so that no output line is longer than *maxlen*; a single token longer
    than *maxlen* is hard-split into *maxlen*-sized pieces. Blank lines in
    the input are kept as blank lines. The wrapped text is then handed to
    ``pdf.multi_cell(w, h, ...)`` in a single call.

    Args:
        pdf: Object exposing ``multi_cell(w, h, text)``.
        text: Content to write; non-string values are converted with ``str``.
        w: Cell width forwarded to ``multi_cell``.
        h: Line height forwarded to ``multi_cell``.
        maxlen: Maximum number of characters allowed on one output line.

    Raises:
        ValueError: If *maxlen* is smaller than 1 (no line could hold even
            one character, and chunking would never terminate).
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import textwrap

    if maxlen < 1:
        raise ValueError(f"maxlen must be >= 1, got {maxlen!r}")
    if not isinstance(text, str):
        text = str(text)

    wrapped = []
    for paragraph in text.split('\n'):
        # Words stay together on a line (space-joined) instead of each word
        # being emitted on its own line; only over-long tokens are split.
        pieces = textwrap.wrap(paragraph, width=maxlen, break_long_words=True)
        # textwrap.wrap returns [] for empty/whitespace-only paragraphs;
        # keep a blank line so paragraph spacing survives.
        wrapped.extend(pieces or [''])

    # NOTE(review): if "not enough horizontal space" errors persist, check
    # where fpdf2 leaves the cursor after multi_cell (new_x/new_y) when this
    # helper is called back-to-back -- confirm against the installed version.
    pdf.multi_cell(w, h, '\n'.join(wrapped))
 def create_pdf_report(language, transcript, transcript_en, summary, brands, topics, key_takeaways):
     pdf = FPDF()
     pdf.ln(5)
     pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
     pdf.ln(5)
+    very_safe_multicell(pdf, "Original Transcript:\n" + (transcript or ""))
     pdf.ln(3)
+    very_safe_multicell(pdf, "English Transcript:\n" + (transcript_en or ""))
     pdf.ln(3)
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Brands Detected:", ln=True)
     pdf.set_font("DejaVu", size=12)
+    very_safe_multicell(pdf, ", ".join(brands))
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Key Topics:", ln=True)
     pdf.set_font("DejaVu", size=12)
+    very_safe_multicell(pdf, ", ".join(topics))
     pdf.set_font("DejaVu", "B", 12)
     pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
     pdf.set_font("DejaVu", size=12)
     for takeaway in key_takeaways.split('\n'):
+        very_safe_multicell(pdf, takeaway)
     pdf_file = "/tmp/analysis_report.pdf"
     pdf.output(pdf_file)
     return pdf_file