jaisun2004 committed on
Commit
9b53571
·
verified ·
1 Parent(s): 2ff90cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -6,6 +6,7 @@ from keybert import KeyBERT
6
  from fpdf import FPDF
7
  import os
8
  import re
 
9
 
10
  # --- SETUP ---
11
  openai.api_key = os.getenv("OPENAI_API_KEY") # Set in HF Space Secrets
@@ -17,7 +18,7 @@ FONT_PATH = "DejaVuSans.ttf" # Must be uploaded to Space root!
17
  BRANDS = [
18
  "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "Adidas",
19
  "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
20
- "Infosys", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber","Zerodha","Motilal","ICICI","HDFC","grow", "Ind Money"
21
  ]
22
 
23
  def extract_brands(text):
@@ -48,11 +49,13 @@ def make_str(val):
48
  except Exception:
49
  return ""
50
 
51
- def very_safe_multicell(pdf, text, w=0, h=8, maxlen=80):
52
  """Force-break lines so no line/word exceeds maxlen chars, avoiding fpdf2 crash."""
53
  if not isinstance(text, str):
54
  text = str(text)
55
- # Step 1: break any long 'words'
 
 
56
  def break_long_words(t):
57
  lines = []
58
  for paragraph in t.split('\n'):
@@ -64,7 +67,7 @@ def very_safe_multicell(pdf, text, w=0, h=8, maxlen=80):
64
  lines.append('')
65
  return '\n'.join(lines)
66
  text = break_long_words(text)
67
- # Step 2: ensure no line is too long (wrap at maxlen, regardless of word boundaries)
68
  wrapped = []
69
  for line in text.splitlines():
70
  while len(line) > maxlen:
@@ -76,32 +79,34 @@ def very_safe_multicell(pdf, text, w=0, h=8, maxlen=80):
76
 
77
  def create_pdf_report(language, transcript, transcript_en, summary, brands, topics, key_takeaways):
78
  pdf = FPDF()
79
- pdf.add_page()
 
80
  pdf.add_font("DejaVu", style="", fname=FONT_PATH, uni=True)
81
  pdf.add_font("DejaVu", style="B", fname=FONT_PATH, uni=True)
 
82
  pdf.set_font("DejaVu", "B", 16)
83
  pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
84
  pdf.set_font("DejaVu", size=12)
85
  pdf.ln(5)
86
  pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
87
  pdf.ln(5)
88
- very_safe_multicell(pdf, "Original Transcript:\n" + (transcript or ""))
89
  pdf.ln(3)
90
- very_safe_multicell(pdf, "English Transcript:\n" + (transcript_en or ""))
91
  pdf.ln(3)
92
  pdf.set_font("DejaVu", "B", 12)
93
  pdf.cell(0, 10, "Brands Detected:", ln=True)
94
  pdf.set_font("DejaVu", size=12)
95
- very_safe_multicell(pdf, ", ".join(brands))
96
  pdf.set_font("DejaVu", "B", 12)
97
  pdf.cell(0, 10, "Key Topics:", ln=True)
98
  pdf.set_font("DejaVu", size=12)
99
- very_safe_multicell(pdf, ", ".join(topics))
100
  pdf.set_font("DejaVu", "B", 12)
101
  pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
102
- pdf.set_font("DejaVu", size=12)
103
  for takeaway in key_takeaways.split('\n'):
104
- very_safe_multicell(pdf, takeaway)
105
  pdf_file = "/tmp/analysis_report.pdf"
106
  pdf.output(pdf_file)
107
  return pdf_file
 
6
  from fpdf import FPDF
7
  import os
8
  import re
9
+ import unicodedata
10
 
11
  # --- SETUP ---
12
  openai.api_key = os.getenv("OPENAI_API_KEY") # Set in HF Space Secrets
 
18
  BRANDS = [
19
  "Apple", "Google", "Microsoft", "Amazon", "Coca-Cola", "Pepsi", "Samsung", "Nike", "Adidas",
20
  "Meta", "Facebook", "Instagram", "YouTube", "Netflix", "Reliance", "Tata", "Airtel", "Jio",
21
+ "Infosys", "Wipro", "Paytm", "Zomato", "Swiggy", "OLA", "Uber"
22
  ]
23
 
24
  def extract_brands(text):
 
49
  except Exception:
50
  return ""
51
 
52
+ def very_safe_multicell(pdf, text, w=0, h=8, maxlen=50):
53
  """Force-break lines so no line/word exceeds maxlen chars, avoiding fpdf2 crash."""
54
  if not isinstance(text, str):
55
  text = str(text)
56
+ # Remove unprintable chars (e.g. control characters)
57
+ text = "".join(ch for ch in text if unicodedata.category(ch)[0] != "C")
58
+ # Step 1: break long words
59
  def break_long_words(t):
60
  lines = []
61
  for paragraph in t.split('\n'):
 
67
  lines.append('')
68
  return '\n'.join(lines)
69
  text = break_long_words(text)
70
+ # Step 2: ensure no line is too long (wrap at maxlen)
71
  wrapped = []
72
  for line in text.splitlines():
73
  while len(line) > maxlen:
 
79
 
80
  def create_pdf_report(language, transcript, transcript_en, summary, brands, topics, key_takeaways):
81
  pdf = FPDF()
82
+ pdf.set_auto_page_break(auto=True, margin=10)
83
+ pdf.set_margins(left=10, top=10, right=10)
84
  pdf.add_font("DejaVu", style="", fname=FONT_PATH, uni=True)
85
  pdf.add_font("DejaVu", style="B", fname=FONT_PATH, uni=True)
86
+ pdf.add_page()
87
  pdf.set_font("DejaVu", "B", 16)
88
  pdf.cell(0, 10, "Audio Transcript & Analysis Report", ln=True, align="C")
89
  pdf.set_font("DejaVu", size=12)
90
  pdf.ln(5)
91
  pdf.cell(0, 10, f"Detected Language: {language}", ln=True)
92
  pdf.ln(5)
93
+ very_safe_multicell(pdf, "Original Transcript:\n" + (transcript or ""), maxlen=50)
94
  pdf.ln(3)
95
+ very_safe_multicell(pdf, "English Transcript:\n" + (transcript_en or ""), maxlen=50)
96
  pdf.ln(3)
97
  pdf.set_font("DejaVu", "B", 12)
98
  pdf.cell(0, 10, "Brands Detected:", ln=True)
99
  pdf.set_font("DejaVu", size=12)
100
+ very_safe_multicell(pdf, ", ".join(brands), maxlen=50)
101
  pdf.set_font("DejaVu", "B", 12)
102
  pdf.cell(0, 10, "Key Topics:", ln=True)
103
  pdf.set_font("DejaVu", size=12)
104
+ very_safe_multicell(pdf, ", ".join(topics), maxlen=50)
105
  pdf.set_font("DejaVu", "B", 12)
106
  pdf.cell(0, 10, "Summary (Bulleted):", ln=True)
107
+ pdf.set_font("DejaVu", size=10)
108
  for takeaway in key_takeaways.split('\n'):
109
+ very_safe_multicell(pdf, takeaway, maxlen=50)
110
  pdf_file = "/tmp/analysis_report.pdf"
111
  pdf.output(pdf_file)
112
  return pdf_file