jaisun2004 committed on
Commit
6bf6de7
·
verified ·
1 Parent(s): ea3d090

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -6
app.py CHANGED
@@ -4,6 +4,7 @@ from langdetect import detect
4
  from transformers import pipeline
5
  from keybert import KeyBERT
6
  import os
 
7
 
8
  # --- SETUP ---
9
  openai.api_key = os.getenv("OPENAI_API_KEY") # Set in HF Space Secrets
@@ -20,6 +21,50 @@ BRANDS = [
20
  "Bajaj Finserv", "SBI Securities", "YES Securities", "IDFC FIRST", "CAMS", "Karvy", "LIC", "ICICI Prudential"
21
  ]
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def extract_brands(text):
24
  found = [brand for brand in BRANDS if brand.lower() in text.lower()]
25
  return found if found else ["None detected"]
@@ -29,6 +74,16 @@ def extract_topics(text, top_n=5):
29
  topics = [kw for kw, score in keywords]
30
  return topics if topics else ["None extracted"]
31
 
 
 
 
 
 
 
 
 
 
 
32
  def make_bullets(summary):
33
  sentences = summary.replace("\n", " ").split('. ')
34
  bullets = [f"- {s.strip()}" for s in sentences if s.strip()]
@@ -50,7 +105,7 @@ def make_str(val):
50
 
51
  def process_audio(audio_path):
52
  if not audio_path or not isinstance(audio_path, str):
53
- return ("No audio file provided.", "", "", "", "", "")
54
  try:
55
  with open(audio_path, "rb") as audio_file:
56
  transcript = openai.audio.transcriptions.create(
@@ -60,7 +115,7 @@ def process_audio(audio_path):
60
  )
61
  transcript = make_str(transcript).strip()
62
  except Exception as e:
63
- return (f"Error in transcription: {e}", "", "", "", "", "")
64
  try:
65
  detected_lang = detect(transcript)
66
  lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
@@ -86,13 +141,15 @@ def process_audio(audio_path):
86
  brands = extract_brands(transcript_en)
87
  topics = extract_topics(transcript_en)
88
  key_takeaways = make_bullets(summary)
 
89
  return (
90
  lang_text,
91
  transcript,
92
  transcript_en,
93
  ", ".join(brands),
94
  ", ".join(topics),
95
- key_takeaways
 
96
  )
97
 
98
  iface = gr.Interface(
@@ -104,10 +161,11 @@ iface = gr.Interface(
104
  gr.Textbox(label="English Transcript (if translated)"),
105
  gr.Textbox(label="Indian Brokerages & Fintech Brands Detected"),
106
  gr.Textbox(label="Key Topics"),
107
- gr.Textbox(label="Bulleted Key Takeaways")
 
108
  ],
109
- title="Audio to Text & Insights Generation",
110
- description="Upload your audio file (MP3/WAV). Get key insights!"
111
  )
112
 
113
  iface.launch()
 
4
  from transformers import pipeline
5
  from keybert import KeyBERT
6
  import os
7
+ import re
8
 
9
  # --- SETUP ---
10
  openai.api_key = os.getenv("OPENAI_API_KEY") # Set in HF Space Secrets
 
21
  "Bajaj Finserv", "SBI Securities", "YES Securities", "IDFC FIRST", "CAMS", "Karvy", "LIC", "ICICI Prudential"
22
  ]
23
 
24
# Phrases that flag potentially fraudulent or non-compliant investment pitches.
# Entries are stored stripped and lower-cased so they can be compared against a
# lower-cased transcript, and de-duplicated while keeping first-seen order.
# NOTE(review): several entries are misspelled ("Avdisory", "Gurantee",
# "Accout", "Fixe"); they are kept verbatim in case they are meant to catch
# misspelled source text - confirm with the list owner.
NEGATIVE_KEYWORDS = list(dict.fromkeys(
    kw.strip().lower() for kw in [
        "Assuring return", "Invest with us and earn", "Profit Share", "Password share",
        "Unauthorised trade", "without consent order", "Fake advisor", "Arrest took money",
        "False promise", "Raid", "Imposing Angel broking", "Impersonation angel one",
        "Impersonation angel broking", "Fraud cheat", "Portfolio Management Service",
        "Guarantee return", "Guaranteed return", "Tampered document", "Fake document",
        "Forged document", "Promising huge return", "Ponzi Dabba", "Synchronised trade",
        "Made huge profit", "Siphon amount", "Strategy During Market at Angel One",
        "Account Handling", "Fixed Income from Market", "Weekly Expiry Make Money",
        "Imposing Angel One", "Profit Share", "Profit Sharing", "Password Sharing",
        "Password Share", "Unauthorized Trade", "Advisory Services", "Fake Avdisory",
        "Arrest", "Took Money", "Fraud", "Cheat", "Portfolio Management Services", "PMS",
        "Gurantee Return", "Guranteed Return", "Huge Return", "Ponzi", "Dabba",
        "Make Huge Profit", "Siphon Amount", "Accout Handling", "Account Handling Services",
        # The second entry below originally carried a mis-encoded trailing
        # non-breaking space, which made it unmatchable; it is now clean.
        "Weekly Expire Make Money", "Account Handling", "Account Handle", "huge profit",
        "advisor", "advisory", "assured return", "Premium Advice", "Free Advice",
        "Free Advisory", "Life time free paid calls", "free paid calls", "paid calls",
        "premium advisory", "Get Free Advice", "free calls with accuracy", "Free calls",
        "Options Intraday Tips", "Equity call Intraday", "Equity call Intraday & Delivery",
        "Equity call Delivery", "Premium advisor", "Gurantee Return Services",
        "Guranteed Return Services", "advisor Services", "assured return Services",
        "Premium Advice Services", "Free Advice Services", "Free Advisory Services",
        "Life time free paid calls Services", "free paid calls Services", "paid calls Services",
        "premium advisory Services", "Stock Recommendation", "Amount Doubling",
        "Best Trade Level In Nifty, Bank Nifty With Accuracy", "Daily Accurate Calls",
        "Earn Profit", "Expert Calls", "Fixe Profit Commitment", "Fixed Return", "For Jackpot Trade",
        "Good Profits Daily", "Guaranteed Profit", "Paid Investment Plans", "Jackpot Call",
        "Loss & Profit Sharing", "Nifty Bank-Nifty And Stock Option Calls .", "Pay & Get (Amount)",
        "Sure Shot Calls", "Tips Provide", "Stock tips", "losses", "stock picks", "Multibagger picks",
        # These two phrases were fused into one string by a missing comma.
        "High return on investment", "Insider Trading Offer/Scheme",
        "Advance Fee Fraud", "Pyramid Scheme",
        "Boiler Room Scam", "Municipal Securities updates", "Churning offers", "Front Running Amount",
        "Wash Trading Amount", "Bear Raiding", "Account Takeover", "Binary Options",
        "Unregistered Securities", "High-Yield Investment Program", "Forex Amount", "Smurfing offers",
        "Invest Quickly", "Trading account opening offer", "Discount on trading account",
        "Bonus on Opening account", "Bull Capturing", "Confirmed Swing Options",
        "Get Dividend every month", "Penny Stock recommendation", "Bawaal Stock Dhamaal return",
        "From thousand to Crores portfolio", "Multibagger stock tips", "Best Over sold stocks",
        "Best Over bought stocks", "High dividend yield stocks", "Future stock recommendation",
        "Growth scanners", "Growth Screeners", "Bullish stock recommendation", "Bull stocks recommendation",
        "Bearish stock recommendation", "Bear stocks recommendation",
    ]
))
67
+
68
  def extract_brands(text):
69
  found = [brand for brand in BRANDS if brand.lower() in text.lower()]
70
  return found if found else ["None detected"]
 
74
  topics = [kw for kw, score in keywords]
75
  return topics if topics else ["None extracted"]
76
 
77
def extract_negative_keywords(text):
    """Return the NEGATIVE_KEYWORDS entries that occur in ``text``.

    Matching is a case-insensitive substring test against the lower-cased
    entries of NEGATIVE_KEYWORDS.

    Args:
        text: Transcript text to scan.

    Returns:
        An empty list when ``text`` is empty or None; otherwise the distinct
        matching keywords in NEGATIVE_KEYWORDS order, or ["None detected"]
        when nothing matches.
    """
    if not text:
        return []
    text_lc = text.lower()
    # dict.fromkeys drops duplicate keywords while keeping list order, so the
    # result is identical on every run (set() order changes with string hash
    # randomization).
    # NOTE(review): substring matching lets short keywords hit inside longer
    # words (e.g. "raid" in "afraid"); confirm whether that is acceptable.
    matches = list(dict.fromkeys(kw for kw in NEGATIVE_KEYWORDS if kw in text_lc))
    return matches if matches else ["None detected"]
86
+
87
  def make_bullets(summary):
88
  sentences = summary.replace("\n", " ").split('. ')
89
  bullets = [f"- {s.strip()}" for s in sentences if s.strip()]
 
105
 
106
  def process_audio(audio_path):
107
  if not audio_path or not isinstance(audio_path, str):
108
+ return ("No audio file provided.", "", "", "", "", "", "")
109
  try:
110
  with open(audio_path, "rb") as audio_file:
111
  transcript = openai.audio.transcriptions.create(
 
115
  )
116
  transcript = make_str(transcript).strip()
117
  except Exception as e:
118
+ return (f"Error in transcription: {e}", "", "", "", "", "", "")
119
  try:
120
  detected_lang = detect(transcript)
121
  lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
 
141
  brands = extract_brands(transcript_en)
142
  topics = extract_topics(transcript_en)
143
  key_takeaways = make_bullets(summary)
144
+ negatives = extract_negative_keywords(transcript_en)
145
  return (
146
  lang_text,
147
  transcript,
148
  transcript_en,
149
  ", ".join(brands),
150
  ", ".join(topics),
151
+ key_takeaways,
152
+ ", ".join(negatives)
153
  )
154
 
155
  iface = gr.Interface(
 
161
  gr.Textbox(label="English Transcript (if translated)"),
162
  gr.Textbox(label="Indian Brokerages & Fintech Brands Detected"),
163
  gr.Textbox(label="Key Topics"),
164
+ gr.Textbox(label="Bulleted Key Takeaways"),
165
+ gr.Textbox(label="Negative Keywords Detected")
166
  ],
167
+ title="Audio Brand, Topic, and Scam Keyword Analysis for Indian Finance Apps",
168
+ description="Upload your audio file (MP3/WAV). Get transcript, summary, Indian brokerage/fintech brand & scam keyword detection, key topics, and a bulleted summary. Powered by OpenAI Whisper and BART."
169
  )
170
 
171
  iface.launch()