Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ from langdetect import detect
|
|
4 |
from transformers import pipeline
|
5 |
from keybert import KeyBERT
|
6 |
import os
|
|
|
7 |
|
8 |
# --- SETUP ---
|
9 |
openai.api_key = os.getenv("OPENAI_API_KEY") # Set in HF Space Secrets
|
@@ -20,6 +21,50 @@ BRANDS = [
|
|
20 |
"Bajaj Finserv", "SBI Securities", "YES Securities", "IDFC FIRST", "CAMS", "Karvy", "LIC", "ICICI Prudential"
|
21 |
]
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
def extract_brands(text, brands=None):
    """Return the brand names from ``brands`` that are mentioned in ``text``.

    Matching is case-insensitive and anchored on word boundaries, so a short
    brand such as "LIC" is not reported merely because the transcript
    contains "application" or "public".

    Args:
        text: Transcript to scan. ``None`` or an empty string yields the
            placeholder result instead of raising.
        brands: Optional iterable of brand names to look for. Defaults to the
            module-level ``BRANDS`` list.

    Returns:
        The matching brand names in their original spelling and list order,
        or ``["None detected"]`` when nothing matches.
    """
    import re  # local import keeps this function self-contained

    if not text:
        return ["None detected"]
    if brands is None:
        brands = BRANDS
    haystack = text.lower()  # lower-case once instead of once per brand
    found = [
        brand
        for brand in brands
        # Lookarounds rather than \b so that brands starting or ending with
        # punctuation are still anchored correctly.
        if re.search(rf"(?<!\w){re.escape(brand.lower())}(?!\w)", haystack)
    ]
    return found if found else ["None detected"]
|
@@ -29,6 +74,16 @@ def extract_topics(text, top_n=5):
|
|
29 |
topics = [kw for kw, score in keywords]
|
30 |
return topics if topics else ["None extracted"]
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
def make_bullets(summary):
|
33 |
sentences = summary.replace("\n", " ").split('. ')
|
34 |
bullets = [f"- {s.strip()}" for s in sentences if s.strip()]
|
@@ -50,7 +105,7 @@ def make_str(val):
|
|
50 |
|
51 |
def process_audio(audio_path):
|
52 |
if not audio_path or not isinstance(audio_path, str):
|
53 |
-
return ("No audio file provided.", "", "", "", "", "")
|
54 |
try:
|
55 |
with open(audio_path, "rb") as audio_file:
|
56 |
transcript = openai.audio.transcriptions.create(
|
@@ -60,7 +115,7 @@ def process_audio(audio_path):
|
|
60 |
)
|
61 |
transcript = make_str(transcript).strip()
|
62 |
except Exception as e:
|
63 |
-
return (f"Error in transcription: {e}", "", "", "", "", "")
|
64 |
try:
|
65 |
detected_lang = detect(transcript)
|
66 |
lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
|
@@ -86,13 +141,15 @@ def process_audio(audio_path):
|
|
86 |
brands = extract_brands(transcript_en)
|
87 |
topics = extract_topics(transcript_en)
|
88 |
key_takeaways = make_bullets(summary)
|
|
|
89 |
return (
|
90 |
lang_text,
|
91 |
transcript,
|
92 |
transcript_en,
|
93 |
", ".join(brands),
|
94 |
", ".join(topics),
|
95 |
-
key_takeaways
|
|
|
96 |
)
|
97 |
|
98 |
iface = gr.Interface(
|
@@ -104,10 +161,11 @@ iface = gr.Interface(
|
|
104 |
gr.Textbox(label="English Transcript (if translated)"),
|
105 |
gr.Textbox(label="Indian Brokerages & Fintech Brands Detected"),
|
106 |
gr.Textbox(label="Key Topics"),
|
107 |
-
gr.Textbox(label="Bulleted Key Takeaways")
|
|
|
108 |
],
|
109 |
-
title="Audio
|
110 |
-
description="Upload your audio file (MP3/WAV). Get key
|
111 |
)
|
112 |
|
113 |
iface.launch()
|
|
|
4 |
from transformers import pipeline
|
5 |
from keybert import KeyBERT
|
6 |
import os
|
7 |
+
import re
|
8 |
|
9 |
# --- SETUP ---
|
10 |
openai.api_key = os.getenv("OPENAI_API_KEY") # Set in HF Space Secrets
|
|
|
21 |
"Bajaj Finserv", "SBI Securities", "YES Securities", "IDFC FIRST", "CAMS", "Karvy", "LIC", "ICICI Prudential"
|
22 |
]
|
23 |
|
24 |
+
# Lower-cased phrases whose presence in a transcript is reported as a
# potential scam / mis-selling signal.
#
# The raw phrases are stripped, lower-cased and de-duplicated while keeping
# first-seen order, so the same phrase listed twice (e.g. "Profit Share")
# yields a single entry. Misspelled variants ("Gurantee", "Avdisory", ...)
# are kept on purpose: they are distinct search strings.
#
# NOTE(review): two changes to the raw data versus the previous literal --
#   * "High return on investment" and "Insider Trading Offer/Scheme" were
#     fused into one string by a missing separator; they are now two entries.
#   * an "Account Handling" variant carrying a stray trailing non-ASCII
#     character was dropped, since plain "Account Handling" is already listed.
NEGATIVE_KEYWORDS = list(dict.fromkeys(
    kw.strip().lower() for kw in (
        "Assuring return", "Invest with us and earn", "Profit Share", "Password share",
        "Unauthorised trade", "without consent order", "Fake advisor", "Arrest took money",
        "False promise", "Raid", "Imposing Angel broking", "Impersonation angel one",
        "Impersonation angel broking", "Fraud cheat", "Portfolio Management Service",
        "Guarantee return", "Guaranteed return", "Tampered document", "Fake document",
        "Forged document", "Promising huge return", "Ponzi Dabba", "Synchronised trade",
        "Made huge profit", "Siphon amount", "Strategy During Market at Angel One",
        "Account Handling", "Fixed Income from Market", "Weekly Expiry Make Money",
        "Imposing Angel One", "Profit Share", "Profit Sharing", "Password Sharing",
        "Password Share", "Unauthorized Trade", "Advisory Services", "Fake Avdisory",
        "Arrest", "Took Money", "Fraud", "Cheat", "Portfolio Management Services", "PMS",
        "Gurantee Return", "Guranteed Return", "Huge Return", "Ponzi", "Dabba",
        "Make Huge Profit", "Siphon Amount", "Accout Handling", "Account Handling Services",
        "Weekly Expire Make Money", "Account Handle", "huge profit",
        "advisor", "advisory", "assured return", "Premium Advice", "Free Advice",
        "Free Advisory", "Life time free paid calls", "free paid calls", "paid calls",
        "premium advisory", "Get Free Advice", "free calls with accuracy", "Free calls",
        "Options Intraday Tips", "Equity call Intraday", "Equity call Intraday & Delivery",
        "Equity call Delivery", "Premium advisor", "Gurantee Return Services",
        "Guranteed Return Services", "advisor Services", "assured return Services",
        "Premium Advice Services", "Free Advice Services", "Free Advisory Services",
        "Life time free paid calls Services", "free paid calls Services", "paid calls Services",
        "premium advisory Services", "Stock Recommendation", "Amount Doubling",
        "Best Trade Level In Nifty, Bank Nifty With Accuracy", "Daily Accurate Calls",
        "Earn Profit", "Expert Calls", "Fixe Profit Commitment", "Fixed Return", "For Jackpot Trade",
        "Good Profits Daily", "Guaranteed Profit", "Paid Investment Plans", "Jackpot Call",
        "Loss & Profit Sharing", "Nifty Bank-Nifty And Stock Option Calls .", "Pay & Get (Amount)",
        "Sure Shot Calls", "Tips Provide", "Stock tips", "losses", "stock picks", "Multibagger picks",
        "High return on investment", "Insider Trading Offer/Scheme", "Advance Fee Fraud", "Pyramid Scheme",
        "Boiler Room Scam", "Municipal Securities updates", "Churning offers", "Front Running Amount",
        "Wash Trading Amount", "Bear Raiding", "Account Takeover", "Binary Options",
        "Unregistered Securities", "High-Yield Investment Program", "Forex Amount", "Smurfing offers",
        "Invest Quickly", "Trading account opening offer", "Discount on trading account",
        "Bonus on Opening account", "Bull Capturing", "Confirmed Swing Options",
        "Get Dividend every month", "Penny Stock recommendation", "Bawaal Stock Dhamaal return",
        "From thousand to Crores portfolio", "Multibagger stock tips", "Best Over sold stocks",
        "Best Over bought stocks", "High dividend yield stocks", "Future stock recommendation",
        "Growth scanners", "Growth Screeners", "Bullish stock recommendation", "Bull stocks recommendation",
        "Bearish stock recommendation", "Bear stocks recommendation",
    )
))
|
67 |
+
|
68 |
def extract_brands(text, brands=None):
    """Return the brand names from ``brands`` that are mentioned in ``text``.

    Matching is case-insensitive and anchored on word boundaries, so a short
    brand such as "LIC" is not reported merely because the transcript
    contains "application" or "public".

    Args:
        text: Transcript to scan. ``None`` or an empty string yields the
            placeholder result instead of raising.
        brands: Optional iterable of brand names to look for. Defaults to the
            module-level ``BRANDS`` list.

    Returns:
        The matching brand names in their original spelling and list order,
        or ``["None detected"]`` when nothing matches.
    """
    import re  # local import keeps this function self-contained

    if not text:
        return ["None detected"]
    if brands is None:
        brands = BRANDS
    haystack = text.lower()  # lower-case once instead of once per brand
    found = [
        brand
        for brand in brands
        # Lookarounds rather than \b so that brands starting or ending with
        # punctuation are still anchored correctly.
        if re.search(rf"(?<!\w){re.escape(brand.lower())}(?!\w)", haystack)
    ]
    return found if found else ["None detected"]
|
|
|
74 |
topics = [kw for kw, score in keywords]
|
75 |
return topics if topics else ["None extracted"]
|
76 |
|
77 |
+
def extract_negative_keywords(text, keywords=None):
    """Return the negative (scam-related) keywords that occur in ``text``.

    A keyword matches case-insensitively when it appears at the start of a
    word. Inflected forms still match ("fraud" hits "frauds"), but mid-word
    hits are ignored ("raid" no longer fires on "afraid").

    Args:
        text: Transcript to scan. ``None`` or an empty string returns ``[]``.
        keywords: Optional iterable of phrases to look for. Defaults to the
            module-level ``NEGATIVE_KEYWORDS`` list.

    Returns:
        ``[]`` for empty input; otherwise the matched keywords, de-duplicated
        and in keyword-list order, or ``["None detected"]`` when nothing
        matches.
    """
    import re  # local import keeps this function self-contained

    if not text:
        return []
    if keywords is None:
        keywords = NEGATIVE_KEYWORDS
    text_lc = text.lower()
    matches = [
        kw
        for kw in keywords
        # Leading-only anchor: a lookbehind (not \b) so phrases that begin
        # with punctuation still work, and no trailing anchor so plurals and
        # other suffixes keep matching.
        if re.search(rf"(?<!\w){re.escape(kw.lower())}", text_lc)
    ]
    # dict.fromkeys de-duplicates while keeping a stable order; the previous
    # list(set(...)) produced a different order from run to run.
    return list(dict.fromkeys(matches)) if matches else ["None detected"]
|
86 |
+
|
87 |
def make_bullets(summary):
|
88 |
sentences = summary.replace("\n", " ").split('. ')
|
89 |
bullets = [f"- {s.strip()}" for s in sentences if s.strip()]
|
|
|
105 |
|
106 |
def process_audio(audio_path):
|
107 |
if not audio_path or not isinstance(audio_path, str):
|
108 |
+
return ("No audio file provided.", "", "", "", "", "", "")
|
109 |
try:
|
110 |
with open(audio_path, "rb") as audio_file:
|
111 |
transcript = openai.audio.transcriptions.create(
|
|
|
115 |
)
|
116 |
transcript = make_str(transcript).strip()
|
117 |
except Exception as e:
|
118 |
+
return (f"Error in transcription: {e}", "", "", "", "", "", "")
|
119 |
try:
|
120 |
detected_lang = detect(transcript)
|
121 |
lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
|
|
|
141 |
brands = extract_brands(transcript_en)
|
142 |
topics = extract_topics(transcript_en)
|
143 |
key_takeaways = make_bullets(summary)
|
144 |
+
negatives = extract_negative_keywords(transcript_en)
|
145 |
return (
|
146 |
lang_text,
|
147 |
transcript,
|
148 |
transcript_en,
|
149 |
", ".join(brands),
|
150 |
", ".join(topics),
|
151 |
+
key_takeaways,
|
152 |
+
", ".join(negatives)
|
153 |
)
|
154 |
|
155 |
iface = gr.Interface(
|
|
|
161 |
gr.Textbox(label="English Transcript (if translated)"),
|
162 |
gr.Textbox(label="Indian Brokerages & Fintech Brands Detected"),
|
163 |
gr.Textbox(label="Key Topics"),
|
164 |
+
gr.Textbox(label="Bulleted Key Takeaways"),
|
165 |
+
gr.Textbox(label="Negative Keywords Detected")
|
166 |
],
|
167 |
+
title="Audio Brand, Topic, and Scam Keyword Analysis for Indian Finance Apps",
|
168 |
+
description="Upload your audio file (MP3/WAV). Get transcript, summary, Indian brokerage/fintech brand & scam keyword detection, key topics, and a bulleted summary. Powered by OpenAI Whisper and BART."
|
169 |
)
|
170 |
|
171 |
iface.launch()
|