# Spaces: Running
import os
import re

import gradio as gr
import openai
from keybert import KeyBERT
from langdetect import detect
from transformers import pipeline
# --- SETUP ---
# The OpenAI key comes from the environment (set in HF Space Secrets).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Model objects are created once at import time and shared by every request.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
kw_model = KeyBERT()
# Key Indian brokerages, investment apps, and fintech brands
BRANDS = [
    "Zerodha", "Upstox", "Groww", "Angel One", "Motilal Oswal", "Sharekhan", "5paisa", "ICICI Direct",
    "HDFC Securities", "Kotak Securities", "Axis Direct", "IIFL", "Paytm Money", "Edelweiss", "Geojit",
    "Fyers", "Alice Blue", "mStock", "Stockal", "Kuvera", "Smallcase", "Jupiter", "Fi", "INDmoney",
    "PhonePe", "Paytm", "Google Pay", "BHIM", "MobiKwik", "Cred", "Niyo", "Razorpay", "ETMoney",
    "Bajaj Finserv", "SBI Securities", "YES Securities", "IDFC FIRST", "CAMS", "Karvy", "LIC", "ICICI Prudential",
]


def _brand_pattern(brand):
    """Compile a case-insensitive, whole-word pattern for one brand name."""
    # Words of a multi-word brand may be separated by any run of whitespace.
    body = r"\s+".join(re.escape(word) for word in brand.split())
    # Lookarounds instead of \b so the match must not be glued to other word
    # characters on either side; this is what stops "Fi" matching "finance".
    return re.compile(rf"(?<!\w){body}(?!\w)", re.IGNORECASE)


# Compiled once at import time; order follows BRANDS so results keep that order.
_BRAND_PATTERNS = [(brand, _brand_pattern(brand)) for brand in BRANDS]


def extract_brands(text):
    """Return the brands from ``BRANDS`` that are mentioned in ``text``.

    Matching is case-insensitive and restricted to whole words, so short
    names such as "Fi", "LIC" or "Cred" are not reported merely because they
    occur inside longer words ("finance", "public", "credit").

    Args:
        text: The text to scan. ``None`` or an empty string yields no matches.

    Returns:
        A list of matching brand names in ``BRANDS`` order, or
        ``["None detected"]`` when nothing matches.
    """
    if not text:
        return ["None detected"]
    found = [brand for brand, pattern in _BRAND_PATTERNS if pattern.search(text)]
    return found or ["None detected"]
def extract_topics(text, top_n=5):
    """Return the keywords that ``kw_model`` extracts from ``text``.

    Args:
        text: The text to extract keywords from.
        top_n: Maximum number of keywords requested from the model.

    Returns:
        The keyword strings with their scores dropped, or
        ``["None extracted"]`` when the model returns nothing.
    """
    ranked = kw_model.extract_keywords(text, top_n=top_n, stop_words='english')
    # Each entry is unpacked as a (keyword, score) pair; only the keyword is kept.
    names = [keyword for keyword, _score in ranked]
    if not names:
        return ["None extracted"]
    return names
def make_bullets(summary):
    """Format a summary as one ``- `` bullet per sentence.

    Sentences are split after ``.``, ``!`` or ``?`` when followed by
    whitespace, and each bullet keeps its own terminal punctuation. Line
    breaks and repeated whitespace inside the summary are collapsed first.

    Args:
        summary: The summary text. ``None`` or an empty string yields ``""``.

    Returns:
        The bullets joined by newlines, or an empty string when there is
        nothing to format.
    """
    if not summary:
        return ""
    # Collapse newlines and whitespace runs so each bullet is a single line.
    flattened = " ".join(str(summary).split())
    # The lookbehind splits *after* the punctuation, so it stays on the bullet;
    # requiring whitespace keeps decimals such as "3.5" intact.
    sentences = re.split(r"(?<=[.!?])\s+", flattened)
    return "\n".join(f"- {sentence}" for sentence in sentences if sentence)
def make_str(val):
    """Coerce an arbitrary value to text without ever raising.

    Args:
        val: Any value. ``None`` becomes ``""``; bytes are decoded as UTF-8
            (undecodable bytes are replaced); lists and tuples are converted
            element by element and joined with newlines; everything else
            goes through ``str()``.

    Returns:
        The text form of ``val``, or ``""`` if the conversion fails.
    """
    try:
        if val is None:
            return ""
        if isinstance(val, (bytes, bytearray)):
            # str() on bytes would yield the "b'...'" repr rather than the text.
            return bytes(val).decode("utf-8", errors="replace")
        if isinstance(val, (list, tuple)):
            return "\n".join(make_str(item) for item in val)
        return str(val)
    except Exception:
        # Deliberate best-effort: a value whose conversion fails (for example
        # a broken __str__) degrades to empty text instead of aborting.
        return ""
def process_audio(audio_path):
    """Transcribe an audio file and derive language, brand, topic and summary output.

    Args:
        audio_path: Path to the audio file on disk, as a string.

    Returns:
        A 6-tuple of strings: detected language, original transcript,
        English transcript, comma-separated brands, comma-separated topics,
        and bulleted key takeaways. If no usable path is given or
        transcription fails, the message is returned in the first slot and
        the rest are empty. If translation fails, the error is returned in
        the English-transcript slot and the three insight slots are empty.
    """
    if not audio_path or not isinstance(audio_path, str):
        return ("No audio file provided.", "", "", "", "", "")

    try:
        with open(audio_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        transcript = make_str(transcript).strip()
    except Exception as e:
        return (f"Error in transcription: {e}", "", "", "", "", "")

    # Bound before the try so the translation check below never reads an
    # unassigned name when detect() raises.
    detected_lang = None
    try:
        detected_lang = detect(transcript)
        lang_text = {'en': 'English', 'hi': 'Hindi', 'ta': 'Tamil'}.get(detected_lang, detected_lang)
    except Exception:
        lang_text = "unknown"

    transcript_en = transcript
    # Translate only when a non-English language was positively detected; if
    # detection failed, the original transcript is used as-is.
    if detected_lang is not None and detected_lang != "en":
        try:
            with open(audio_path, "rb") as audio_file:
                transcript_en = openai.audio.translations.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            transcript_en = make_str(transcript_en).strip()
        except Exception as e:
            # Stop here: summarising or scanning the error message itself
            # would produce meaningless insights.
            return (lang_text, transcript, f"Error translating: {e}", "", "", "")

    try:
        summary_obj = summarizer(
            transcript_en,
            max_length=100,
            min_length=30,
            do_sample=False,
            # Clip transcripts that exceed the model's input limit instead of
            # letting the call fail on long audio.
            truncation=True,
        )
        if isinstance(summary_obj, list) and "summary_text" in summary_obj[0]:
            summary = summary_obj[0]["summary_text"]
        else:
            summary = make_str(summary_obj)
    except Exception as e:
        summary = f"Error summarizing: {e}"

    brands = extract_brands(transcript_en)
    topics = extract_topics(transcript_en)
    key_takeaways = make_bullets(summary)
    return (
        lang_text,
        transcript,
        transcript_en,
        ", ".join(brands),
        ", ".join(topics),
        key_takeaways,
    )
# One audio upload in, six text boxes out. The textbox order must match the
# order of the tuple returned by process_audio.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload MP3/WAV Audio"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Detected Language",
            "Original Transcript",
            "English Transcript (if translated)",
            "Indian Brokerages & Fintech Brands Detected",
            "Key Topics",
            "Bulleted Key Takeaways",
        )
    ],
    title="Audio to Text & Insights Generation",
    description="Upload your audio file (MP3/WAV). Get key insights!",
)

# Start the web UI as soon as the module runs.
iface.launch()