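# Hugging Face Space: multi-tab Gradio demo (translator, sentiment analysis,
# financial analyst, personal-information detection, customer churn).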
import os | |
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
import joblib | |
import spacy | |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification | |
from langchain_core.pydantic_v1 import BaseModel, Field | |
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate | |
from langchain.output_parsers import PydanticOutputParser | |
from langchain_openai import ChatOpenAI | |
# --- Translator App --- | |
# Chat model client shared by every translation request.
chat = ChatOpenAI()


class TextTranslator(BaseModel):
    # Documented with comments rather than a docstring on purpose: a pydantic
    # class docstring is emitted into the model schema, which could change the
    # format instructions generated from it below.
    #
    # output: the translated text returned to the caller.
    output: str = Field(
        description="Python string containing the output text translated in the desired language",
    )


# Parser that converts the model reply into a TextTranslator, and the
# instruction text that text_translator appends to each prompt.
output_parser = PydanticOutputParser(pydantic_object=TextTranslator)
format_instructions = output_parser.get_format_instructions()
def text_translator(input_text: str, language: str) -> str:
    """Translate ``input_text`` into ``language`` using the chat model.

    A single human message is built from the text, the target language and the
    parser's format instructions; the model reply is parsed into a
    ``TextTranslator`` and its ``output`` field is returned.

    Args:
        input_text: Text to translate.
        language: Name of the target language.

    Returns:
        The translated text.

    Raises:
        Whatever ``output_parser.parse`` raises when the reply does not follow
        the requested format.
    """
    human_template = (
        "Enter the text that you want to translate:\n"
        "{input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"
    )
    chat_prompt = ChatPromptTemplate.from_messages(
        [HumanMessagePromptTemplate.from_template(human_template)]
    )
    messages = chat_prompt.format_prompt(
        input_text=input_text,
        language=language,
        format_instructions=format_instructions,
    ).to_messages()
    # ``invoke`` is the supported entry point for chat models; calling the
    # model object directly is deprecated in langchain-core.
    response = chat.invoke(messages)
    return output_parser.parse(response.content).output
# Translator tab: two free-text inputs (text, target language), one text output.
translator_tab = gr.Interface(
    fn=text_translator,
    inputs=[
        gr.Textbox(label="Text to translate"),
        gr.Textbox(label="Target Language"),
    ],
    outputs=[gr.Textbox(label="Translated Text")],
    title="Text Translator",
)
# --- Sentiment Analysis App --- | |
# Sentiment classifier used by sentiment_analysis; loaded once at import time.
sentiment_model = pipeline(
    task="sentiment-analysis",
    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
)
def sentiment_analysis(message, history):
    """Classify ``message`` and describe the top label with its score.

    ``history`` is part of the signature because this function is used as the
    ``gr.ChatInterface`` callback; it is not read.
    """
    top_result = sentiment_model(message)[0]
    label = top_result['label']
    score = top_result['score']
    return f"Sentiment: {label} (Probability: {score:.2f})"
# Chat-style tab: each user message is answered with its sentiment.
sentiment_tab = gr.ChatInterface(
    fn=sentiment_analysis,
    title="Sentiment Analysis",
)
# --- Financial Analyst --- | |
# spaCy pipeline used by split_in_sentences.
spacy_model = spacy.load('en_core_web_sm')
spacy_model.add_pipe('sentencizer')

# Hugging Face access token taken from the environment (None when unset).
auth_token = os.environ.get("HF_Token")

# Models behind the Financial Analyst tab, loaded once at import time.
asr = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
summarizer = pipeline(
    task="summarization",
    model="knkarthick/MEETING_SUMMARY",
)
fin_model = pipeline(
    task="sentiment-analysis",
    model='yiyanghkust/finbert-tone',
    tokenizer='yiyanghkust/finbert-tone',
)
def split_in_sentences(text):
    """Return the sentences spaCy finds in ``text`` as stripped strings."""
    sentences = []
    for sentence in spacy_model(text).sents:
        sentences.append(str(sentence).strip())
    return sentences
def make_spans(text, results):
    """Pair each sentence of ``text`` with the label of the matching result.

    Returns a list of ``(sentence, label)`` tuples; pairing stops at the
    shorter of the two sequences.
    """
    sentences = split_in_sentences(text)
    labels = [result["label"] for result in results]
    return list(zip(sentences, labels))
def speech_to_text(speech):
    """Transcribe a recording with the ``asr`` pipeline.

    Args:
        speech: Value of the audio input component (configured with
            ``type="filepath"``); ``None`` or empty when nothing was recorded.

    Returns:
        The recognized text, or an empty string when no recording was given.
    """
    # The button can be clicked before anything is recorded; skip the model
    # instead of handing it a missing input.
    if not speech:
        return ""
    return asr(speech)["text"]
def summarize_text(text):
    """Return the ``summary_text`` of the first result from ``summarizer``."""
    first_result = summarizer(text)[0]
    return first_result['summary_text']
def text_to_sentiment(text):
    """Return the label of the first ``fin_model`` result for ``text``."""
    first_result = fin_model(text)[0]
    return first_result["label"]
def fin_ext(text):
    """Label every sentence of ``text`` with ``fin_model``.

    Returns the ``(sentence, label)`` pairs built by ``make_spans``.
    """
    sentence_results = fin_model(split_in_sentences(text))
    return make_spans(text, sentence_results)
# Forward-looking-statement classifier, created on first use and then reused.
_fls_model = None


def _get_fls_model():
    """Build the FLS classifier on the first call and return the cached one after."""
    global _fls_model
    if _fls_model is None:
        # Imported locally under an alias so this call cannot be broken by a
        # module-level rebinding of the name ``pipeline``.
        from transformers import pipeline as hf_pipeline

        _fls_model = hf_pipeline(
            "text-classification",
            model="demo-org/finbert_fls",
            tokenizer="demo-org/finbert_fls",
            use_auth_token=auth_token,
        )
    return _fls_model


def fls(text):
    """Label every sentence of ``text`` with the forward-looking-statement model.

    Args:
        text: Text to analyse; it is split into sentences first.

    Returns:
        The ``(sentence, label)`` pairs built by ``make_spans``.
    """
    results = _get_fls_model()(split_in_sentences(text))
    return make_spans(text, results)
def fin_ner(text):
    """Run ``text`` through the hosted ``dslim/bert-base-NER`` model.

    The remote interface is loaded on every call and then invoked with the text.
    """
    # NOTE(review): the PII tab uses gr.load while this uses gr.Interface.load;
    # confirm which loader the pinned Gradio version expects.
    ner_api = gr.Interface.load(
        "dslim/bert-base-NER",
        src='models',
        use_auth_token=auth_token,
    )
    return ner_api(text)
# Financial Analyst tab: microphone input, intermediate text boxes, and one
# button per analysis step.
with gr.Blocks() as financial_tab:
    gr.Markdown("## Financial Analyst AI")

    # Input and output components, in display order.
    audio_file = gr.Audio(source="microphone", type="filepath")
    text = gr.Textbox(label="Recognized Text")
    summary = gr.Textbox(label="Summary")
    tone = gr.Label(label="Financial Tone")
    spans = gr.HighlightedText()
    fls_spans = gr.HighlightedText()
    ner_spans = gr.HighlightedText()

    # First row: speech -> text -> summary -> tone.
    with gr.Row():
        recognize_button = gr.Button("Recognize Speech")
        recognize_button.click(speech_to_text, inputs=audio_file, outputs=text)
        summarize_button = gr.Button("Summarize Text")
        summarize_button.click(summarize_text, inputs=text, outputs=summary)
        tone_button = gr.Button("Classify Tone")
        tone_button.click(text_to_sentiment, inputs=summary, outputs=tone)

    # Second row: sentence-level analyses of the recognized text.
    with gr.Row():
        sentiment_button = gr.Button("Financial Sentiment")
        sentiment_button.click(fin_ext, inputs=text, outputs=spans)
        fls_button = gr.Button("Forward Looking")
        fls_button.click(fls, inputs=text, outputs=fls_spans)
        ner_button = gr.Button("NER Companies")
        ner_button.click(fin_ner, inputs=text, outputs=ner_spans)
# --- Personal Information Detection --- | |
# Tab loaded directly from the hosted model's own interface.
pii_tab = gr.load(
    "models/iiiorg/piiranha-v1-detect-personal-information",
)
# --- Customer Churn --- | |
script_dir = os.path.dirname(os.path.abspath(__file__))
toolkit_dir = os.path.join(script_dir, 'toolkit')
# Named ``churn_pipeline`` rather than ``pipeline`` so the transformers
# ``pipeline`` factory imported at the top of the file is not shadowed.
churn_pipeline = joblib.load(os.path.join(toolkit_dir, 'pipeline.joblib'))
model = joblib.load(os.path.join(toolkit_dir, 'Random Forest Classifier.joblib'))


def calculate_total_charges(tenure, monthly_charges):
    """Return ``tenure * monthly_charges``, used as the TotalCharges feature."""
    return tenure * monthly_charges


def predict_churn(SeniorCitizen, Partner, Dependents, tenure,
                  InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
                  StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
                  MonthlyCharges):
    """Predict churn probabilities for one customer.

    The arguments are the raw form values, in the order of the ``churn_tab``
    inputs. They are assembled into a one-row frame, transformed with
    ``churn_pipeline`` and scored with ``model``.

    Returns:
        A dict mapping the two display labels to ``predict_proba`` values:
        index 1 for CHURN and index 0 for STAY.
    """
    TotalCharges = calculate_total_charges(tenure, MonthlyCharges)
    input_df = pd.DataFrame({
        'SeniorCitizen': [SeniorCitizen],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges],
    })

    X_processed = churn_pipeline.transform(input_df)

    # Rebuild column names for the transformed matrix: non-object columns
    # first, then the one-hot encoder's output names.
    # NOTE(review): assumes the fitted preprocessor emits columns in exactly
    # this order - confirm against the training code.
    cat_encoder = (
        churn_pipeline.named_steps['preprocessor']
        .named_transformers_['cat']
        .named_steps['onehot']
    )
    cat_cols = [col for col in input_df.columns if input_df[col].dtype == 'object']
    num_cols = [col for col in input_df.columns if input_df[col].dtype != 'object']
    feature_names = num_cols + list(cat_encoder.get_feature_names_out(cat_cols))
    final_df = pd.DataFrame(X_processed, columns=feature_names)

    # Move the first three columns to the end; presumably this matches the
    # column order the classifier was trained with - verify.
    final_df = pd.concat([final_df.iloc[:, 3:], final_df.iloc[:, :3]], axis=1)

    prediction_probs = model.predict_proba(final_df)[0]
    # The label suffixes were mis-decoded UTF-8; restored to the intended emoji.
    return {
        "Prediction: CHURN 🔴": float(prediction_probs[1]),
        "Prediction: STAY ✅": float(prediction_probs[0]),
    }
# Form inputs for the churn tab, in the positional order predict_churn expects.
churn_inputs = [
    gr.Radio(['Yes', 'No'], label="Senior Citizen"),
    gr.Radio(['Yes', 'No'], label="Partner"),
    gr.Radio(['No', 'Yes'], label="Dependents"),
    gr.Slider(1, 73, step=1, label="Tenure (months)"),
    gr.Radio(['DSL', 'Fiber optic', 'No Internet'], label="Internet Service"),
    gr.Radio(['No', 'Yes'], label="Online Security"),
    gr.Radio(['No', 'Yes'], label="Online Backup"),
    gr.Radio(['No', 'Yes'], label="Device Protection"),
    gr.Radio(['No', 'Yes'], label="Tech Support"),
    gr.Radio(['No', 'Yes'], label="Streaming TV"),
    gr.Radio(['No', 'Yes'], label="Streaming Movies"),
    gr.Radio(['Month-to-month', 'One year', 'Two year'], label="Contract"),
    gr.Radio(['Yes', 'No'], label="Paperless Billing"),
    gr.Radio(
        ['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'],
        label="Payment Method",
    ),
    gr.Slider(18.4, 118.65, label="Monthly Charges"),
]

churn_tab = gr.Interface(
    fn=predict_churn,
    inputs=churn_inputs,
    outputs=gr.Label(label="Prediction"),
    title="Customer Churn Prediction",
)
# --- Launching All Tabs --- | |
# (tab title, interface) pairs in display order.
tabs = [
    ("Translator", translator_tab),
    ("Sentiment Analysis", sentiment_tab),
    ("Financial Analyst", financial_tab),
    ("Personal Info Detection", pii_tab),
    ("Customer Churn", churn_tab),
]

demo = gr.TabbedInterface(
    interface_list=[interface for _, interface in tabs],
    tab_names=[title for title, _ in tabs],
)

# Start the app only when the file is run as a script.
if __name__ == '__main__':
    demo.launch()