# JUBJAI

# Running

# File size: 8,060 Bytes

import os
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import spacy
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain_openai import ChatOpenAI

# --- Translator App ---
# Chat model client shared by text_translator(); built once at import time
# with no explicit arguments.
# NOTE(review): presumably picks up the OpenAI credentials from the
# environment - confirm for the deployment.
chat = ChatOpenAI()
# Schema for the translator's structured reply: one string field, `output`,
# whose description is handed to the output parser below.
# NOTE(review): documented with a comment rather than a docstring on purpose -
# pydantic may copy a class docstring into the generated schema, which would
# change the format instructions sent to the model; confirm before converting.
class TextTranslator(BaseModel):
    output: str = Field(description="Python string containing the output text translated in the desired language")

# Parser that turns the model's reply into a TextTranslator instance, plus the
# instruction text derived from it; text_translator() passes that text into the
# prompt's {format_instructions} slot.
output_parser = PydanticOutputParser(pydantic_object=TextTranslator)
format_instructions = output_parser.get_format_instructions()

def text_translator(input_text: str, language: str) -> str:
    """Translate ``input_text`` into ``language`` with the shared chat model.

    The request is a single human message built from the template below; the
    module-level ``format_instructions`` are appended so the reply can be
    parsed by ``output_parser``.

    Args:
        input_text: Text to translate.
        language: Target language, as free text.

    Returns:
        The ``output`` field of the parsed reply.

    Raises:
        Whatever ``output_parser.parse`` raises when the reply does not match
        the expected structure; the error is not handled here.
    """
    human_template = """Enter the text that you want to translate: 
                {input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"""
    chat_prompt = ChatPromptTemplate.from_messages(
        [HumanMessagePromptTemplate.from_template(human_template)]
    )
    messages = chat_prompt.format_prompt(
        input_text=input_text,
        language=language,
        format_instructions=format_instructions,
    ).to_messages()
    # Use invoke(): calling the chat model object directly is the deprecated
    # spelling of the same request.
    response = chat.invoke(messages)
    return output_parser.parse(response.content).output

# Translator tab: two text inputs (source text, target language) feeding
# text_translator(), with the result shown in a single textbox.
translator_tab = gr.Interface(fn=text_translator, 
    inputs=[gr.Textbox(label="Text to translate"), gr.Textbox(label="Target Language")],
    outputs=[gr.Textbox(label="Translated Text")],
    title="Text Translator")

# --- Sentiment Analysis App ---
# Sentiment classifier used by sentiment_analysis(); created once at import time.
sentiment_model = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")
def sentiment_analysis(message, history):
    """Classify ``message`` and phrase the first prediction as a chat reply.

    ``history`` is part of the chat-callback signature but is not consulted.

    Returns:
        A string containing the predicted label and its score formatted to
        two decimal places.
    """
    result = sentiment_model(message)
    reply = f"Sentiment: {result[0]['label']} (Probability: {result[0]['score']:.2f})"
    return reply

# Chat-style tab whose replies come from sentiment_analysis().
sentiment_tab = gr.ChatInterface(fn=sentiment_analysis, title="Sentiment Analysis")

# --- Financial Analyst ---
# Shared resources for the Financial Analyst tab, all created at import time.
# spaCy pipeline used by split_in_sentences(); a 'sentencizer' component is
# added on top of the loaded model.
spacy_model = spacy.load('en_core_web_sm')
spacy_model.add_pipe('sentencizer')
# Token read from the HF_Token environment variable (None when unset); passed
# to the model loads inside fls() and fin_ner().
auth_token = os.environ.get("HF_Token")
# Speech recognition, summarization and financial-tone pipelines used by
# speech_to_text(), summarize_text(), text_to_sentiment() and fin_ext().
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')

def split_in_sentences(text):
    """Return the sentences of ``text`` as whitespace-stripped strings.

    Sentence boundaries come from running ``text`` through the module-level
    spaCy pipeline.
    """
    sentences = []
    for sentence in spacy_model(text).sents:
        sentences.append(str(sentence).strip())
    return sentences

def make_spans(text, results):
    """Pair each sentence of ``text`` with the label of the matching result.

    Args:
        text: Text that is re-split into sentences here.
        results: Sequence of mappings, each with a ``"label"`` key, in
            sentence order.

    Returns:
        A list of ``(sentence, label)`` tuples; pairing stops at the shorter
        of the two sequences.
    """
    sentences = split_in_sentences(text)
    labels = [entry["label"] for entry in results]
    return list(zip(sentences, labels))

def speech_to_text(speech):
    """Transcribe the audio referenced by ``speech`` with the ASR pipeline.

    Args:
        speech: Audio input as supplied by the audio component; may be
            ``None`` or empty when nothing has been recorded.

    Returns:
        The recognized text, or an empty string when there is no audio.
    """
    # The button can be clicked before anything is recorded; there is nothing
    # to transcribe in that case, so skip the model call.
    if not speech:
        return ""
    return asr(speech)["text"]

def summarize_text(text):
    """Summarize ``text`` with the summarization pipeline.

    Args:
        text: Text to summarize; may be empty when the source textbox has not
            been filled in yet.

    Returns:
        The ``summary_text`` of the first result, or an empty string when
        ``text`` is empty or whitespace-only.
    """
    # Nothing to summarize: avoid running the model on blank input.
    if not text or not text.strip():
        return ""
    return summarizer(text)[0]['summary_text']

def text_to_sentiment(text):
    """Return the label of the first prediction ``fin_model`` gives for ``text``."""
    predictions = fin_model(text)
    first = predictions[0]
    return first["label"]

def fin_ext(text):
    """Classify every sentence of ``text`` with ``fin_model``.

    Returns:
        The ``(sentence, label)`` pairs produced by ``make_spans``.
    """
    sentences = split_in_sentences(text)
    return make_spans(text, fin_model(sentences))

# Holds the classifier used by fls() once it has been built, so it is created
# on first use and then reused instead of being rebuilt per request.
_FLS_MODEL_CACHE = {}


def fls(text):
    """Label each sentence of ``text`` with the ``demo-org/finbert_fls`` classifier.

    The classifier is built lazily on the first call and cached afterwards.

    Args:
        text: Text to split into sentences and classify.

    Returns:
        The ``(sentence, label)`` pairs produced by ``make_spans``.
    """
    fls_model = _FLS_MODEL_CACHE.get("model")
    if fls_model is None:
        # Import under an alias: the module-level name ``pipeline`` is rebound
        # further down this file to a joblib-loaded object, so by the time a
        # request arrives ``pipeline(...)`` no longer refers to the
        # transformers factory.
        from transformers import pipeline as hf_pipeline

        fls_model = hf_pipeline(
            "text-classification",
            model="demo-org/finbert_fls",
            tokenizer="demo-org/finbert_fls",
            use_auth_token=auth_token,
        )
        _FLS_MODEL_CACHE["model"] = fls_model
    results = fls_model(split_in_sentences(text))
    return make_spans(text, results)

# Holds the loaded NER interface used by fin_ner() so it is loaded once and
# then reused across requests.
_NER_API_CACHE = {}


def fin_ner(text):
    """Run ``text`` through the ``dslim/bert-base-NER`` model interface.

    The interface is loaded on the first call and cached afterwards.

    Args:
        text: Text to analyse.

    Returns:
        Whatever the loaded interface returns for ``text``.
    """
    api = _NER_API_CACHE.get("api")
    if api is None:
        # NOTE(review): elsewhere this file loads models with gr.load(...);
        # consider using the same spelling here once the installed Gradio
        # version is confirmed.
        api = gr.Interface.load("dslim/bert-base-NER", src='models', use_auth_token=auth_token)
        _NER_API_CACHE["api"] = api
    return api(text)

# Financial Analyst tab: components are declared first, then two rows of
# buttons wire them to the helper functions above.
financial_tab = gr.Blocks()
with financial_tab:
    gr.Markdown("## Financial Analyst AI")
    # NOTE(review): the `source=` keyword may not be accepted by every Gradio
    # release - confirm against the installed version.
    audio_file = gr.Audio(source="microphone", type="filepath")
    text = gr.Textbox(label="Recognized Text")
    summary = gr.Textbox(label="Summary")
    tone = gr.Label(label="Financial Tone")
    # Three highlighted-text outputs: financial sentiment, forward-looking
    # statements and named entities, in that order.
    spans = gr.HighlightedText()
    fls_spans = gr.HighlightedText()
    ner_spans = gr.HighlightedText()
    # Row 1: audio -> recognized text -> summary -> tone (tone is computed
    # from the summary, not from the full recognized text).
    with gr.Row():
        gr.Button("Recognize Speech").click(speech_to_text, inputs=audio_file, outputs=text)
        gr.Button("Summarize Text").click(summarize_text, inputs=text, outputs=summary)
        gr.Button("Classify Tone").click(text_to_sentiment, inputs=summary, outputs=tone)
    # Row 2: per-sentence / per-entity analyses of the recognized text.
    with gr.Row():
        gr.Button("Financial Sentiment").click(fin_ext, inputs=text, outputs=spans)
        gr.Button("Forward Looking").click(fls, inputs=text, outputs=fls_spans)
        gr.Button("NER Companies").click(fin_ner, inputs=text, outputs=ner_spans)

# --- Personal Information Detection ---
# Tab built directly from a hosted model via gr.load(); no local wrapper function.
pii_tab = gr.load("models/iiiorg/piiranha-v1-detect-personal-information")

# --- Customer Churn ---
# Load the churn artifacts from the `toolkit` directory that sits next to this
# script: a preprocessing pipeline and a classifier.
script_dir = os.path.dirname(os.path.abspath(__file__))
# NOTE: this rebinds the module-level name `pipeline`, which up to this line
# referred to the factory imported from transformers. Any `pipeline(...)` call
# executed after this point resolves to the joblib-loaded object instead.
pipeline = joblib.load(os.path.join(script_dir, 'toolkit', 'pipeline.joblib'))
model = joblib.load(os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib'))

def calculate_total_charges(tenure, monthly_charges):
    """Return the product of ``tenure`` and ``monthly_charges``."""
    total_charges = tenure * monthly_charges
    return total_charges

def predict_churn(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):
    """Estimate churn and stay probabilities for a single customer.

    The arguments are assembled into a one-row DataFrame (with ``TotalCharges``
    derived from ``tenure`` and ``MonthlyCharges``), transformed by the
    module-level preprocessing ``pipeline`` and scored by ``model``.

    Args:
        SeniorCitizen, Partner, Dependents, InternetService, OnlineSecurity,
        OnlineBackup, DeviceProtection, TechSupport, StreamingTV,
        StreamingMovies, Contract, PaperlessBilling, PaymentMethod: Values
            placed unchanged into the column of the same name.
        tenure: Value for the ``tenure`` column; also a factor of
            ``TotalCharges``.
        MonthlyCharges: Value for the ``MonthlyCharges`` column; also a
            factor of ``TotalCharges``.

    Returns:
        A dict mapping the churn label to ``predict_proba(...)[0][1]`` and the
        stay label to ``predict_proba(...)[0][0]``.
        NOTE(review): assumes the classifier's class order is (stay, churn) -
        confirm against ``model.classes_``.
    """
    TotalCharges = calculate_total_charges(tenure, MonthlyCharges)
    input_df = pd.DataFrame({
        'SeniorCitizen': [SeniorCitizen],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges],
    })
    X_processed = pipeline.transform(input_df)

    # Rebuild column labels for the transformed matrix: the non-object columns
    # keep their names, followed by the one-hot encoder's generated names for
    # the object-typed columns.
    cat_encoder = pipeline.named_steps['preprocessor'].named_transformers_['cat'].named_steps['onehot']
    cat_cols = [col for col in input_df.columns if input_df[col].dtype == 'object']
    num_cols = [col for col in input_df.columns if input_df[col].dtype != 'object']
    feature_names = num_cols + list(cat_encoder.get_feature_names_out(cat_cols))
    final_df = pd.DataFrame(X_processed, columns=feature_names)

    # Move the first three columns to the end.
    # NOTE(review): presumably this matches the column order the classifier
    # was trained with - confirm against the training code.
    final_df = pd.concat([final_df.iloc[:, 3:], final_df.iloc[:, :3]], axis=1)

    prediction_probs = model.predict_proba(final_df)[0]
    # The label suffixes were stored as mis-decoded UTF-8 bytes; they are
    # restored here to the characters those bytes encode.
    return {
        "Prediction: CHURN 🔴": prediction_probs[1],
        "Prediction: STAY ✅": prediction_probs[0],
    }

# Customer Churn tab. The input components are listed in the same order as the
# positional parameters of predict_churn(), which is how values are matched to
# arguments; keep the two in sync when adding or reordering fields.
churn_tab = gr.Interface(
    fn=predict_churn,
    inputs=[
        gr.Radio(['Yes', 'No'], label="Senior Citizen"),
        gr.Radio(['Yes', 'No'], label="Partner"),
        gr.Radio(['No', 'Yes'], label="Dependents"),
        gr.Slider(1, 73, step=1, label="Tenure (months)"),
        gr.Radio(['DSL', 'Fiber optic', 'No Internet'], label="Internet Service"),
        gr.Radio(['No', 'Yes'], label="Online Security"),
        gr.Radio(['No', 'Yes'], label="Online Backup"),
        gr.Radio(['No', 'Yes'], label="Device Protection"),
        gr.Radio(['No', 'Yes'], label="Tech Support"),
        gr.Radio(['No', 'Yes'], label="Streaming TV"),
        gr.Radio(['No', 'Yes'], label="Streaming Movies"),
        gr.Radio(['Month-to-month', 'One year', 'Two year'], label="Contract"),
        gr.Radio(['Yes', 'No'], label="Paperless Billing"),
        gr.Radio(['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'], label="Payment Method"),
        gr.Slider(18.4, 118.65, label="Monthly Charges")
    ],
    # predict_churn() returns a label -> probability dict for this component.
    outputs=gr.Label(label="Prediction"),
    title="Customer Churn Prediction"
)

# --- Launching All Tabs ---
# Combine the five tabs into one app; `tab_names` is listed in the same order
# as `interface_list`.
demo = gr.TabbedInterface(
    interface_list=[
        translator_tab,
        sentiment_tab,
        financial_tab,
        pii_tab,
        churn_tab
    ],
    tab_names=[
        "Translator",
        "Sentiment Analysis",
        "Financial Analyst",
        "Personal Info Detection",
        "Customer Churn"
    ]
)

# Start the app only when the file is run as a script.
if __name__ == '__main__':
    demo.launch()