File size: 8,060 Bytes
6993212
ce55ba8
 
 
6993212
 
d03ac60
6993212
 
 
 
 
d03ac60
6993212
 
d03ac60
6993212
 
 
 
d03ac60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6993212
 
 
d03ac60
 
 
 
 
 
 
 
 
 
 
6993212
 
d03ac60
 
6993212
 
d03ac60
6993212
d03ac60
 
6993212
 
 
 
 
 
 
 
d03ac60
 
6993212
 
d03ac60
 
 
6993212
d03ac60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce55ba8
d03ac60
 
ce55ba8
 
 
 
d03ac60
ce55ba8
 
 
6993212
ce55ba8
d03ac60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce55ba8
d03ac60
 
 
 
ce55ba8
d03ac60
 
6993212
d03ac60
 
f9a5f05
ce55ba8
d03ac60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6993212
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import os
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import spacy
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain_openai import ChatOpenAI

# --- Translator App ---
# Chat model client shared by every translation request. It is constructed
# once at import time and no arguments are passed, so library defaults apply.
# NOTE(review): presumably credentials come from the environment -- confirm.
chat = ChatOpenAI()
# Schema the model's reply is parsed into: a single string field, `output`.
# NOTE(review): documented with comments rather than a class docstring because
# a docstring would presumably end up in the generated schema and therefore in
# the prompt text -- confirm before adding one.
class TextTranslator(BaseModel):
    # The description below is runtime text: it feeds the format instructions
    # generated from this schema further down.
    output: str = Field(description="Python string containing the output text translated in the desired language")

# Parser that converts the raw model reply into a TextTranslator instance.
output_parser = PydanticOutputParser(pydantic_object=TextTranslator)
# Formatting instructions derived from the parser; text_translator() injects
# them into every prompt.
format_instructions = output_parser.get_format_instructions()

def text_translator(input_text: str, language: str) -> str:
    """Translate ``input_text`` into ``language`` with the shared chat model.

    A single human message is built from the template below, filled with the
    text, the target language and the module-level ``format_instructions``.
    The model reply is parsed with ``output_parser``.

    Args:
        input_text: Text to translate.
        language: Target language, as free text.

    Returns:
        The ``output`` field of the parsed reply.

    Errors raised by the model call or by the parser are not caught here and
    propagate to the caller.
    """
    human_template = """Enter the text that you want to translate: 
                {input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"""
    chat_prompt = ChatPromptTemplate.from_messages(
        [HumanMessagePromptTemplate.from_template(human_template)]
    )
    prompt = chat_prompt.format_prompt(
        input_text=input_text,
        language=language,
        format_instructions=format_instructions,
    )
    # Use `invoke` rather than calling the model object directly: the
    # `chat(messages=...)` form is deprecated in langchain-core.
    response = chat.invoke(prompt.to_messages())
    return output_parser.parse(response.content).output

# Translator UI: the two textboxes map positionally onto text_translator's
# (input_text, language) parameters; the single output shows the translation.
translator_tab = gr.Interface(
    fn=text_translator,
    inputs=[
        gr.Textbox(label="Text to translate"),
        gr.Textbox(label="Target Language"),
    ],
    outputs=[
        gr.Textbox(label="Translated Text"),
    ],
    title="Text Translator",
)

# --- Sentiment Analysis App ---
# Sentiment classification pipeline backed by the
# cardiffnlp/twitter-xlm-roberta-base-sentiment checkpoint; built once at
# import time and reused by sentiment_analysis().
sentiment_model = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
)
def sentiment_analysis(message, history):
    """Classify ``message`` and format the first prediction as a reply string.

    ``history`` is part of the callback signature (this function is handed to
    ``gr.ChatInterface``) but is not read here.

    Returns a string of the form
    ``"Sentiment: <label> (Probability: <score to 2 decimals>)"``.
    """
    top = sentiment_model(message)[0]
    label = top['label']
    score = top['score']
    return f"Sentiment: {label} (Probability: {score:.2f})"

# Chat-style UI whose replies are produced by sentiment_analysis().
sentiment_tab = gr.ChatInterface(
    fn=sentiment_analysis,
    title="Sentiment Analysis",
)

# --- Financial Analyst ---
# spaCy English pipeline; split_in_sentences() runs text through it.
spacy_model = spacy.load('en_core_web_sm')
# Add the 'sentencizer' component to the loaded spaCy pipeline.
spacy_model.add_pipe('sentencizer')
# Hugging Face token read from the HF_Token environment variable (None when
# the variable is unset); passed to the loaders in fls() and fin_ner().
auth_token = os.environ.get("HF_Token")
# Speech recognition pipeline used by speech_to_text().
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
# Summarization pipeline used by summarize_text().
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
# Tone classifier used by text_to_sentiment() and fin_ext(); model and
# tokenizer come from the same checkpoint.
fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')

def split_in_sentences(text):
    """Split ``text`` into sentences using the module-level spaCy pipeline.

    Returns a list of sentence strings, each with surrounding whitespace
    stripped, in the order spaCy yields them.
    """
    sentences = []
    for span in spacy_model(text).sents:
        sentences.append(str(span).strip())
    return sentences

def make_spans(text, results):
    """Pair the sentences of ``text`` with the labels found in ``results``.

    ``results`` is an iterable of mappings that each carry a ``"label"`` key.
    Sentences and labels are matched by position; ``zip`` stops at the shorter
    of the two sequences.

    Returns a list of ``(sentence, label)`` tuples.
    """
    sentences = split_in_sentences(text)
    labels = [item["label"] for item in results]
    return list(zip(sentences, labels))

def speech_to_text(speech):
    """Run ``speech`` through the ``asr`` pipeline and return its ``"text"`` entry."""
    transcription = asr(speech)
    return transcription["text"]

def summarize_text(text):
    """Summarize ``text`` and return the ``'summary_text'`` of the first result."""
    summaries = summarizer(text)
    first = summaries[0]
    return first['summary_text']

def text_to_sentiment(text):
    """Classify ``text`` with ``fin_model`` and return the first result's label."""
    predictions = fin_model(text)
    top = predictions[0]
    return top["label"]

def fin_ext(text):
    """Classify every sentence of ``text`` with ``fin_model``.

    The text is split into sentences, the whole list is passed to the
    classifier in one call, and the labels are paired back with the sentences
    by ``make_spans`` (which splits ``text`` again itself).

    Returns a list of ``(sentence, label)`` tuples.
    """
    sentences = split_in_sentences(text)
    return make_spans(text, fin_model(sentences))

# Forward-looking-statement classifier, built on the first fls() call and
# reused afterwards so the model is not reloaded on every button click.
_fls_model = None


def fls(text):
    """Label each sentence of ``text`` with the ``demo-org/finbert_fls`` classifier.

    The classifier is created lazily on first use and cached in the
    module-level ``_fls_model``. The text is split into sentences, classified
    in one call, and the labels are paired with the sentences by
    ``make_spans``.

    Returns a list of ``(sentence, label)`` tuples.
    """
    global _fls_model
    if _fls_model is None:
        # Import under an alias: further down this file the module-level name
        # `pipeline` is rebound to the joblib-loaded churn preprocessing
        # pipeline, so by the time this handler runs the bare name no longer
        # refers to the transformers factory.
        from transformers import pipeline as hf_pipeline

        _fls_model = hf_pipeline(
            "text-classification",
            model="demo-org/finbert_fls",
            tokenizer="demo-org/finbert_fls",
            use_auth_token=auth_token,
        )
    results = _fls_model(split_in_sentences(text))
    return make_spans(text, results)

# Remote NER interface, created on the first fin_ner() call and reused so the
# interface is not rebuilt on every button click.
_ner_api = None


def fin_ner(text):
    """Run ``text`` through the hosted ``dslim/bert-base-NER`` model.

    The loaded interface is cached in the module-level ``_ner_api`` after the
    first call. Returns whatever the loaded interface returns for ``text``.

    NOTE(review): this file also uses ``gr.load(...)`` for another tab;
    consider aligning this loader with it once the installed Gradio version
    is confirmed.
    """
    global _ner_api
    if _ner_api is None:
        _ner_api = gr.Interface.load("dslim/bert-base-NER", src='models', use_auth_token=auth_token)
    return _ner_api(text)

# Financial analyst UI: microphone input, three text/label outputs, three
# highlighted-text outputs, and two rows of buttons wiring handlers to them.
with gr.Blocks() as financial_tab:
    gr.Markdown("## Financial Analyst AI")
    audio_file = gr.Audio(source="microphone", type="filepath")
    text = gr.Textbox(label="Recognized Text")
    summary = gr.Textbox(label="Summary")
    tone = gr.Label(label="Financial Tone")
    spans = gr.HighlightedText()
    fls_spans = gr.HighlightedText()
    ner_spans = gr.HighlightedText()

    # First row: speech -> text -> summary -> tone.
    with gr.Row():
        recognize_btn = gr.Button("Recognize Speech")
        recognize_btn.click(speech_to_text, inputs=audio_file, outputs=text)
        summarize_btn = gr.Button("Summarize Text")
        summarize_btn.click(summarize_text, inputs=text, outputs=summary)
        tone_btn = gr.Button("Classify Tone")
        tone_btn.click(text_to_sentiment, inputs=summary, outputs=tone)

    # Second row: per-sentence analyses of the recognized text.
    with gr.Row():
        sentiment_btn = gr.Button("Financial Sentiment")
        sentiment_btn.click(fin_ext, inputs=text, outputs=spans)
        forward_btn = gr.Button("Forward Looking")
        forward_btn.click(fls, inputs=text, outputs=fls_spans)
        ner_btn = gr.Button("NER Companies")
        ner_btn.click(fin_ner, inputs=text, outputs=ner_spans)

# --- Personal Information Detection ---
# Tab built by gr.load from the hosted model path below; the returned object
# is later placed in the tabbed interface as the "Personal Info Detection" tab.
pii_tab = gr.load("models/iiiorg/piiranha-v1-detect-personal-information")

# --- Customer Churn ---
# Directory containing this script; the churn artefacts are resolved relative
# to it (under the 'toolkit' subdirectory).
script_dir = os.path.dirname(os.path.abspath(__file__))
# Preprocessing pipeline object; predict_churn() calls its transform() and
# reads named_steps['preprocessor'] from it.
# NOTE(review): this assignment rebinds the module-level name `pipeline`, which
# the import block at the top of the file binds to the transformers pipeline
# factory. Any use of the bare name that executes after this line -- including
# inside functions defined earlier but called later -- sees this joblib-loaded
# object instead of the factory.
pipeline = joblib.load(os.path.join(script_dir, 'toolkit', 'pipeline.joblib'))
# Classifier whose predict_proba() is called in predict_churn().
model = joblib.load(os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib'))

def calculate_total_charges(tenure, monthly_charges):
    """Return ``tenure`` multiplied by ``monthly_charges``."""
    total = tenure * monthly_charges
    return total

def predict_churn(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):
    """Predict churn probabilities for one customer described by the form values.

    The arguments are assembled into a one-row DataFrame (plus a derived
    ``TotalCharges`` column), passed through the module-level preprocessing
    ``pipeline``, relabelled with feature names, reordered, and scored with
    the module-level ``model``.

    Returns:
        A dict with two entries: the CHURN label mapped to
        ``predict_proba(...)[0][1]`` and the STAY label mapped to
        ``predict_proba(...)[0][0]``.
    """
    record = {
        'SeniorCitizen': SeniorCitizen,
        'Partner': Partner,
        'Dependents': Dependents,
        'tenure': tenure,
        'InternetService': InternetService,
        'OnlineSecurity': OnlineSecurity,
        'OnlineBackup': OnlineBackup,
        'DeviceProtection': DeviceProtection,
        'TechSupport': TechSupport,
        'StreamingTV': StreamingTV,
        'StreamingMovies': StreamingMovies,
        'Contract': Contract,
        'PaperlessBilling': PaperlessBilling,
        'PaymentMethod': PaymentMethod,
        'MonthlyCharges': MonthlyCharges,
        'TotalCharges': calculate_total_charges(tenure, MonthlyCharges),
    }
    # One single-element list per column, i.e. a one-row frame.
    input_df = pd.DataFrame({name: [value] for name, value in record.items()})

    transformed = pipeline.transform(input_df)
    onehot = pipeline.named_steps['preprocessor'].named_transformers_['cat'].named_steps['onehot']

    # Partition the input columns by dtype: object columns are treated as
    # categorical, everything else as non-categorical.
    categorical = []
    non_categorical = []
    for col in input_df.columns:
        if input_df[col].dtype == 'object':
            categorical.append(col)
        else:
            non_categorical.append(col)

    # Column labels for the transformed matrix: non-categorical names first,
    # then the one-hot encoder's generated names.
    feature_names = non_categorical + list(onehot.get_feature_names_out(categorical))
    labelled = pd.DataFrame(transformed, columns=feature_names)

    # Move the first three columns to the end.
    # NOTE(review): presumably these are the three numeric features and the
    # model was trained with them last -- verify against the training code.
    leading = labelled.iloc[:, :3]
    trailing = labelled.iloc[:, 3:]
    final_df = pd.concat([trailing, leading], axis=1)

    probabilities = model.predict_proba(final_df)[0]
    churn_probability = probabilities[1]
    stay_probability = probabilities[0]
    return {
        "Prediction: CHURN 🔴": churn_probability,
        "Prediction: STAY ✅": stay_probability
    }

def _yes_no_radio(label, yes_first=False):
    """Build a two-option Yes/No radio; ``yes_first`` controls the option order."""
    choices = ['Yes', 'No'] if yes_first else ['No', 'Yes']
    return gr.Radio(choices, label=label)


# Churn form. The inputs are listed in the same order as predict_churn's
# positional parameters.
churn_tab = gr.Interface(
    fn=predict_churn,
    inputs=[
        _yes_no_radio("Senior Citizen", yes_first=True),
        _yes_no_radio("Partner", yes_first=True),
        _yes_no_radio("Dependents"),
        gr.Slider(1, 73, step=1, label="Tenure (months)"),
        gr.Radio(['DSL', 'Fiber optic', 'No Internet'], label="Internet Service"),
        _yes_no_radio("Online Security"),
        _yes_no_radio("Online Backup"),
        _yes_no_radio("Device Protection"),
        _yes_no_radio("Tech Support"),
        _yes_no_radio("Streaming TV"),
        _yes_no_radio("Streaming Movies"),
        gr.Radio(['Month-to-month', 'One year', 'Two year'], label="Contract"),
        _yes_no_radio("Paperless Billing", yes_first=True),
        gr.Radio(
            ['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'],
            label="Payment Method",
        ),
        gr.Slider(18.4, 118.65, label="Monthly Charges"),
    ],
    outputs=gr.Label(label="Prediction"),
    title="Customer Churn Prediction",
)

# --- Launching All Tabs ---
# Each tab is declared next to its display name so the two lists handed to
# gr.TabbedInterface cannot drift out of step.
_tabs = [
    ("Translator", translator_tab),
    ("Sentiment Analysis", sentiment_tab),
    ("Financial Analyst", financial_tab),
    ("Personal Info Detection", pii_tab),
    ("Customer Churn", churn_tab),
]

demo = gr.TabbedInterface(
    interface_list=[tab for _, tab in _tabs],
    tab_names=[name for name, _ in _tabs],
)

# Start the app only when this file is run as a script.
if __name__ == '__main__':
    demo.launch()