import os
import re

import gradio as gr
import joblib
import pandas as pd
import spacy
from transformers import pipeline
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_openai import ChatOpenAI
# 1. Translator
class TextTranslator(BaseModel):
    output: str = Field(description="Python string containing the output text translated into the desired language")

output_parser = PydanticOutputParser(pydantic_object=TextTranslator)
format_instructions = output_parser.get_format_instructions()

# Chat model used for translation; assumes an OPENAI_API_KEY is available in the environment
chat_llm = ChatOpenAI()

def text_translator(input_text: str, language: str) -> str:
    human_template = """Enter the text that you want to translate:
{input_text}, and enter the language that you want it to translate to {language}. {format_instructions}"""
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(input_text=input_text, language=language, format_instructions=format_instructions)
    messages = prompt.to_messages()
    response = chat_llm.invoke(messages)
    output = output_parser.parse(response.content)
    output_text = output.output
    return output_text
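# Illustrative usage (assumes a valid OpenAI API key is configured), e.g.
#   text_translator("Good morning", "French")  # might return "Bonjour"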
# 2. Sentiment Analysis
classifier = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")

def sentiment_analysis(message, history):
    """
    Analyze the sentiment of a message.
    Returns the sentiment label with its probability.
    """
    result = classifier(message)
    return f"Sentiment: {result[0]['label']} (Probability: {result[0]['score']:.2f})"
# 3. Financial Analyst (Hugging Face pipelines; uses HF_Token for hosted models)
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('sentencizer')

def split_in_sentences(text):
    doc = nlp(text)
    return [str(sent).strip() for sent in doc.sents]

def make_spans(text, results):
    labels = [result['label'] for result in results]
    facts_spans = list(zip(split_in_sentences(text), labels))
    return facts_spans
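# make_spans pairs each sentence with its predicted label in the (text, label) format that
# gr.HighlightedText expects, e.g. [("Sales fell in May.", "Negative"), ("Margins improved.", "Positive")]
# (illustrative values only).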
auth_token = os.environ.get("HF_Token")

##Speech Recognition
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")

def transcribe(audio):
    text = asr(audio)["text"]
    return text

def speech_to_text(speech):
    text = asr(speech)["text"]
    return text
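# Both helpers wrap the same wav2vec2 ASR pipeline; speech_to_text is the one wired to the
# "Recognize Speech" button in the UI below.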
##Summarization
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")

def summarize_text(text):
    resp = summarizer(text)
    stext = resp[0]['summary_text']
    return stext

##Fiscal Tone Analysis
fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')

def text_to_sentiment(text):
    sentiment = fin_model(text)[0]["label"]
    return sentiment
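# finbert-tone labels each input as Positive, Negative, or Neutral financial tone (per its model card).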
##Company Extraction
def fin_ner(text):
    api = gr.load("dslim/bert-base-NER", src='models', hf_token=auth_token)
    replaced_spans = api(text)
    return replaced_spans

##Fiscal Sentiment by Sentence
def fin_ext(text):
    results = fin_model(split_in_sentences(text))
    return make_spans(text, results)

##Forward Looking Statement
def fls(text):
    # fls_model = pipeline("text-classification", model="yiyanghkust/finbert-fls", tokenizer="yiyanghkust/finbert-fls")
    fls_model = pipeline("text-classification", model="demo-org/finbert_fls", tokenizer="demo-org/finbert_fls", token=auth_token)
    results = fls_model(split_in_sentences(text))
    return make_spans(text, results)
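# The FLS classifier tags each sentence as forward-looking or not (e.g. "Specific FLS" / "Not FLS"
# style labels); the exact label names depend on the checkpoint used.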
# 4. Personal Info Detection
def detect_pii(text):
    pii_patterns = {
        "email": r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+",
        "phone": r"\+?\d[\d\-\s]{8,}\d",
        "credit_card": r"\b(?:\d[ -]*?){13,16}\b"
    }
    found = {}
    for label, pattern in pii_patterns.items():
        matches = re.findall(pattern, text)
        if matches:
            found[label] = matches
    return found or "No personal information found."
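# Example: detect_pii("Reach me at jane@example.com or +1 555 123 4567") would return
# {"email": ["jane@example.com"], "phone": ["+1 555 123 4567"]}; text with no matches
# returns the fallback string instead.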
# 5. Telco Customer Churn Prediction
script_dir = os.path.dirname(os.path.abspath(__file__))
pipeline_path = os.path.join(script_dir, 'toolkit', 'pipeline.joblib')
model_path = os.path.join(script_dir, 'toolkit', 'Random Forest Classifier.joblib')

# Load transformation pipeline and model
# (named churn_pipeline so it does not shadow transformers.pipeline, which fls() still calls at runtime)
churn_pipeline = joblib.load(pipeline_path)
model = joblib.load(model_path)

# Create a function to calculate TotalCharges
def calculate_total_charges(tenure, monthly_charges):
    return tenure * monthly_charges
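# TotalCharges is not collected in the UI; it is approximated as tenure * MonthlyCharges so the
# input frame carries every column the training pipeline expects (an approximation, not the billed total).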
# Create a function that applies the ML pipeline and makes predictions
def predict(SeniorCitizen, Partner, Dependents, tenure,
            InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges):

    # Calculate TotalCharges
    TotalCharges = calculate_total_charges(tenure, MonthlyCharges)

    # Create a dataframe with the input data
    input_df = pd.DataFrame({
        'SeniorCitizen': [SeniorCitizen],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges]
    })
    # Selecting categorical and numerical columns separately
    cat_cols = [col for col in input_df.columns if input_df[col].dtype == 'object']
    num_cols = [col for col in input_df.columns if input_df[col].dtype != 'object']

    X_processed = churn_pipeline.transform(input_df)

    # Extracting feature names for categorical columns after one-hot encoding
    cat_encoder = churn_pipeline.named_steps['preprocessor'].named_transformers_['cat'].named_steps['onehot']
    cat_feature_names = cat_encoder.get_feature_names_out(cat_cols)

    # Concatenating numerical and categorical feature names
    feature_names = num_cols + list(cat_feature_names)

    # Convert X_processed to DataFrame
    final_df = pd.DataFrame(X_processed, columns=feature_names)
    # Extract the first three columns and remaining columns, then merge
    first_three_columns = final_df.iloc[:, :3]
    remaining_columns = final_df.iloc[:, 3:]
    final_df = pd.concat([remaining_columns, first_three_columns], axis=1)

    # Make predictions using the model
    prediction_probs = model.predict_proba(final_df)[0]
    prediction_label = {
        "Prediction: CHURN 🔴": prediction_probs[1],
        "Prediction: STAY ✅": prediction_probs[0]
    }

    return prediction_label
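# predict() returns a dict mapping each label to its probability, which gr.Label renders as a
# ranked confidence display, e.g. {"Prediction: CHURN 🔴": 0.27, "Prediction: STAY ✅": 0.73}
# (illustrative probabilities only).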
input_interface = []

# Gradio UI setup
with gr.Blocks() as demo:
    with gr.Tab("Translator"):
        gr.HTML("<h1 align='center'> Text Translator </h1>")
        gr.HTML("<h4 align='center'> Translate to any language </h4>")

        inputs = [gr.Textbox(label="Enter the text that you want to translate"),
                  gr.Textbox(label="Enter the language that you want it to translate to", placeholder="Example: Hindi, French, Bengali, etc.")]
        generate_btn = gr.Button(value='Generate')
        outputs = [gr.Textbox(label="Translated text")]

        generate_btn.click(fn=text_translator, inputs=inputs, outputs=outputs)
with gr.Tab("Sentiment Analysis"): | |
gr.Markdown(""" | |
# Análisis de Sentimientos | |
Esta aplicación utiliza un modelo de Machine Learning para analizar el sentimiento de los mensajes ingresados. | |
Puede detectar si un texto es positivo, negativo o neutral con su respectiva probabilidad. | |
""") | |
chat = gr.ChatInterface(sentiment_analysis, type="messages") | |
gr.Markdown(""" | |
--- | |
### Conéctate conmigo: | |
[Instagram 📸](https://www.instagram.com/srjosueaaron/) | |
[TikTok 🎵](https://www.tiktok.com/@srjosueaaron) | |
[YouTube 🎬](https://www.youtube.com/@srjosueaaron) | |
--- | |
Demostración de Análisis de Sentimientos usando el modelo de [CardiffNLP](https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment). | |
Desarrollado con ❤️ por [@srjosueaaron](https://www.instagram.com/srjosueaaron/). | |
""") | |
with gr.Tab("Financial Analyst"): | |
gr.Markdown("## Financial Analyst AI") | |
gr.Markdown("This project applies AI trained by our financial analysts to analyze earning calls and other financial documents.") | |
with gr.Row(): | |
with gr.Column(): | |
audio_file = gr.inputs.Audio(source="microphone", type="filepath") | |
with gr.Row(): | |
b1 = gr.Button("Recognize Speech") | |
with gr.Row(): | |
text = gr.Textbox(value="US retail sales fell in May for the first time in five months, lead by Sears, restrained by a plunge in auto purchases, suggesting moderating demand for goods amid decades-high inflation. The value of overall retail purchases decreased 0.3%, after a downwardly revised 0.7% gain in April, Commerce Department figures showed Wednesday. Excluding Tesla vehicles, sales rose 0.5% last month. The department expects inflation to continue to rise.") | |
b1.click(speech_to_text, inputs=audio_file, outputs=text) | |
with gr.Row(): | |
b2 = gr.Button("Summarize Text") | |
stext = gr.Textbox() | |
b2.click(summarize_text, inputs=text, outputs=stext) | |
with gr.Row(): | |
b3 = gr.Button("Classify Financial Tone") | |
label = gr.Label() | |
b3.click(text_to_sentiment, inputs=stext, outputs=label) | |
with gr.Column(): | |
b5 = gr.Button("Financial Tone and Forward Looking Statement Analysis") | |
with gr.Row(): | |
fin_spans = gr.HighlightedText() | |
b5.click(fin_ext, inputs=text, outputs=fin_spans) | |
with gr.Row(): | |
fls_spans = gr.HighlightedText() | |
b5.click(fls, inputs=text, outputs=fls_spans) | |
with gr.Row(): | |
b4 = gr.Button("Identify Companies & Locations") | |
replaced_spans = gr.HighlightedText() | |
b4.click(fin_ner, inputs=text, outputs=replaced_spans) | |
with gr.Tab("PII Detector"): | |
pii_input = gr.Textbox(label="Text") | |
pii_output = gr.JSON(label="Detected PII") | |
pii_button = gr.Button("Detect") | |
pii_button.click(fn=detect_pii, inputs=pii_input, outputs=pii_output) | |
with gr.Tab("Telco Churn Predictor"): | |
Title = gr.Label('Customer Churn Prediction App') | |
with gr.Row(): | |
Title | |
with gr.Row(): | |
gr.Markdown("This app predicts likelihood of a customer to leave or stay with the company") | |
with gr.Row(): | |
with gr.Column(): | |
input_interface_column_1 = [ | |
gr.components.Radio(['Yes', 'No'], label="Are you a Seniorcitizen?"), | |
gr.components.Radio(['Yes', 'No'], label='Do you have Partner?'), | |
gr.components.Radio(['No', 'Yes'], label='Do you have any Dependents?'), | |
gr.components.Slider(label='Enter lenghth of Tenure in Months', minimum=1, maximum=73, step=1), | |
gr.components.Radio(['DSL', 'Fiber optic', 'No Internet'], label='What is your Internet Service?'), | |
gr.components.Radio(['No', 'Yes'], label='Do you have Online Security?'), | |
gr.components.Radio(['No', 'Yes'], label='Do you have Online Backup?'), | |
gr.components.Radio(['No', 'Yes'], label='Do you have Device Protection?') | |
] | |
            with gr.Column():
                input_interface_column_2 = [
                    gr.components.Radio(['No', 'Yes'], label='Do you have Tech Support?'),
                    gr.components.Radio(['No', 'Yes'], label='Do you have Streaming TV?'),
                    gr.components.Radio(['No', 'Yes'], label='Do you have Streaming Movies?'),
                    gr.components.Radio(['Month-to-month', 'One year', 'Two year'], label='What is your Contract Type?'),
                    gr.components.Radio(['Yes', 'No'], label='Do you prefer Paperless Billing?'),
                    gr.components.Radio(['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'], label='Which Payment Method do you prefer?'),
                    gr.components.Slider(label="Enter monthly charges", minimum=18.40, maximum=118.65)
                ]

        with gr.Row():
            input_interface.extend(input_interface_column_1)
            input_interface.extend(input_interface_column_2)
        with gr.Row():
            predict_btn = gr.Button('Predict')
            output_interface = gr.Label(label="churn")

        with gr.Accordion("Open for information on inputs", open=False):
            gr.Markdown("""This app receives the following as inputs and processes them to return a prediction on whether a customer will churn or not.
            - SeniorCitizen: Whether a customer is a senior citizen or not
            - Partner: Whether the customer has a partner or not (Yes, No)
            - Dependents: Whether the customer has dependents or not (Yes, No)
            - Tenure: Number of months the customer has stayed with the company
            - InternetService: Customer's internet service provider (DSL, Fiber optic, No)
            - OnlineSecurity: Whether the customer has online security or not (Yes, No, No internet service)
            - OnlineBackup: Whether the customer has online backup or not (Yes, No, No internet service)
            - DeviceProtection: Whether the customer has device protection or not (Yes, No, No internet service)
            - TechSupport: Whether the customer has tech support or not (Yes, No, No internet service)
            - StreamingTV: Whether the customer has streaming TV or not (Yes, No, No internet service)
            - StreamingMovies: Whether the customer has streaming movies or not (Yes, No, No internet service)
            - Contract: The contract term of the customer (Month-to-month, One year, Two year)
            - PaperlessBilling: Whether the customer has paperless billing or not (Yes, No)
            - PaymentMethod: The customer's payment method (Electronic check, Mailed check, Bank transfer (automatic), Credit card (automatic))
            - MonthlyCharges: The amount charged to the customer monthly
            """)

        predict_btn.click(fn=predict, inputs=input_interface, outputs=output_interface)

demo.launch()