import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Candidate labels offered to the zero-shot classifier.
categories = [
    "Groceries",
    "Rent",
    "Utilities",
    "Entertainment",
    "Dining",
    "Transportation",
    "Salary",
]

# Monthly budget per category. A category without an entry (e.g. "Salary")
# has no budget and is therefore never reported as over budget.
budgets = {
    "Groceries": 300,
    "Rent": 1000,
    "Utilities": 150,
    "Entertainment": 100,
    "Dining": 150,
    "Transportation": 120,
}

# Columns the dashboard reads from the uploaded CSV.
_REQUIRED_COLUMNS = ("Description", "Amount", "Date")

# Category assigned to rows whose description is blank or missing; the
# classifier cannot be called on an empty sequence.
_UNCATEGORIZED = "Uncategorized"


@st.cache_resource
def _load_classifier():
    """Load the zero-shot classification pipeline once per server process.

    Zero-shot classification requires a checkpoint fine-tuned on natural
    language inference. A plain ``distilbert-base-uncased`` checkpoint has
    no trained NLI head, so the label scores it produces are meaningless.
    Any other NLI checkpoint can be substituted here if a smaller model is
    preferred.
    """
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


@st.cache_resource
def _load_qa_model():
    """Load the extractive question-answering pipeline once per server process."""
    return pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")


@st.cache_data(show_spinner=False)
def categorize_expense(description):
    """Return the entry of ``categories`` that best matches ``description``.

    Results are cached per description, so Streamlit reruns (triggered by
    every widget interaction) and repeated descriptions do not re-run model
    inference.

    Args:
        description: Non-empty free-text description of one expense.

    Returns:
        The highest-scoring label from ``categories``.
    """
    result = _load_classifier()(description, candidate_labels=categories)
    return result["labels"][0]


def answer_question(question):
    """Answer ``question`` using the module-level ``knowledge_base`` as context.

    ``knowledge_base`` is built further down, after a CSV has been uploaded
    and categorized; this function must only be called after that point.

    Args:
        question: Natural-language question about the uploaded expenses.

    Returns:
        The answer span extracted from ``knowledge_base``.
    """
    result = _load_qa_model()(question=question, context=knowledge_base)
    return result["answer"]


uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.write("Columns in the uploaded file:", df.columns)

    missing_columns = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if "Description" in missing_columns:
        st.error("Error: The CSV file does not contain a 'Description' column.")
    elif missing_columns:
        st.error(
            "Error: The CSV file is missing required column(s): "
            f"{', '.join(missing_columns)}."
        )
    else:
        # Coerce rather than raise so that a few malformed rows do not take
        # the whole dashboard down; the affected rows are reported and skipped.
        df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce")
        df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
        invalid_rows = df["Amount"].isna() | df["Date"].isna()
        if invalid_rows.any():
            st.warning(
                f"Skipped {int(invalid_rows.sum())} row(s) whose 'Amount' or 'Date' "
                "could not be parsed."
            )
            df = df.loc[~invalid_rows].copy()

        if df.empty:
            st.warning("The uploaded file contains no valid expense rows.")
        else:
            # --- Categorize each expense -----------------------------------
            descriptions = df["Description"].fillna("").astype(str).str.strip()
            df["Category"] = descriptions.map(
                lambda text: categorize_expense(text) if text else _UNCATEGORIZED
            )
            st.write("Categorized Data:", df.head())

            # --- Expense distribution by category (pie chart) --------------
            category_expenses = df.groupby("Category")["Amount"].sum()
            # A pie chart cannot represent zero or negative totals, so only
            # strictly positive category totals are drawn.
            positive_totals = category_expenses[category_expenses > 0]
            if not positive_totals.empty:
                fig1, ax1 = plt.subplots(figsize=(8, 8))
                positive_totals.plot(
                    kind="pie",
                    ax=ax1,
                    autopct="%1.1f%%",
                    startangle=90,
                    colors=plt.cm.Paired.colors,
                )
                ax1.set_title("Expense Distribution by Category")
                ax1.set_ylabel("")
                st.pyplot(fig1)
                # Figures created through pyplot stay registered until closed;
                # without this they accumulate on every rerun.
                plt.close(fig1)

            # --- Monthly totals (line chart) -------------------------------
            df["Month"] = df["Date"].dt.to_period("M").astype(str)
            monthly_expenses = df.groupby("Month")["Amount"].sum()

            fig2 = px.line(
                monthly_expenses.reset_index(),
                x="Month",
                y="Amount",
                title="Monthly Expenses",
                labels={"Amount": "Amount ($)"},
            )
            st.plotly_chart(fig2)

            # --- Budget check ----------------------------------------------
            # Budgets are monthly, so compare each category's monthly total
            # (not individual transactions) against its budget. Unbudgeted
            # categories map to NaN, and a comparison with NaN is False.
            category_monthly = df.groupby(["Month", "Category"], as_index=False)["Amount"].sum()
            category_monthly["Budget"] = category_monthly["Category"].map(budgets)
            exceeded_budget = category_monthly[
                category_monthly["Amount"] > category_monthly["Budget"]
            ]

            # Row-level flag: True when the row belongs to a month/category
            # pair whose total went over budget.
            over_budget_keys = set(zip(exceeded_budget["Month"], exceeded_budget["Category"]))
            df["Budget_Exceeded"] = [
                key in over_budget_keys for key in zip(df["Month"], df["Category"])
            ]

            st.write(
                "Categories that exceeded the budget:",
                exceeded_budget[["Month", "Category", "Amount", "Budget"]],
            )

            # --- Monthly spending vs. total budget (bar chart) -------------
            monthly_expenses_df = pd.DataFrame(
                {
                    "Actual": monthly_expenses,
                    "Budget": sum(budgets.values()),
                }
            )

            # DataFrame.plot returns an Axes, while st.pyplot expects a
            # Figure, so create the figure explicitly and draw onto its axes.
            fig3, ax3 = plt.subplots(figsize=(10, 6))
            monthly_expenses_df.plot(kind="bar", ax=ax3)
            ax3.set_title("Monthly Spending vs Budget")
            ax3.set_ylabel("Amount ($)")
            st.pyplot(fig3)
            plt.close(fig3)

            # --- Question answering over the categorized expenses ----------
            # One line of context per expense, read by answer_question().
            knowledge_base = "\n".join(
                f"Description: {row.Description}, Amount: {row.Amount}, Category: {row.Category}"
                for row in df[["Description", "Amount", "Category"]].itertuples(index=False)
            )

            st.write("Ask a question about your expenses:")
            user_question = st.text_input("Enter your question:")

            if user_question and user_question.strip():
                answer = answer_question(user_question)
                st.write(f"Answer: {answer}")