File size: 5,176 Bytes
f10ec56
 
2c359f1
c7d0bb8
 
f10ec56
c7d0bb8
 
2c359f1
a6ee9ca
c7d0bb8
a6ee9ca
6ca4f9e
11d5829
 
a6ee9ca
11d5829
 
 
 
 
 
 
a6ee9ca
11d5829
 
a6ee9ca
11d5829
 
 
 
a6ee9ca
11d5829
 
a6ee9ca
11d5829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Let the user upload a CSV export of their transactions.
uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")

if uploaded_file is not None:
    # Parse the uploaded file into a DataFrame.
    df = pd.read_csv(uploaded_file)

    # Debug aid: show which columns were actually found in the upload.
    st.write("Columns in the uploaded file:", df.columns)

    # The rest of the script reads 'Description' (categorisation), 'Amount'
    # (every aggregation) and 'Date' (monthly grouping). Check all three up
    # front so a missing column yields a clear message instead of a KeyError
    # part-way through rendering.
    required_columns = ('Description', 'Amount', 'Date')
    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        st.error(
            "Error: The CSV file does not contain the required column(s): "
            + ", ".join(f"'{name}'" for name in missing_columns)
            + "."
        )
    else:
        # Initialize Hugging Face's zero-shot text classification model.
        # Zero-shot classification scores each candidate label as an NLI
        # hypothesis against the text, so the checkpoint must be fine-tuned on
        # an NLI task. A plain pretrained encoder ('distilbert-base-uncased')
        # has no trained classification head and yields arbitrary labels.
        # NOTE(review): 'facebook/bart-large-mnli' is a heavier alternative if
        # accuracy matters more than download size — confirm for deployment.
        model_name = 'typeform/distilbert-base-uncased-mnli'
        classifier = pipeline('zero-shot-classification', model=model_name)

        # List of possible expense categories
        categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]

        def categorize_expense(description):
            """Return the most probable category label for one description.

            Args:
                description: Free-text transaction description (one cell of
                    the 'Description' column).

            Returns:
                The top-ranked entry of ``categories``, or "Uncategorized"
                when the description is missing or blank.
            """
            # Empty CSV cells are read as NaN, which is not text the pipeline
            # can classify; report them explicitly instead of crashing.
            if pd.isna(description) or not str(description).strip():
                return "Uncategorized"
            result = classifier(str(description), candidate_labels=categories)
            # 'labels' is ordered by descending score; take the best match.
            return result['labels'][0]

        # Apply the categorization function to the 'Description' column in the dataset
        df['Category'] = df['Description'].apply(categorize_expense)

        # Preview the first rows now that each transaction carries a category
        st.write("Categorized Data:", df.head())

        # Visualization 1: share of total spending per category (pie chart)
        category_expenses = df.groupby('Category')['Amount'].sum()

        # Draw on an explicitly created figure/axes pair, then hand the figure to Streamlit
        fig1, pie_axes = plt.subplots(figsize=(8, 8))
        category_expenses.plot(
            kind='pie',
            ax=pie_axes,
            autopct='%1.1f%%',
            startangle=90,
            colors=plt.cm.Paired.colors,
        )
        pie_axes.set_title('Expense Distribution by Category')
        pie_axes.set_ylabel('')  # drop the default axis label next to the pie
        st.pyplot(fig1)

        # Visualization 2: monthly spending trend (line chart)
        # Parse the raw 'Date' values so the .dt accessor can be used below
        df['Date'] = pd.to_datetime(df['Date'])

        # Bucket every transaction into its calendar month; the Period values are
        # stored as strings to avoid JSON serialization issues when charting
        month_periods = df['Date'].dt.to_period('M')
        df['Month'] = month_periods.astype(str)

        # Total amount per month, indexed by the month string
        monthly_expenses = df.groupby('Month')['Amount'].sum()

        # Render the monthly totals as a line chart
        axis_labels = {"x": "Month", "y": "Amount ($)"}
        fig2 = px.line(
            monthly_expenses,
            x=monthly_expenses.index,
            y=monthly_expenses.values,
            title="Monthly Expenses",
            labels=axis_labels,
        )
        st.plotly_chart(fig2)

        # Budget and alerts: per-category monthly spending limits. Categories
        # without an entry (e.g. "Salary") have no limit and are never flagged.
        budgets = {
            "Groceries": 300,
            "Rent": 1000,
            "Utilities": 150,
            "Entertainment": 100,
            "Dining": 150,
            "Transportation": 120,
        }

        # A budget applies to the total spent in a category during a month, so
        # aggregate first; comparing single transactions to the limit would
        # miss overspending that accumulates over several smaller purchases.
        monthly_category_spend = (
            df.groupby(['Month', 'Category'])['Amount'].sum().reset_index()
        )
        # Categories missing from `budgets` map to NaN, and any comparison
        # against NaN is False, so they drop out of the alert automatically.
        monthly_category_spend['Budget'] = monthly_category_spend['Category'].map(budgets)
        over_budget_mask = monthly_category_spend['Amount'] > monthly_category_spend['Budget']
        exceeded_budget = monthly_category_spend[over_budget_mask]

        # Keep a row-level flag on the transactions: True when the row belongs
        # to a (month, category) pair whose total went over its budget.
        over_budget_keys = set(zip(exceeded_budget['Month'], exceeded_budget['Category']))
        df['Budget_Exceeded'] = [
            key in over_budget_keys for key in zip(df['Month'], df['Category'])
        ]

        # Show which categories exceeded their budgets, and in which month
        st.write(
            "Categories that exceeded the budget:",
            exceeded_budget[['Month', 'Category', 'Amount', 'Budget']],
        )

        # Visualization 3: Monthly Spending vs Budget (Bar Chart)
        # One row per month: actual total next to the overall monthly budget
        # (the sum of all category budgets, identical for every month).
        monthly_expenses_df = pd.DataFrame({
            'Actual': monthly_expenses,
            'Budget': [sum(budgets.values())] * len(monthly_expenses)  # Same budget for simplicity
        })

        # DataFrame.plot returns an Axes, but st.pyplot needs the Figure (it
        # calls .savefig on what it is given). Create the figure explicitly
        # and draw onto its axes so the right object is passed to Streamlit.
        fig3, bar_axes = plt.subplots(figsize=(10, 6))
        monthly_expenses_df.plot(kind='bar', ax=bar_axes)
        bar_axes.set_title('Monthly Spending vs Budget')
        bar_axes.set_ylabel('Amount ($)')
        st.pyplot(fig3)

        # -----------------------------------------
        # Question answering over the transactions
        # -----------------------------------------

        # Question-answering pipeline backed by the SQuAD-distilled DistilBERT checkpoint
        qa_model = pipeline('question-answering', model="distilbert-base-uncased-distilled-squad")

        def describe_transaction(row):
            """Render one transaction row as a single line of context text."""
            return f"Description: {row['Description']}, Amount: {row['Amount']}, Category: {row['Category']}"

        # One line per transaction, joined into the context handed to the QA model
        transaction_lines = df.apply(describe_transaction, axis=1)
        knowledge_base = "\n".join(transaction_lines)

        def answer_question(question):
            """Return the answer the QA model produces for `question` from the knowledge base."""
            prediction = qa_model(question=question, context=knowledge_base)
            return prediction['answer']

        # Prompt for a free-form question; answer only once something was entered
        st.write("Ask a question about your expenses:")
        user_question = st.text_input("Enter your question:")

        if user_question:
            st.write(f"Answer: {answer_question(user_question)}")