"""Streamlit expense dashboard.

Reads an uploaded CSV of transactions (columns ``Description``, ``Amount``
and ``Date``), assigns each transaction a category with a zero-shot
classifier, renders spending charts, flags transactions that exceed the
budget of their category, and answers free-text questions about the data
with an extractive question-answering model.
"""

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Zero-shot classification needs a model fine-tuned on NLI (entailment).
# A plain base checkpoint such as 'distilbert-base-uncased' has no trained
# classification head, so its category scores are essentially arbitrary.
ZERO_SHOT_MODEL = "facebook/bart-large-mnli"
QA_MODEL = "distilbert-base-uncased-distilled-squad"

# Candidate labels offered to the zero-shot classifier.
CATEGORIES = [
    "Groceries",
    "Rent",
    "Utilities",
    "Entertainment",
    "Dining",
    "Transportation",
    "Salary",
]

# Budget per category. Categories without an entry (e.g. "Salary") have no
# budget and are therefore never flagged as exceeded.
BUDGETS = {
    "Groceries": 300,
    "Rent": 1000,
    "Utilities": 150,
    "Entertainment": 100,
    "Dining": 150,
    "Transportation": 120,
}


@st.cache_resource
def load_classifier():
    """Return the zero-shot classification pipeline, built once per process."""
    return pipeline("zero-shot-classification", model=ZERO_SHOT_MODEL)


@st.cache_resource
def load_qa_model():
    """Return the question-answering pipeline, built once per process."""
    return pipeline("question-answering", model=QA_MODEL)


def categorize_expense(description):
    """Return the most probable category label for one description.

    The value is converted with ``str`` first so that non-string cells
    (numbers, missing values) do not make the pipeline raise.
    """
    result = load_classifier()(str(description), candidate_labels=CATEGORIES)
    return result["labels"][0]  # labels are sorted by descending score


def show_category_pie(df):
    """Render a pie chart of the summed ``Amount`` per ``Category``."""
    category_expenses = df.groupby("Category")["Amount"].sum()
    fig, ax = plt.subplots(figsize=(8, 8))
    category_expenses.plot(
        kind="pie",
        ax=ax,
        autopct="%1.1f%%",
        startangle=90,
        colors=plt.cm.Paired.colors,
    )
    ax.set_title("Expense Distribution by Category")
    ax.set_ylabel("")
    st.pyplot(fig)
    # pyplot keeps every figure alive until closed; the script reruns on
    # each interaction, so close it to avoid accumulating figures.
    plt.close(fig)


def show_monthly_trend(monthly_expenses):
    """Render a line chart of total ``Amount`` per month."""
    monthly_df = monthly_expenses.reset_index()  # columns: Month, Amount
    fig = px.line(
        monthly_df,
        x="Month",
        y="Amount",
        title="Monthly Expenses",
        labels={"Month": "Month", "Amount": "Amount ($)"},
    )
    st.plotly_chart(fig)


def show_budget_alerts(df):
    """Flag and list the transactions whose amount exceeds their budget.

    Adds a boolean ``Budget_Exceeded`` column to ``df``. Categories without
    a budget map to NaN, and any comparison with NaN is False, so they are
    never flagged.
    """
    limits = df["Category"].map(BUDGETS)
    df["Budget_Exceeded"] = df["Amount"] > limits
    exceeded_budget = df[df["Budget_Exceeded"]]
    st.write(
        "Categories that exceeded the budget:",
        exceeded_budget[["Date", "Category", "Amount"]],
    )


def show_monthly_vs_budget(monthly_expenses):
    """Render a bar chart comparing monthly spending with the total budget."""
    monthly_expenses_df = pd.DataFrame({
        "Actual": monthly_expenses,
        # Same overall budget for every month, for simplicity.
        "Budget": sum(BUDGETS.values()),
    })
    # DataFrame.plot returns an Axes, while st.pyplot expects a Figure, so
    # create the figure explicitly and draw onto its axes.
    fig, ax = plt.subplots(figsize=(10, 6))
    monthly_expenses_df.plot(kind="bar", ax=ax)
    ax.set_title("Monthly Spending vs Budget")
    ax.set_ylabel("Amount ($)")
    st.pyplot(fig)
    plt.close(fig)


def build_knowledge_base(df):
    """Return one text line per transaction, used as the QA context."""
    return "\n".join(
        f"Description: {description}, Amount: {amount}, Category: {category}"
        for description, amount, category in zip(
            df["Description"], df["Amount"], df["Category"]
        )
    )


def answer_question(question, knowledge_base):
    """Return the answer span extracted from ``knowledge_base``."""
    result = load_qa_model()(question=question, context=knowledge_base)
    return result["answer"]


def main():
    """Run the dashboard: upload, categorize, visualize, and answer."""
    # Upload CSV file containing transaction data.
    uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)

    # Debug aid: show the column names found in the upload.
    st.write("Columns in the uploaded file:", df.columns)

    if "Description" not in df.columns:
        st.error("Error: The CSV file does not contain a 'Description' column.")
        return
    # 'Amount' and 'Date' are needed by every chart below; report them
    # up front instead of failing later with a KeyError.
    for column in ("Amount", "Date"):
        if column not in df.columns:
            st.error(
                f"Error: The CSV file does not contain a '{column}' column."
            )
            return
    if df.empty:
        st.warning("The uploaded file contains no transactions.")
        return

    # Classify every transaction description.
    df["Category"] = df["Description"].apply(categorize_expense)
    st.write("Categorized Data:", df.head())

    # Visualization 1: spending share per category.
    show_category_pie(df)

    # Visualization 2: monthly spending trend. The Period is converted to
    # str because Period objects are not JSON serializable for plotly.
    df["Date"] = pd.to_datetime(df["Date"])
    df["Month"] = df["Date"].dt.to_period("M").astype(str)
    monthly_expenses = df.groupby("Month")["Amount"].sum()
    show_monthly_trend(monthly_expenses)

    # Budget alerts.
    show_budget_alerts(df)

    # Visualization 3: monthly spending vs total budget.
    show_monthly_vs_budget(monthly_expenses)

    # -----------------------------------------
    # Question Answering Functionality
    # -----------------------------------------
    knowledge_base = build_knowledge_base(df)
    st.write("Ask a question about your expenses:")
    user_question = st.text_input("Enter your question:")
    if user_question:
        answer = answer_question(user_question, knowledge_base)
        st.write(f"Answer: {answer}")


# Streamlit executes the script as the ``__main__`` module.
if __name__ == "__main__":
    main()