import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Upload CSV file containing transaction data
uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")

if uploaded_file is not None:
    # Load the file into a DataFrame
    df = pd.read_csv(uploaded_file)

    # Debug: Display the column names to check if 'Description' exists
    st.write("Columns in the uploaded file:", df.columns)

    # Check if the 'Description' column exists
    if 'Description' not in df.columns:
        st.error("Error: The CSV file does not contain a 'Description' column.")
    else:
        # Initialize Hugging Face's zero-shot text classification model.
        # Zero-shot classification is implemented as natural-language
        # inference, so the checkpoint must be fine-tuned on an NLI task.
        # A plain base checkpoint such as 'distilbert-base-uncased' only gets
        # a randomly initialised classification head and yields arbitrary labels.
        model_name = 'facebook/bart-large-mnli'

        @st.cache_resource
        def load_classifier(name):
            """Build the zero-shot pipeline once and reuse it across Streamlit reruns."""
            return pipeline('zero-shot-classification', model=name)

        classifier = load_classifier(model_name)

        # List of possible expense categories
        categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]

        def categorize_expense(description):
            """Return the highest-scoring category label for one transaction description."""
            result = classifier(description, candidate_labels=categories)
            return result['labels'][0]  # Labels are ordered from most to least likely

        # Normalise descriptions to stripped strings so missing values (NaN)
        # are never handed to the model as floats
        descriptions = df['Description'].fillna('').astype(str).str.strip()

        # Run the (slow) model once per distinct non-empty description and
        # reuse the result for repeated transactions
        category_by_description = {
            text: categorize_expense(text)
            for text in descriptions.unique()
            if text
        }

        # Rows without a usable description get an explicit fallback label
        df['Category'] = descriptions.map(category_by_description).fillna('Uncategorized')

        # Show the categorized data
        st.write("Categorized Data:", df.head())

        # Visualization 1: Pie Chart of Spending by Category
        # Total amount per category
        category_expenses = df.groupby('Category')['Amount'].sum()

        # Draw on an explicitly created figure/axes pair instead of relying on
        # pyplot's implicit "current axes"
        fig1, ax1 = plt.subplots(figsize=(8, 8))
        category_expenses.plot(
            kind='pie',
            ax=ax1,
            autopct='%1.1f%%',
            startangle=90,
            colors=plt.cm.Paired.colors,
        )
        ax1.set_title('Expense Distribution by Category')
        ax1.set_ylabel('')
        st.pyplot(fig1)

        # Streamlit re-executes the script on every interaction; close the
        # figure so it does not pile up in pyplot's global figure registry
        plt.close(fig1)

        # Visualization 2: Monthly Spending Trends (Line Chart)
        # Parse the 'Date' column into datetimes so a month can be derived from it
        df['Date'] = pd.to_datetime(df['Date'])

        # Label every row with its year-month; the Period values are turned into
        # plain strings to avoid JSON serialization issues
        month_labels = df['Date'].dt.to_period('M').astype(str)
        df['Month'] = month_labels

        # Total amount spent in each month
        monthly_expenses = df['Amount'].groupby(df['Month']).sum()

        # Render the monthly totals as a line chart
        axis_labels = {"x": "Month", "y": "Amount ($)"}
        fig2 = px.line(
            monthly_expenses,
            x=monthly_expenses.index,
            y=monthly_expenses.values,
            title="Monthly Expenses",
            labels=axis_labels,
        )
        st.plotly_chart(fig2)

        # Budget and Alerts Example (Tracking if any category exceeds its budget)
        budgets = {
            "Groceries": 300,
            "Rent": 1000,
            "Utilities": 150,
            "Entertainment": 100,
            "Dining": 150,
            "Transportation": 120,
        }

        # The budgets are monthly limits, so compare each category's total per
        # month against its budget rather than judging single transactions
        monthly_category_totals = df.groupby(['Month', 'Category'], as_index=False)['Amount'].sum()

        # Categories without a budget entry (e.g. "Salary") map to NaN; NaN
        # never compares as exceeded, so they are not reported
        monthly_category_totals['Budget'] = monthly_category_totals['Category'].map(budgets)
        over_budget = monthly_category_totals['Amount'] > monthly_category_totals['Budget']
        exceeded_budget = monthly_category_totals[over_budget]

        # Keep a per-transaction flag: True when the row belongs to a
        # (month, category) pair whose total went over budget
        exceeded_keys = set(zip(exceeded_budget['Month'], exceeded_budget['Category']))
        df['Budget_Exceeded'] = [
            (month, category) in exceeded_keys
            for month, category in zip(df['Month'], df['Category'])
        ]

        # Show which categories exceeded their budgets, and in which month
        st.write(
            "Categories that exceeded the budget:",
            exceeded_budget[['Month', 'Category', 'Amount', 'Budget']],
        )

        # Visualization 3: Monthly Spending vs Budget (Bar Chart)
        # Create a new DataFrame to show monthly budget vs actual spending.
        # The same overall budget (sum of all category budgets) is used for
        # every month for simplicity.
        total_budget = sum(budgets.values())
        monthly_expenses_df = monthly_expenses.to_frame(name='Actual').assign(Budget=total_budget)

        # DataFrame.plot returns an Axes, not a Figure, while st.pyplot needs a
        # Figure; create the figure explicitly and draw onto its axes
        fig3, ax3 = plt.subplots(figsize=(10, 6))
        monthly_expenses_df.plot(kind='bar', ax=ax3)
        ax3.set_title('Monthly Spending vs Budget')
        ax3.set_ylabel('Amount ($)')
        st.pyplot(fig3)

        # Close the figure so repeated script reruns do not accumulate open figures
        plt.close(fig3)

        # -----------------------------------------
        # Question Answering Functionality
        # -----------------------------------------

        @st.cache_resource
        def load_qa_model():
            """Build the question-answering pipeline once and reuse it across Streamlit reruns."""
            return pipeline('question-answering', model="distilbert-base-uncased-distilled-squad")

        # Every widget interaction (including typing a question) re-executes the
        # script, so the model is served from the cache instead of being reloaded
        qa_model = load_qa_model()

        # Convert the DataFrame to a text block suitable for QA: one line per
        # transaction with its description, amount and category
        knowledge_base = "\n".join(
            f"Description: {description}, Amount: {amount}, Category: {category}"
            for description, amount, category in zip(df['Description'], df['Amount'], df['Category'])
        )

        def answer_question(question):
            """Return the answer span the QA model extracts from the knowledge base."""
            result = qa_model(question=question, context=knowledge_base)
            return result['answer']

        # Let the user query the expenses
        st.write("Ask a question about your expenses:")
        user_question = st.text_input("Enter your question:")

        # Only query the model when there is both a question and some context;
        # the pipeline cannot answer against an empty context
        if user_question and knowledge_base:
            answer = answer_question(user_question)
            st.write(f"Answer: {answer}")