File size: 1,539 Bytes
f10ec56
 
2c359f1
c7d0bb8
 
f10ec56
c7d0bb8
 
2c359f1
a6ee9ca
c7d0bb8
a6ee9ca
6ca4f9e
11d5829
 
a6ee9ca
11d5829
 
 
 
 
 
 
a6ee9ca
11d5829
 
a6ee9ca
11d5829
 
 
 
a6ee9ca
11d5829
 
a6ee9ca
11d5829
 
 
a79c451
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st
from transformers import pipeline

# Let the user upload a CSV file containing transaction data.
uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")

if uploaded_file is not None:
    # Load the file into a DataFrame. An empty, malformed or non-UTF-8 upload
    # makes pandas raise; report that in the UI instead of showing a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Error: The uploaded file could not be read as CSV ({exc}).")
        # st.stop() halts this script run, so nothing below sees a missing `df`.
        st.stop()

    # Debug: Display the column names to check if 'Description' exists
    st.write("Columns in the uploaded file:", df.columns)

    # The categorization step reads the 'Description' column, so bail out
    # with a visible error when the upload does not provide it.
    if 'Description' not in df.columns:
        st.error("Error: The CSV file does not contain a 'Description' column.")
    else:
        # Initialize Hugging Face's zero-shot text classification pipeline.
        # Zero-shot classification scores each candidate label as an
        # entailment hypothesis, so the checkpoint must be fine-tuned on NLI
        # and expose an "entailment" label. The plain 'distilbert-base-uncased'
        # checkpoint has no such head, which makes its predictions meaningless;
        # use the MNLI-fine-tuned DistilBERT instead.
        # NOTE(review): the pipeline is rebuilt on every Streamlit rerun;
        # consider caching it if the installed Streamlit version supports it.
        model_name = 'typeform/distilbert-base-uncased-mnli'
        classifier = pipeline('zero-shot-classification', model=model_name)

        # Candidate labels the classifier chooses between for each transaction
        categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]

        # Function to classify transaction descriptions into categories
        def categorize_expense(description):
            """Return the best-matching category for one transaction description.

            Args:
                description: The raw cell value from the 'Description' column.
                    Usually a string, but blank CSV cells arrive as NaN/None.

            Returns:
                The top-ranked label from ``categories`` as reported by the
                zero-shot classifier, or ``"Uncategorized"`` when the
                description is missing or contains only whitespace.
            """
            fallback = "Uncategorized"

            # Blank cells are loaded by pandas as NaN; the classifier only
            # accepts non-empty text, so short-circuit instead of crashing.
            if pd.isna(description):
                return fallback

            # Coerce non-string cell values (e.g. numbers) to text.
            text = str(description)
            if not text.strip():
                return fallback

            result = classifier(text, candidate_labels=categories)
            return result['labels'][0]  # Choose the most probable category

        # Run every value of the 'Description' column through the categorizer
        # and store the resulting labels in a new 'Category' column.
        descriptions = df['Description']
        df['Category'] = descriptions.map(categorize_expense)

        # Preview the first rows of the labelled table in the app
        preview = df.head()
        st.write("Categorized Data:", preview)

        #