File size: 1,539 Bytes
f10ec56 2c359f1 c7d0bb8 f10ec56 c7d0bb8 2c359f1 a6ee9ca c7d0bb8 a6ee9ca 6ca4f9e 11d5829 a6ee9ca 11d5829 a6ee9ca 11d5829 a6ee9ca 11d5829 a6ee9ca 11d5829 a6ee9ca 11d5829 a79c451 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st
from transformers import pipeline
# Expense categorization flow:
#   1. let the user upload a CSV of transactions,
#   2. verify that it has a 'Description' column,
#   3. label every description with a zero-shot classifier,
#   4. show a preview of the labelled data.

# Upload CSV file containing transaction data
uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")

if uploaded_file is not None:
    # Load the file into a DataFrame
    df = pd.read_csv(uploaded_file)

    # Debug: display the column names so the user can see what was parsed
    st.write("Columns in the uploaded file:", df.columns)

    # The classifier works on free-text descriptions; without that column
    # there is nothing to categorize.
    if 'Description' not in df.columns:
        st.error("Error: The CSV file does not contain a 'Description' column.")
    else:
        # Zero-shot classification scores each candidate label through an
        # NLI (entailment) head, so the checkpoint must be fine-tuned on an
        # NLI task. The plain 'distilbert-base-uncased' encoder has no such
        # head and would yield essentially arbitrary labels; use the
        # MNLI-fine-tuned DistilBERT instead.
        # NOTE(review): Streamlit re-executes the script on interaction, so
        # the model is presumably re-created each time; consider caching it
        # if the installed Streamlit version offers a resource cache.
        model_name = 'typeform/distilbert-base-uncased-mnli'
        classifier = pipeline('zero-shot-classification', model=model_name)

        # List of possible expense categories
        categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]

        # Labels already computed, keyed by description text, so identical
        # descriptions are sent through the model only once.
        label_cache = {}

        def categorize_expense(description):
            """Return the most probable expense category for a description.

            Args:
                description: One cell of the 'Description' column. May be a
                    missing value (NaN/None) when the CSV cell was empty.

            Returns:
                The top-ranked label from ``categories``, or
                ``"Uncategorized"`` when the description is missing or blank.
            """
            # Empty CSV cells arrive as NaN, which the pipeline cannot
            # tokenize; give them an explicit placeholder label instead.
            if pd.isna(description):
                return "Uncategorized"

            text = str(description).strip()
            if not text:
                return "Uncategorized"

            if text not in label_cache:
                result = classifier(text, candidate_labels=categories)
                # The pipeline returns labels sorted by descending score.
                label_cache[text] = result['labels'][0]
            return label_cache[text]

        # Apply the categorization function to the 'Description' column
        df['Category'] = df['Description'].apply(categorize_expense)

        # Show the categorized data
        st.write("Categorized Data:", df.head())
#
|