engralimalik committed on
Commit
11d5829
·
verified ·
1 Parent(s): c7d0bb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -14
app.py CHANGED
@@ -11,22 +11,111 @@ if uploaded_file is not None:
11
  # Load the file into a DataFrame
12
  df = pd.read_csv(uploaded_file)
13
 
14
- # Display the first few rows of the dataset
15
- st.write("First few rows of the dataset:", df.head())
16
 
17
- # Initialize Hugging Face's zero-shot text classification model
18
- model_name = 'distilbert-base-uncased'
19
- classifier = pipeline('zero-shot-classification', model=model_name)
 
 
 
 
20
 
21
- # List of possible expense categories
22
- categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]
23
 
24
- # Function to classify transaction descriptions into categories
25
- def categorize_expense(description):
26
- result = classifier(description, candidate_labels=categories)
27
- return result['labels'][0] # Choose the most probable category
28
 
29
- # Apply the categorization function to the 'Description' column in the dataset
30
- df['Category'] = df['Description'].apply(categorize_expense)
31
 
32
- # Show the ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # Load the file into a DataFrame
12
  df = pd.read_csv(uploaded_file)
13
 
14
+ # Debug: Display the column names to check if 'Description' exists
15
+ st.write("Columns in the uploaded file:", df.columns)
16
 
17
+ # Check if the 'Description' column exists
18
+ if 'Description' not in df.columns:
19
+ st.error("Error: The CSV file does not contain a 'Description' column.")
20
+ else:
21
+ # Initialize Hugging Face's zero-shot text classification model
22
+ model_name = 'distilbert-base-uncased'
23
+ classifier = pipeline('zero-shot-classification', model=model_name)
24
 
25
+ # List of possible expense categories
26
+ categories = ["Groceries", "Rent", "Utilities", "Entertainment", "Dining", "Transportation", "Salary"]
27
 
28
+ # Function to classify transaction descriptions into categories
29
+ def categorize_expense(description):
30
+ result = classifier(description, candidate_labels=categories)
31
+ return result['labels'][0] # Choose the most probable category
32
 
33
+ # Apply the categorization function to the 'Description' column in the dataset
34
+ df['Category'] = df['Description'].apply(categorize_expense)
35
 
36
+ # Show the categorized data
37
+ st.write("Categorized Data:", df.head())
38
+
39
+ # Visualization 1: Pie Chart of Spending by Category
40
+ category_expenses = df.groupby('Category')['Amount'].sum()
41
+
42
+ # Plot pie chart for expense distribution by category
43
+ fig1 = plt.figure(figsize=(8, 8))
44
+ category_expenses.plot(kind='pie', autopct='%1.1f%%', startangle=90, colors=plt.cm.Paired.colors)
45
+ plt.title('Expense Distribution by Category')
46
+ plt.ylabel('')
47
+ st.pyplot(fig1)
48
+
49
+ # Visualization 2: Monthly Spending Trends (Line Chart)
50
+ # Convert 'Date' to datetime
51
+ df['Date'] = pd.to_datetime(df['Date'])
52
+
53
+ # Extract month-year for grouping and convert the Period to string to avoid JSON serialization issues
54
+ df['Month'] = df['Date'].dt.to_period('M').astype(str) # Convert Period to string
55
+
56
+ # Group by month and calculate the total amount spent per month
57
+ monthly_expenses = df.groupby('Month')['Amount'].sum()
58
+
59
+ # Plot monthly spending trends as a line chart
60
+ fig2 = px.line(
61
+ monthly_expenses,
62
+ x=monthly_expenses.index,
63
+ y=monthly_expenses.values,
64
+ title="Monthly Expenses",
65
+ labels={"x": "Month", "y": "Amount ($)"}
66
+ )
67
+ st.plotly_chart(fig2)
68
+
69
+ # Budget and Alerts Example (Tracking if any category exceeds its budget)
70
+ budgets = {
71
+ "Groceries": 300,
72
+ "Rent": 1000,
73
+ "Utilities": 150,
74
+ "Entertainment": 100,
75
+ "Dining": 150,
76
+ "Transportation": 120,
77
+ }
78
+
79
+ # Track if any category exceeds its budget
80
+ df['Budget_Exceeded'] = df.apply(lambda row: row['Amount'] > budgets.get(row['Category'], 0), axis=1)
81
+
82
+ # Show which categories exceeded their budgets
83
+ exceeded_budget = df[df['Budget_Exceeded'] == True]
84
+ st.write("Categories that exceeded the budget:", exceeded_budget[['Date', 'Category', 'Amount']])
85
+
86
+ # Visualization 3: Monthly Spending vs Budget (Bar Chart)
87
+ # Create a new DataFrame to show monthly budget vs actual spending
88
+ monthly_expenses_df = pd.DataFrame({
89
+ 'Actual': monthly_expenses,
90
+ 'Budget': [sum(budgets.values())] * len(monthly_expenses) # Same budget for simplicity
91
+ })
92
+
93
+ # Plot a bar chart to compare actual spending vs budget
94
+ fig3 = monthly_expenses_df.plot(kind='bar', figsize=(10, 6))
95
+ plt.title('Monthly Spending vs Budget')
96
+ plt.ylabel('Amount ($)')
97
+ st.pyplot(fig3)
98
+
99
+ # -----------------------------------------
100
+ # Question Answering Functionality
101
+ # -----------------------------------------
102
+
103
+ # Initialize Hugging Face's question answering model (DistilBERT)
104
+ qa_model = pipeline('question-answering', model="distilbert-base-uncased-distilled-squad")
105
+
106
+ # Convert the DataFrame to a text block suitable for QA
107
+ # Concatenate relevant information (Description, Amount, Category) to form a knowledge base
108
+ knowledge_base = "\n".join(df.apply(lambda row: f"Description: {row['Description']}, Amount: {row['Amount']}, Category: {row['Category']}", axis=1))
109
+
110
+ # Function to answer questions based on the knowledge base
111
+ def answer_question(question):
112
+ result = qa_model(question=question, context=knowledge_base)
113
+ return result['answer']
114
+
115
+ # Test the functionality
116
+ st.write("Ask a question about your expenses:")
117
+ user_question = st.text_input("Enter your question:")
118
+
119
+ if user_question:
120
+ answer = answer_question(user_question)
121
+ st.write(f"Answer: {answer}")