Switch to saving images as files instead of base64, optimize LLM parameters
Browse files- app.py +19 -12
- images/category_spending.png +0 -0
- utils/__pycache__/data_processor.cpython-39.pyc +0 -0
- utils/__pycache__/invoice_parser.cpython-39.pyc +0 -0
- utils/__pycache__/llm_analyzer.cpython-39.pyc +0 -0
- utils/__pycache__/report_generator.cpython-39.pyc +0 -0
- utils/data_processor.py +2 -0
- utils/invoice_parser.py +0 -1
- utils/llm_analyzer.py +4 -2
- utils/report_generator.py +28 -30
app.py
CHANGED
@@ -3,42 +3,49 @@ from utils.invoice_parser import parse_invoice
|
|
3 |
from utils.data_processor import process_data
|
4 |
from utils.llm_analyzer import LLMAnalyzer
|
5 |
from utils.report_generator import generate_report
|
|
|
6 |
|
7 |
# Initialize the LLM analyzer
|
8 |
analyzer = LLMAnalyzer()
|
9 |
-
|
10 |
def process_invoice(pdf_file):
|
11 |
try:
|
12 |
if not pdf_file:
|
13 |
-
return "No file uploaded."
|
14 |
|
15 |
-
transactions = parse_invoice(pdf_file)
|
16 |
if not transactions:
|
17 |
-
return "No transactions found in the invoice."
|
18 |
|
19 |
df = process_data(transactions)
|
20 |
if df.empty:
|
21 |
-
return "No valid transactions after processing."
|
22 |
|
23 |
df_categorized = analyzer.categorize_transactions(df)
|
24 |
spending_analysis = analyzer.analyze_spending_patterns(df_categorized)
|
25 |
-
|
26 |
recommendations = analyzer.generate_budget_recommendations(spending_analysis)
|
27 |
-
|
28 |
report = generate_report(df_categorized, spending_analysis, recommendations)
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
31 |
|
32 |
except Exception as e:
|
33 |
-
return f"An error occurred: {str(e)}"
|
34 |
|
35 |
interface = gr.Interface(
|
36 |
fn=process_invoice,
|
37 |
-
inputs=gr.File(label="Upload Invoice (PDF)", type="filepath"),
|
38 |
-
outputs=
|
|
|
|
|
|
|
|
|
39 |
title="Invoice Reader & Budget Categorizer",
|
40 |
description="Upload your invoice PDF to categorize transactions, analyze spending patterns, and get budget optimization recommendations."
|
41 |
)
|
42 |
|
43 |
if __name__ == "__main__":
|
44 |
-
interface.launch(
|
|
|
3 |
from utils.data_processor import process_data
|
4 |
from utils.llm_analyzer import LLMAnalyzer
|
5 |
from utils.report_generator import generate_report
|
6 |
+
import os
|
7 |
|
8 |
# Initialize the LLM analyzer
|
9 |
analyzer = LLMAnalyzer()
|
10 |
+
|
11 |
def process_invoice(pdf_file):
    """Run the invoice pipeline and return the report plus chart image paths.

    Args:
        pdf_file: Path of the uploaded PDF as delivered by the ``gr.File``
            input (``type="filepath"``), or a falsy value when nothing was
            uploaded.

    Returns:
        A 3-tuple ``(report, category_image_path, monthly_image_path)`` that
        lines up with the three interface outputs. ``report`` is the Markdown
        report, or a short message when the pipeline stops early or fails.
        Each image path is ``None`` when the corresponding chart was not
        produced by this run.
    """
    category_image_path = os.path.join("images", "category_spending.png")
    monthly_image_path = os.path.join("images", "monthly_spending.png")

    try:
        if not pdf_file:
            return "No file uploaded.", None, None

        transactions = parse_invoice(pdf_file)
        if not transactions:
            return "No transactions found in the invoice.", None, None

        df = process_data(transactions)
        if df.empty:
            return "No valid transactions after processing.", None, None

        df_categorized = analyzer.categorize_transactions(df)
        spending_analysis = analyzer.analyze_spending_patterns(df_categorized)
        recommendations = analyzer.generate_budget_recommendations(spending_analysis)

        # The charts are written to fixed file names, and generate_report only
        # writes the monthly chart when more than one month is present. Remove
        # leftovers from earlier runs so an old chart is never shown next to a
        # new report.
        for stale_path in (category_image_path, monthly_image_path):
            try:
                os.remove(stale_path)
            except OSError:
                # Best effort: a missing file is the normal case, and a file
                # that cannot be removed should not abort the analysis.
                pass

        report = generate_report(df_categorized, spending_analysis, recommendations)

        # Only hand back charts that actually exist after this run.
        return (
            report,
            category_image_path if os.path.exists(category_image_path) else None,
            monthly_image_path if os.path.exists(monthly_image_path) else None,
        )

    except Exception as e:
        # Top-level UI boundary: surface the failure as text instead of
        # crashing the interface; image outputs stay empty.
        return f"An error occurred: {e}", None, None
|
37 |
|
38 |
# Gradio UI: one PDF upload in; the Markdown report and the two chart images
# out, in the same order as the tuple returned by process_invoice.
interface = gr.Interface(
    fn=process_invoice,
    inputs=gr.File(label="Upload Invoice (PDF)", type="filepath"),
    outputs=[
        gr.Markdown(label="Report"),
        gr.Image(label="Category-wise Spending", type="filepath"),
        gr.Image(label="Monthly Spending Trend", type="filepath"),
    ],
    title="Invoice Reader & Budget Categorizer",
    description="Upload your invoice PDF to categorize transactions, analyze spending patterns, and get budget optimization recommendations.",
)

if __name__ == "__main__":
    # share=True was dropped for Spaces compatibility.
    interface.launch()
|
images/category_spending.png
ADDED
![images/category_spending.png](images/category_spending.png) |
utils/__pycache__/data_processor.cpython-39.pyc
CHANGED
Binary files a/utils/__pycache__/data_processor.cpython-39.pyc and b/utils/__pycache__/data_processor.cpython-39.pyc differ
|
|
utils/__pycache__/invoice_parser.cpython-39.pyc
CHANGED
Binary files a/utils/__pycache__/invoice_parser.cpython-39.pyc and b/utils/__pycache__/invoice_parser.cpython-39.pyc differ
|
|
utils/__pycache__/llm_analyzer.cpython-39.pyc
CHANGED
Binary files a/utils/__pycache__/llm_analyzer.cpython-39.pyc and b/utils/__pycache__/llm_analyzer.cpython-39.pyc differ
|
|
utils/__pycache__/report_generator.cpython-39.pyc
CHANGED
Binary files a/utils/__pycache__/report_generator.cpython-39.pyc and b/utils/__pycache__/report_generator.cpython-39.pyc differ
|
|
utils/data_processor.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import pandas as pd
|
|
|
2 |
def process_data(transactions):
|
|
|
3 |
df = pd.DataFrame(transactions)
|
4 |
|
5 |
if df.empty:
|
|
|
1 |
import pandas as pd
|
2 |
+
|
3 |
def process_data(transactions):
|
4 |
+
# Convert to DataFrame
|
5 |
df = pd.DataFrame(transactions)
|
6 |
|
7 |
if df.empty:
|
utils/invoice_parser.py
CHANGED
@@ -11,7 +11,6 @@ def parse_invoice(pdf_file):
|
|
11 |
lines = text.split("\n")
|
12 |
|
13 |
for line in lines:
|
14 |
-
# Example pattern for invoice transaction
|
15 |
pattern = r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)"
|
16 |
match = re.match(pattern, line.strip())
|
17 |
|
|
|
11 |
lines = text.split("\n")
|
12 |
|
13 |
for line in lines:
|
|
|
14 |
pattern = r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)"
|
15 |
match = re.match(pattern, line.strip())
|
16 |
|
utils/llm_analyzer.py
CHANGED
@@ -57,7 +57,8 @@ class LLMAnalyzer:
|
|
57 |
f"Category-wise Spending: {category_summary}"
|
58 |
)
|
59 |
|
60 |
-
|
|
|
61 |
return summary
|
62 |
|
63 |
def generate_budget_recommendations(self, spending_analysis):
|
@@ -67,5 +68,6 @@ class LLMAnalyzer:
|
|
67 |
f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
|
68 |
)
|
69 |
|
70 |
-
|
|
|
71 |
return recommendations
|
|
|
57 |
f"Category-wise Spending: {category_summary}"
|
58 |
)
|
59 |
|
60 |
+
# Adjust max_length and enable truncation
|
61 |
+
summary = self.summarizer(analysis_text, max_length=50, min_length=20, do_sample=False, truncation=True)[0]["summary_text"]
|
62 |
return summary
|
63 |
|
64 |
def generate_budget_recommendations(self, spending_analysis):
|
|
|
68 |
f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
|
69 |
)
|
70 |
|
71 |
+
# Enable sampling with temperature for better generation
|
72 |
+
recommendations = self.text_generator(prompt, max_length=150, num_return_sequences=1, do_sample=True, temperature=0.7)[0]["generated_text"]
|
73 |
return recommendations
|
utils/report_generator.py
CHANGED
@@ -1,55 +1,53 @@
|
|
1 |
import matplotlib.pyplot as plt
|
2 |
import pandas as pd
|
3 |
-
import
|
4 |
-
import base64
|
5 |
|
6 |
def generate_report(df, spending_analysis, recommendations):
|
|
|
|
|
|
|
|
|
|
|
7 |
report = f"""
|
8 |
## Invoice Reader & Budget Categorizer Report
|
9 |
|
10 |
### Categorized Transactions
|
11 |
-
{df.to_markdown(index=False)}
|
12 |
|
13 |
### Spending Insights
|
14 |
{spending_analysis}
|
15 |
|
16 |
### Budget Recommendations
|
17 |
{recommendations}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
"""
|
19 |
|
20 |
-
# Generate visualizations
|
21 |
if not df.empty:
|
22 |
# Category-wise spending pie chart
|
23 |
category_spending = df.groupby("category")["amount"].sum()
|
24 |
plt.figure(figsize=(6, 6))
|
25 |
plt.pie(category_spending, labels=category_spending.index, autopct="%1.1f%%")
|
26 |
plt.title("Category-wise Spending")
|
27 |
-
|
28 |
-
plt.savefig(buf, format="png")
|
29 |
-
buf.seek(0)
|
30 |
-
category_plot = base64.b64encode(buf.getvalue()).decode("utf-8")
|
31 |
plt.close()
|
32 |
-
|
33 |
-
# Monthly spending trend
|
34 |
monthly_spending = df.groupby("month")["amount"].sum()
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
report += f"""
|
47 |
-
### Visualizations
|
48 |
-
#### Category-wise Spending
|
49 |
-

|
50 |
-
|
51 |
-
#### Monthly Spending Trend
|
52 |
-

|
53 |
-
"""
|
54 |
-
|
55 |
return report
|
|
|
1 |
import matplotlib.pyplot as plt
|
2 |
import pandas as pd
|
3 |
+
import os
|
|
|
4 |
|
5 |
def generate_report(df, spending_analysis, recommendations):
    """Build the Markdown report and save the spending charts as PNG files.

    Charts are written into the ``images`` directory (created on demand):
    ``category_spending.png`` whenever ``df`` has rows, and
    ``monthly_spending.png`` only when more than one month is present.

    Args:
        df: Transactions DataFrame. The ``"category"``, ``"month"`` and
            ``"amount"`` columns are read when it is not empty.
        spending_analysis: Text placed under "Spending Insights".
        recommendations: Text placed under "Budget Recommendations".

    Returns:
        The report as a Markdown string. The "Visualizations" section only
        links charts that were written by this call.
    """
    images_dir = "images"
    category_image = os.path.join(images_dir, "category_spending.png")
    monthly_image = os.path.join(images_dir, "monthly_spending.png")

    # Render one Markdown table per category. Going through
    # Series.to_string() would produce a Series repr rather than Markdown.
    if df.empty:
        transactions_md = "(No transactions)"
    else:
        transactions_md = "\n\n".join(
            f"**{category}**\n\n{group.to_markdown(index=False)}"
            for category, group in df.groupby("category")
        )

    report_parts = [
        "## Invoice Reader & Budget Categorizer Report",
        "### Categorized Transactions",
        transactions_md,
        "### Spending Insights",
        f"{spending_analysis}",
        "### Budget Recommendations",
        f"{recommendations}",
    ]

    chart_sections = []
    if not df.empty:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(images_dir, exist_ok=True)

        # Category-wise spending pie chart
        category_spending = df.groupby("category")["amount"].sum()
        plt.figure(figsize=(6, 6))
        try:
            plt.pie(category_spending, labels=category_spending.index, autopct="%1.1f%%")
            plt.title("Category-wise Spending")
            plt.savefig(category_image)
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close()
        chart_sections.append(
            f"#### Category-wise Spending\n\n![Category-wise Spending]({category_image})"
        )

        # Monthly spending trend: a line needs at least two months of data.
        monthly_spending = df.groupby("month")["amount"].sum()
        if len(monthly_spending) > 1:
            plt.figure(figsize=(8, 4))
            try:
                monthly_spending.plot(kind="line", marker="o")
                plt.title("Monthly Spending Trend")
                plt.xlabel("Month")
                plt.ylabel("Amount")
                plt.savefig(monthly_image)
            finally:
                plt.close()
            chart_sections.append(
                f"#### Monthly Spending Trend\n\n![Monthly Spending Trend]({monthly_image})"
            )
        else:
            chart_sections.append(
                "#### Monthly Spending Trend\n(No trend available: insufficient data across multiple months)"
            )

    if chart_sections:
        report_parts.append("### Visualizations")
        report_parts.extend(chart_sections)

    return "\n" + "\n\n".join(report_parts) + "\n"
|