Spaces:

varun321
/

invoice-reader-budget-categorizer

Sleeping

App Files Files Community

invoice-reader-budget-categorizer / utils /llm_analyzer.py

varun321

Switch to saving images as files instead of base64, optimize LLM parameters

0004b69 4 months ago

raw

history blame contribute delete

3.23 kB

	from transformers import pipeline
	import pandas as pd

	class LLMAnalyzer:
	    """Categorize transactions and produce model-written spending insights.

	    Categorization is purely rule-based (keyword matching on the transaction
	    description). The two Hugging Face pipelines created in ``__init__`` are
	    used only to summarize aggregated spending and to generate budget
	    recommendations.
	    """

	    # Ordered (category, keywords) rules. The first rule with a keyword that
	    # occurs in the lower-cased description wins, so the order is significant
	    # (e.g. "game" is tested before "food").
	    _CATEGORY_RULES = (
	        ("Utilities", ("electricity", "water")),
	        ("Entertainment", ("movie", "game")),
	        ("Groceries", ("grocery", "food")),
	        ("Travel", ("travel", "flight")),
	        ("Shopping", ("shop", "amazon")),
	    )
	    _DEFAULT_CATEGORY = "Other"

	    # Columns copied from the input into the categorized frame, in this order.
	    _TRANSACTION_COLUMNS = ("date", "vendor", "amount", "description", "month", "week")

	    def __init__(self):
	        """Load the summarization and text-generation pipelines."""
	        # NOTE(review): both pipelines load the same "facebook/bart-large"
	        # checkpoint; confirm it is the intended model for each task.
	        self.summarizer = pipeline("summarization", model="facebook/bart-large", clean_up_tokenization_spaces=True)
	        self.text_generator = pipeline("text-generation", model="facebook/bart-large", clean_up_tokenization_spaces=True)
	        self.predefined_categories = ["Utilities", "Entertainment", "Groceries", "Travel", "Shopping", "Other"]

	    @classmethod
	    def _category_for(cls, description):
	        """Return the first category whose keyword occurs in *description*."""
	        # Missing descriptions come out of pandas as None/NaN (a float), which
	        # cannot be lower-cased; treat anything that is not text as uncategorized.
	        if not isinstance(description, str):
	            return cls._DEFAULT_CATEGORY

	        text = description.lower()
	        for category, keywords in cls._CATEGORY_RULES:
	            if any(keyword in text for keyword in keywords):
	                return category
	        return cls._DEFAULT_CATEGORY

	    @staticmethod
	    def _spend_by(df, column, *, stringify_all=False):
	        """Return ``{group key: summed amount}`` for *column*.

	        Keys that are not plain str/int/float (for example pandas Period
	        objects) are converted with ``str`` so the dict renders as readable
	        text; with ``stringify_all=True`` every key is converted.
	        """
	        totals = df.groupby(column).agg({"amount": "sum"}).to_dict()["amount"]
	        return {
	            (key if isinstance(key, (str, int, float)) and not stringify_all else str(key)): total
	            for key, total in totals.items()
	        }

	    def categorize_transactions(self, df):
	        """Return the transactions with a keyword-derived ``category`` column.

	        Args:
	            df: DataFrame with ``date``, ``vendor``, ``amount``,
	                ``description``, ``month`` and ``week`` columns.

	        Returns:
	            A new DataFrame holding those six columns plus ``category``, with
	            a fresh default index. For an empty input, a copy of the input
	            with an (empty) ``category`` column so the schema stays stable.

	        Raises:
	            KeyError: if a non-empty *df* lacks one of the required columns.
	        """
	        if df.empty:
	            result = df.copy()
	            if "category" not in result.columns:
	                result["category"] = pd.Series(dtype="object")
	            return result

	        rows = [
	            {
	                **{column: record[column] for column in self._TRANSACTION_COLUMNS},
	                "category": self._category_for(record["description"]),
	            }
	            for record in df.to_dict(orient="records")
	        ]
	        return pd.DataFrame(rows)

	    def analyze_spending_patterns(self, df):
	        """Summarize monthly, weekly and per-category spending totals.

	        Args:
	            df: DataFrame with ``month``, ``week``, ``category`` and
	                ``amount`` columns.

	        Returns:
	            The summarizer's ``summary_text`` for the aggregated totals, or a
	            fixed message when *df* is empty.
	        """
	        if df.empty:
	            return "No transactions available for analysis."

	        # Period-like keys are rendered as strings so the summarizer sees
	        # "2024-01" rather than a Period(...) repr.
	        monthly_spending = self._spend_by(df, "month", stringify_all=True)
	        weekly_spending = self._spend_by(df, "week")
	        category_summary = self._spend_by(df, "category")

	        analysis_text = (
	            f"Monthly Spending Trends: {monthly_spending}\n"
	            f"Weekly Spending Trends: {weekly_spending}\n"
	            f"Category-wise Spending: {category_summary}"
	        )

	        # Deterministic decoding; truncation guards against over-long input.
	        result = self.summarizer(analysis_text, max_length=50, min_length=20, do_sample=False, truncation=True)
	        return result[0]["summary_text"]

	    def generate_budget_recommendations(self, spending_analysis):
	        """Generate budget recommendations from a spending analysis.

	        Args:
	            spending_analysis: Text describing spending (for example the
	                output of ``analyze_spending_patterns``).

	        Returns:
	            The ``generated_text`` of the single sequence produced by the
	            text-generation pipeline.
	        """
	        prompt = (
	            f"Based on the following spending analysis:\n{spending_analysis}\n"
	            "Provide 3 actionable budget optimization recommendations to manage and reduce expenses. "
	            "Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
	        )

	        # NOTE(review): text-generation pipelines echo the prompt in
	        # "generated_text" by default, and max_length counts prompt tokens
	        # too - confirm whether return_full_text=False / max_new_tokens is
	        # wanted here.
	        outputs = self.text_generator(prompt, max_length=150, num_return_sequences=1, do_sample=True, temperature=0.7)
	        return outputs[0]["generated_text"]