from transformers import pipeline
import pandas as pd


class LLMAnalyzer:
    """Categorize transactions and produce LLM-written spending insights.

    The class wraps two Hugging Face pipelines (a summarizer and a text
    generator) and a keyword-based transaction categorizer.
    """

    # Ordered (category, keywords) rules. A description is assigned to the
    # first rule with a keyword that occurs anywhere in the lower-cased
    # text, so order matters and matching is by substring ("shop" also
    # matches "workshop").
    _CATEGORY_KEYWORDS = (
        ("Utilities", ("electricity", "water")),
        ("Entertainment", ("movie", "game")),
        ("Groceries", ("grocery", "food")),
        ("Travel", ("travel", "flight")),
        ("Shopping", ("shop", "amazon")),
    )

    # Category used when no keyword rule matches.
    _FALLBACK_CATEGORY = "Other"

    # Columns carried over into the categorized frame, in output order.
    _OUTPUT_COLUMNS = ("date", "vendor", "amount", "description", "month", "week")

    def __init__(self):
        """Load the summarization and text-generation pipelines."""
        self.summarizer = pipeline(
            "summarization",
            model="facebook/bart-large",
            clean_up_tokenization_spaces=True,
        )
        # NOTE(review): "facebook/bart-large" is an encoder-decoder
        # checkpoint, while the "text-generation" task loads a causal LM.
        # Confirm this model is really intended here; a decoder-only,
        # instruction-tuned model is the usual choice for this task.
        self.text_generator = pipeline(
            "text-generation",
            model="facebook/bart-large",
            clean_up_tokenization_spaces=True,
        )
        self.predefined_categories = [
            "Utilities",
            "Entertainment",
            "Groceries",
            "Travel",
            "Shopping",
            "Other",
        ]

    @classmethod
    def _categorize_description(cls, description) -> str:
        """Return the category for one transaction description.

        Non-string values (for example NaN or None from missing data) get
        the fallback category instead of raising.
        """
        if not isinstance(description, str):
            return cls._FALLBACK_CATEGORY
        text = description.lower()
        for category, keywords in cls._CATEGORY_KEYWORDS:
            if any(keyword in text for keyword in keywords):
                return category
        return cls._FALLBACK_CATEGORY

    @staticmethod
    def _spending_by(df: pd.DataFrame, column: str) -> dict:
        """Sum ``amount`` per distinct value of ``column``.

        Keys are converted to ``str`` so that non-string group labels such
        as pandas ``Period`` objects appear as plain text in the prompt.
        """
        totals = df.groupby(column)["amount"].sum()
        # Series.to_dict() yields native Python numbers, which keeps the
        # dict repr free of numpy scalar wrappers.
        return {str(label): total for label, total in totals.to_dict().items()}

    def categorize_transactions(self, df: pd.DataFrame) -> pd.DataFrame:
        """Attach a keyword-derived ``category`` column to transactions.

        Args:
            df: Transactions with the columns ``date``, ``vendor``,
                ``amount``, ``description``, ``month`` and ``week``.

        Returns:
            ``df`` itself when it is empty. Otherwise a new frame with a
            fresh 0..n-1 index holding the six input columns listed above
            plus ``category``; any other input columns are dropped.

        Raises:
            KeyError: If a required column is missing from a non-empty frame.
        """
        if df.empty:
            return df

        categorized = df[list(self._OUTPUT_COLUMNS)].reset_index(drop=True)
        categorized["category"] = [
            self._categorize_description(text)
            for text in categorized["description"]
        ]
        return categorized

    def analyze_spending_patterns(self, df: pd.DataFrame) -> str:
        """Summarize monthly, weekly and per-category spending totals.

        Args:
            df: Categorized transactions with ``month``, ``week``,
                ``category`` and ``amount`` columns.

        Returns:
            The summarizer's text, or a fixed message when ``df`` is empty.
        """
        if df.empty:
            return "No transactions available for analysis."

        monthly_spending = self._spending_by(df, "month")
        weekly_spending = self._spending_by(df, "week")
        category_summary = self._spending_by(df, "category")

        analysis_text = (
            f"Monthly Spending Trends: {monthly_spending}\n"
            f"Weekly Spending Trends: {weekly_spending}\n"
            f"Category-wise Spending: {category_summary}"
        )

        # truncation=True keeps over-long input within the model's limit.
        result = self.summarizer(
            analysis_text,
            max_length=50,
            min_length=20,
            do_sample=False,
            truncation=True,
        )
        return result[0]["summary_text"]

    def generate_budget_recommendations(self, spending_analysis: str) -> str:
        """Generate budget recommendations from a spending analysis.

        Args:
            spending_analysis: Text describing the spending patterns,
                typically the output of ``analyze_spending_patterns``.

        Returns:
            The ``generated_text`` of the first returned sequence.
        """
        prompt = (
            f"Based on the following spending analysis:\n{spending_analysis}\n"
            f"Provide 3 actionable budget optimization recommendations to manage and reduce expenses. "
            f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
        )

        # max_new_tokens bounds only the generated continuation. The
        # previous max_length=150 also counted the prompt tokens, which
        # left little or no room for output with a prompt this long.
        # NOTE(review): by default generated_text starts with the prompt;
        # pass return_full_text=False if callers want only the new text.
        outputs = self.text_generator(
            prompt,
            max_new_tokens=150,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
        )
        return outputs[0]["generated_text"]