Spaces:

varun321
/

invoice-reader-budget-categorizer

Sleeping

File size: 3,227 Bytes

from transformers import pipeline
import pandas as pd

class LLMAnalyzer:
    """Categorize transactions and turn spending totals into LLM-written text.

    Categorization is purely keyword based; the two Hugging Face pipelines
    created in ``__init__`` are only used to summarize spending totals and to
    generate budget recommendations.
    """

    # Ordered (category, keywords) rules. The first rule with a keyword that
    # occurs as a substring of the lower-cased description wins, so the order
    # of this tuple encodes precedence (e.g. "movie" beats "food").
    _CATEGORY_KEYWORDS = (
        ("Utilities", ("electricity", "water")),
        ("Entertainment", ("movie", "game")),
        ("Groceries", ("grocery", "food")),
        ("Travel", ("travel", "flight")),
        ("Shopping", ("shop", "amazon")),
    )
    # Category assigned when no keyword matches or the description is missing.
    _DEFAULT_CATEGORY = "Other"
    # Input columns carried over, in this order, into the categorized frame.
    _OUTPUT_COLUMNS = ("date", "vendor", "amount", "description", "month", "week")

    def __init__(self):
        """Create the summarization and text-generation pipelines."""
        # NOTE(review): both pipelines load "facebook/bart-large". That
        # checkpoint looks like a general encoder-decoder model rather than one
        # tuned for summarization or open-ended generation - confirm the choice.
        self.summarizer = pipeline("summarization", model="facebook/bart-large", clean_up_tokenization_spaces=True)
        self.text_generator = pipeline("text-generation", model="facebook/bart-large", clean_up_tokenization_spaces=True)
        self.predefined_categories = ["Utilities", "Entertainment", "Groceries", "Travel", "Shopping", "Other"]

    @classmethod
    def _category_for(cls, description):
        """Return the first category whose keyword occurs in *description*."""
        # Missing descriptions arrive as None/NaN, which have no ``.lower()``;
        # treat anything that is not a string as "no keywords present".
        if not isinstance(description, str):
            return cls._DEFAULT_CATEGORY
        lowered = description.lower()
        for category, keywords in cls._CATEGORY_KEYWORDS:
            if any(keyword in lowered for keyword in keywords):
                return category
        return cls._DEFAULT_CATEGORY

    @staticmethod
    def _amount_totals(df: pd.DataFrame, column: str) -> dict:
        """Sum ``amount`` per distinct value of *column*, keyed by ``str(value)``."""
        # Keys are stringified so non-string group labels (e.g. Period objects)
        # render as plain text inside the prompt instead of as their repr.
        return df.groupby(column).agg({"amount": "sum"}).rename(index=str).to_dict()["amount"]

    def categorize_transactions(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a new frame with a keyword-derived ``category`` column.

        Args:
            df: Transactions with ``date``, ``vendor``, ``amount``,
                ``description``, ``month`` and ``week`` columns.

        Returns:
            ``df`` itself, unchanged, when it is empty. Otherwise a new
            DataFrame with a fresh default index that holds only the six
            columns above plus ``category``; any other input columns are not
            carried over.

        Raises:
            KeyError: If a non-empty ``df`` lacks one of the required columns.
        """
        if df.empty:
            return df

        rows = []
        for record in df.to_dict(orient="records"):
            row = {column: record[column] for column in self._OUTPUT_COLUMNS}
            row["category"] = self._category_for(record["description"])
            rows.append(row)
        return pd.DataFrame(rows)

    def analyze_spending_patterns(self, df: pd.DataFrame) -> str:
        """Summarize monthly, weekly and per-category spending totals.

        Args:
            df: Transactions with ``month``, ``week``, ``category`` and
                ``amount`` columns.

        Returns:
            A fixed message when ``df`` is empty, otherwise the
            ``summary_text`` of the first result returned by the summarizer.
        """
        if df.empty:
            return "No transactions available for analysis."

        monthly_spending = self._amount_totals(df, "month")
        weekly_spending = self._amount_totals(df, "week")
        category_summary = self._amount_totals(df, "category")

        analysis_text = (
            f"Monthly Spending Trends: {monthly_spending}\n"
            f"Weekly Spending Trends: {weekly_spending}\n"
            f"Category-wise Spending: {category_summary}"
        )

        # truncation=True is passed so that a long totals listing does not
        # exceed what the summarizer accepts; the summary is requested at
        # 20-50 tokens with sampling disabled.
        results = self.summarizer(analysis_text, max_length=50, min_length=20, do_sample=False, truncation=True)
        return results[0]["summary_text"]

    def generate_budget_recommendations(self, spending_analysis: str) -> str:
        """Generate budget recommendations from a spending analysis.

        Args:
            spending_analysis: Text embedded verbatim into the prompt.

        Returns:
            The ``generated_text`` of the first sequence returned by the
            text-generation pipeline.
        """
        prompt = (
            f"Based on the following spending analysis:\n{spending_analysis}\n"
            f"Provide 3 actionable budget optimization recommendations to manage and reduce expenses. "
            f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
        )

        # Sampling at temperature 0.7 is used instead of greedy decoding.
        # NOTE(review): max_length presumably counts the prompt tokens too, and
        # the returned text presumably starts with the prompt itself - confirm
        # against the pipeline docs whether max_new_tokens / return_full_text
        # are wanted here.
        outputs = self.text_generator(prompt, max_length=150, num_return_sequences=1, do_sample=True, temperature=0.7)
        return outputs[0]["generated_text"]