Spaces:

varun321
/

invoice-reader-budget-categorizer

Sleeping

App Files Files Community

varun321 committed on Apr 5

Commit

1d54def

0 Parent(s):

Initial commit

Browse files

Files changed (11) hide show

README.md +31 -0
app.py +44 -0
requirements.txt +8 -0
utils/__pycache__/data_processor.cpython-39.pyc +0 -0
utils/__pycache__/invoice_parser.cpython-39.pyc +0 -0
utils/__pycache__/llm_analyzer.cpython-39.pyc +0 -0
utils/__pycache__/report_generator.cpython-39.pyc +0 -0
utils/data_processor.py +16 -0
utils/invoice_parser.py +33 -0
utils/llm_analyzer.py +71 -0
utils/report_generator.py +55 -0

README.md ADDED Viewed

	@@ -0,0 +1,31 @@

+# Invoice Reader & Budget Categorizer
+This project creates an application that processes uploaded invoice PDFs, categorizes expenses, analyzes spending patterns using LLMs, and provides budget optimization recommendations.
+## Objective
+Create an application that:
+- Processes invoice PDFs to extract transaction details.
+- Categorizes expenses into predefined categories.
+- Analyzes spending trends over time.
+- Provides actionable budget recommendations.
+## Features
+- PDF upload and parsing.
+- Transaction categorization (Utilities, Entertainment, Groceries, Travel, Shopping, Other).
+- Spending pattern analysis over time.
+- Budget optimization suggestions.
+- Visualizations for spending insights.
+## Technical Stack
+- **PDF Parsing**: `pdfplumber`
+- **Data Processing**: `pandas`
+- **LLM**: Hugging Face Transformers (`facebook/bart-large`)
+- **Visualization**: `matplotlib`
+- **Interface**: Gradio
+- **Deployment**: Hugging Face Spaces
+## Setup Instructions
+1. Clone the repository:
+   ```bash
+   git clone <your-repo-url>
+   cd invoice-reader-budget-categorizer
+   ```

app.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import gradio as gr
+from utils.invoice_parser import parse_invoice
+from utils.data_processor import process_data
+from utils.llm_analyzer import LLMAnalyzer
+from utils.report_generator import generate_report
# Create the analyzer once at import time; process_invoice reuses this instance.
analyzer = LLMAnalyzer()


def process_invoice(pdf_file):
    """Run the invoice pipeline for one uploaded PDF and return the report text.

    Args:
        pdf_file: Path string of the uploaded PDF (falsy when nothing was
            uploaded).

    Returns:
        The generated report on success; otherwise a short human-readable
        message describing why no report was produced. Exceptions raised by
        any pipeline stage are converted into an error message instead of
        propagating to the UI.
    """
    try:
        if not pdf_file:
            return "No file uploaded."

        parsed_rows = parse_invoice(pdf_file)
        if not parsed_rows:
            return "No transactions found in the invoice."

        frame = process_data(parsed_rows)
        if frame.empty:
            return "No valid transactions after processing."

        # Categorize first: both the analysis and the report read the
        # categorized frame.
        categorized = analyzer.categorize_transactions(frame)
        insights = analyzer.analyze_spending_patterns(categorized)
        advice = analyzer.generate_budget_recommendations(insights)
        return generate_report(categorized, insights, advice)
    except Exception as exc:
        return f"An error occurred: {str(exc)}"
# Gradio UI: a single PDF upload (delivered to the handler as a file path)
# mapped to a single Markdown output.
interface = gr.Interface(
    title="Invoice Reader & Budget Categorizer",
    description=(
        "Upload your invoice PDF to categorize transactions, analyze spending "
        "patterns, and get budget optimization recommendations."
    ),
    fn=process_invoice,
    inputs=gr.File(type="filepath", label="Upload Invoice (PDF)"),
    outputs=gr.Markdown(label="Invoice Analysis Report"),
)

if __name__ == "__main__":
    # share=True asks Gradio to generate a public link for the app.
    interface.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+pdfplumber==0.11.4
+pandas==2.2.3
+transformers==4.44.2
+torch==2.4.1
+gradio==4.44.0
+matplotlib==3.9.2
+numpy==1.26.4
+tabulate

utils/__pycache__/data_processor.cpython-39.pyc ADDED Viewed

Binary file (538 Bytes). View file

utils/__pycache__/invoice_parser.cpython-39.pyc ADDED Viewed

Binary file (1 kB). View file

utils/__pycache__/llm_analyzer.cpython-39.pyc ADDED Viewed

Binary file (2.78 kB). View file

utils/__pycache__/report_generator.cpython-39.pyc ADDED Viewed

Binary file (1.81 kB). View file

utils/data_processor.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import pandas as pd
def process_data(transactions):
    """Convert parsed transactions into a typed DataFrame with time buckets.

    Args:
        transactions: Sequence of record dicts. Each record must provide a
            ``date`` and an ``amount`` field; any other fields are carried
            through unchanged.

    Returns:
        A new DataFrame where ``date`` is a datetime column, ``amount`` is a
        float column, and two derived columns are added: ``month`` (monthly
        period of the date) and ``week`` (ISO week number of the date).
        Rows whose date or amount cannot be parsed are dropped, so the result
        may be empty even when ``transactions`` is not.

    Raises:
        KeyError: If the records have no ``date`` or ``amount`` field.
    """
    df = pd.DataFrame(transactions)
    if df.empty:
        return df

    # Coerce instead of raising so that a single malformed row does not abort
    # the whole invoice; unparseable values become NaT/NaN and are dropped.
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df["amount"] = pd.to_numeric(df["amount"], errors="coerce").astype(float)
    df = df.dropna(subset=["date", "amount"]).reset_index(drop=True)

    # Time buckets used for the monthly / weekly aggregations.
    df["month"] = df["date"].dt.to_period("M")
    df["week"] = df["date"].dt.isocalendar().week
    return df

utils/invoice_parser.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import pdfplumber
+import re
+from datetime import datetime
# One transaction per line: "DD/MM/YYYY  <vendor>  <amount>  <description>".
_TRANSACTION_LINE = re.compile(r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)")


def _parse_transaction_line(line):
    """Return a transaction dict for one text line, or None if it is not one."""
    match = _TRANSACTION_LINE.match(line.strip())
    if match is None:
        return None
    date_str, vendor, amount, description = match.groups()
    try:
        parsed_date = datetime.strptime(date_str, "%d/%m/%Y")
        # Commas are treated as thousands separators and removed.
        parsed_amount = float(amount.replace(",", ""))
    except ValueError as e:
        # The line had the right shape but an impossible date or amount;
        # report it and skip the line rather than failing the whole file.
        print(f"Error parsing line: {line}, Error: {e}")
        return None
    return {
        "date": parsed_date,
        "vendor": vendor.strip(),
        "amount": parsed_amount,
        "description": description.strip(),
    }


def parse_invoice(pdf_file):
    """Extract transaction records from every page of an invoice PDF.

    Args:
        pdf_file: Anything ``pdfplumber.open`` accepts (the app passes a path
            string).

    Returns:
        A list of dicts with keys ``date`` (datetime), ``vendor`` (str),
        ``amount`` (float) and ``description`` (str), in page/line order.
        Lines that do not match the expected layout, or whose date/amount
        cannot be converted, are skipped. The list may be empty.
    """
    transactions = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            # Guard against a page with no extractable text (e.g. a scanned
            # image) — NOTE(review): confirm whether the pinned pdfplumber
            # returns None or "" here; either way this avoids a crash.
            text = page.extract_text() or ""
            for line in text.split("\n"):
                record = _parse_transaction_line(line)
                if record is not None:
                    transactions.append(record)
    return transactions

utils/llm_analyzer.py ADDED Viewed

	@@ -0,0 +1,71 @@

+from transformers import pipeline
+import pandas as pd
class LLMAnalyzer:
    """Categorize transactions and produce model-written spending commentary.

    Categorization is keyword based; the spending summary and the budget
    recommendations are produced by Hugging Face pipelines created in
    ``__init__``.
    """

    # Ordered (category, keywords) rules. The first rule with a keyword that
    # occurs as a substring of the lowercased description wins.
    _CATEGORY_RULES = (
        ("Utilities", ("electricity", "water")),
        ("Entertainment", ("movie", "game")),
        ("Groceries", ("grocery", "food")),
        ("Travel", ("travel", "flight")),
        ("Shopping", ("shop", "amazon")),
    )
    _DEFAULT_CATEGORY = "Other"

    def __init__(self):
        """Load the summarization and text-generation pipelines."""
        # NOTE(review): facebook/bart-large looks like a base checkpoint rather
        # than one tuned for summarization or free-form generation — confirm
        # the intended model before relying on the output quality.
        self.summarizer = pipeline("summarization", model="facebook/bart-large", clean_up_tokenization_spaces=True)
        self.text_generator = pipeline("text-generation", model="facebook/bart-large", clean_up_tokenization_spaces=True)
        self.predefined_categories = ["Utilities", "Entertainment", "Groceries", "Travel", "Shopping", "Other"]

    @classmethod
    def _categorize_description(cls, description):
        """Map one description to a category name using the keyword rules."""
        # Missing / non-text descriptions cannot match any keyword.
        text = description.lower() if isinstance(description, str) else ""
        for category, keywords in cls._CATEGORY_RULES:
            if any(keyword in text for keyword in keywords):
                return category
        return cls._DEFAULT_CATEGORY

    def categorize_transactions(self, df):
        """Return a copy of ``df`` with a ``category`` column appended.

        Args:
            df: Transactions frame; must contain a ``description`` column
                unless it is empty.

        Returns:
            ``df`` itself when it is empty; otherwise a new DataFrame with a
            fresh 0..n-1 index, all original columns, and ``category`` added
            as the last column. The input frame is not modified.
        """
        if df.empty:
            return df
        categorized_df = df.reset_index(drop=True)
        categorized_df["category"] = [
            self._categorize_description(description)
            for description in df["description"]
        ]
        return categorized_df

    def analyze_spending_patterns(self, df):
        """Summarize monthly, weekly and per-category totals as text.

        Args:
            df: Categorized transactions with ``month``, ``week``,
                ``category`` and ``amount`` columns.

        Returns:
            A fixed message when ``df`` is empty; otherwise the summarizer's
            ``summary_text`` for the aggregated totals.
        """
        if df.empty:
            return "No transactions available for analysis."
        # Convert Period objects to strings for better summarization
        monthly_spending = df.groupby("month").agg({"amount": "sum"}).rename(index=str).to_dict()["amount"]
        weekly_spending = df.groupby("week").agg({"amount": "sum"}).to_dict()["amount"]
        category_summary = df.groupby("category").agg({"amount": "sum"}).to_dict()["amount"]
        analysis_text = (
            f"Monthly Spending Trends: {monthly_spending}\n"
            f"Weekly Spending Trends: {weekly_spending}\n"
            f"Category-wise Spending: {category_summary}"
        )
        summary = self.summarizer(analysis_text, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
        return summary

    def generate_budget_recommendations(self, spending_analysis):
        """Ask the text generator for budget recommendations.

        Args:
            spending_analysis: Text describing the spending patterns; it is
                embedded verbatim in the prompt.

        Returns:
            The ``generated_text`` of the first returned sequence.
        """
        prompt = (
            f"Based on the following spending analysis:\n{spending_analysis}\n"
            f"Provide 3 actionable budget optimization recommendations to manage and reduce expenses. "
            f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
        )
        # temperature is only honoured in sampling mode, so sampling has to be
        # switched on explicitly for the 0.7 setting to have any effect.
        # NOTE(review): max_length counts prompt tokens too; a long analysis
        # may leave little room for generated text — consider max_new_tokens.
        recommendations = self.text_generator(
            prompt,
            max_length=200,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
        )[0]["generated_text"]
        return recommendations

utils/report_generator.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import matplotlib.pyplot as plt
+import pandas as pd
+import io
+import base64
def _current_figure_as_base64():
    """Render the active pyplot figure to PNG, close it, return base64 text."""
    buf = io.BytesIO()
    try:
        plt.savefig(buf, format="png")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close()
    return base64.b64encode(buf.getvalue()).decode("utf-8")


def generate_report(df, spending_analysis, recommendations):
    """Build the Markdown report shown to the user.

    Args:
        df: Categorized transactions. Must support ``to_markdown`` and
            ``empty``; when not empty it needs ``category``, ``month`` and
            ``amount`` columns for the charts.
        spending_analysis: Text for the "Spending Insights" section.
        recommendations: Text for the "Budget Recommendations" section.

    Returns:
        A Markdown string. Every heading and the table start at column 0 and
        sections are separated by blank lines; leading indentation would make
        Markdown treat them as an indented code block instead of rendering
        them. When ``df`` is not empty, a category pie chart and a monthly
        trend line chart are embedded as base64 PNG data URIs.
    """
    sections = [
        "## Invoice Reader & Budget Categorizer Report",
        "### Categorized Transactions",
        df.to_markdown(index=False),
        "### Spending Insights",
        f"{spending_analysis}",
        "### Budget Recommendations",
        f"{recommendations}",
    ]

    # Generate visualizations
    if not df.empty:
        # Category-wise spending pie chart
        category_spending = df.groupby("category")["amount"].sum()
        plt.figure(figsize=(6, 6))
        plt.pie(category_spending, labels=category_spending.index, autopct="%1.1f%%")
        plt.title("Category-wise Spending")
        category_plot = _current_figure_as_base64()

        # Monthly spending trend
        monthly_spending = df.groupby("month")["amount"].sum()
        plt.figure(figsize=(8, 4))
        monthly_spending.plot(kind="line", marker="o")
        plt.title("Monthly Spending Trend")
        plt.xlabel("Month")
        plt.ylabel("Amount")
        monthly_plot = _current_figure_as_base64()

        sections += [
            "### Visualizations",
            "#### Category-wise Spending",
            f"![Category-wise Spending](data:image/png;base64,{category_plot})",
            "#### Monthly Spending Trend",
            f"![Monthly Spending Trend](data:image/png;base64,{monthly_plot})",
        ]

    return "\n\n".join(sections) + "\n"