Spaces:

varun321
/

invoice-reader-budget-categorizer

Sleeping

App Files Community

varun321 committed on Apr 5

Commit

0004b69

1 Parent(s): e20fc4d

Switch to saving images as files instead of base64, optimize LLM parameters

Browse files

Files changed (10) hide show

app.py +19 -12
images/category_spending.png +0 -0
utils/__pycache__/data_processor.cpython-39.pyc +0 -0
utils/__pycache__/invoice_parser.cpython-39.pyc +0 -0
utils/__pycache__/llm_analyzer.cpython-39.pyc +0 -0
utils/__pycache__/report_generator.cpython-39.pyc +0 -0
utils/data_processor.py +2 -0
utils/invoice_parser.py +0 -1
utils/llm_analyzer.py +4 -2
utils/report_generator.py +28 -30

app.py CHANGED Viewed

@@ -3,42 +3,49 @@ from utils.invoice_parser import parse_invoice
 from utils.data_processor import process_data
 from utils.llm_analyzer import LLMAnalyzer
 from utils.report_generator import generate_report
 # Initialize the LLM analyzer
 analyzer = LLMAnalyzer()
 def process_invoice(pdf_file):
     try:
         if not pdf_file:
-            return "No file uploaded."
-        transactions = parse_invoice(pdf_file)  # Note: pdf_file is now a path string
         if not transactions:
-            return "No transactions found in the invoice."
         df = process_data(transactions)
         if df.empty:
-            return "No valid transactions after processing."
         df_categorized = analyzer.categorize_transactions(df)
         spending_analysis = analyzer.analyze_spending_patterns(df_categorized)
         recommendations = analyzer.generate_budget_recommendations(spending_analysis)
         report = generate_report(df_categorized, spending_analysis, recommendations)
-        return report
     except Exception as e:
-        return f"An error occurred: {str(e)}"
 interface = gr.Interface(
     fn=process_invoice,
-    inputs=gr.File(label="Upload Invoice (PDF)", type="filepath"),  # Changed to "filepath"
-    outputs=gr.Markdown(label="Invoice Analysis Report"),
     title="Invoice Reader & Budget Categorizer",
     description="Upload your invoice PDF to categorize transactions, analyze spending patterns, and get budget optimization recommendations."
 )
 if __name__ == "__main__":
-    interface.launch(share=True)  # Generates a public link

 from utils.data_processor import process_data
 from utils.llm_analyzer import LLMAnalyzer
 from utils.report_generator import generate_report
+import os
 # Initialize the LLM analyzer
 analyzer = LLMAnalyzer()
 def process_invoice(pdf_file):
     try:
         if not pdf_file:
+            return "No file uploaded.", None, None
+        transactions = parse_invoice(pdf_file)
         if not transactions:
+            return "No transactions found in the invoice.", None, None
         df = process_data(transactions)
         if df.empty:
+            return "No valid transactions after processing.", None, None
         df_categorized = analyzer.categorize_transactions(df)
         spending_analysis = analyzer.analyze_spending_patterns(df_categorized)
         recommendations = analyzer.generate_budget_recommendations(spending_analysis)
         report = generate_report(df_categorized, spending_analysis, recommendations)
+        # Get paths to generated images
+        category_image_path = os.path.join("images", "category_spending.png") if os.path.exists(os.path.join("images", "category_spending.png")) else None
+        monthly_image_path = os.path.join("images", "monthly_spending.png") if os.path.exists(os.path.join("images", "monthly_spending.png")) else None
+        return report, category_image_path, monthly_image_path
     except Exception as e:
+        return f"An error occurred: {str(e)}", None, None
 interface = gr.Interface(
     fn=process_invoice,
+    inputs=gr.File(label="Upload Invoice (PDF)", type="filepath"),
+    outputs=[
+        gr.Markdown(label="Report"),
+        gr.Image(label="Category-wise Spending", type="filepath"),
+        gr.Image(label="Monthly Spending Trend", type="filepath")
+    ],
     title="Invoice Reader & Budget Categorizer",
     description="Upload your invoice PDF to categorize transactions, analyze spending patterns, and get budget optimization recommendations."
 )
 if __name__ == "__main__":
+    interface.launch()  # Removed share=True for Spaces compatibility

images/category_spending.png ADDED Viewed

utils/__pycache__/data_processor.cpython-39.pyc CHANGED Viewed

Binary files a/utils/__pycache__/data_processor.cpython-39.pyc and b/utils/__pycache__/data_processor.cpython-39.pyc differ

utils/__pycache__/invoice_parser.cpython-39.pyc CHANGED Viewed

Binary files a/utils/__pycache__/invoice_parser.cpython-39.pyc and b/utils/__pycache__/invoice_parser.cpython-39.pyc differ

utils/__pycache__/llm_analyzer.cpython-39.pyc CHANGED Viewed

Binary files a/utils/__pycache__/llm_analyzer.cpython-39.pyc and b/utils/__pycache__/llm_analyzer.cpython-39.pyc differ

utils/__pycache__/report_generator.cpython-39.pyc CHANGED Viewed

Binary files a/utils/__pycache__/report_generator.cpython-39.pyc and b/utils/__pycache__/report_generator.cpython-39.pyc differ

utils/data_processor.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import pandas as pd
 def process_data(transactions):
     df = pd.DataFrame(transactions)
     if df.empty:

 import pandas as pd
 def process_data(transactions):
+    # Convert to DataFrame
     df = pd.DataFrame(transactions)
     if df.empty:

utils/invoice_parser.py CHANGED Viewed

@@ -11,7 +11,6 @@ def parse_invoice(pdf_file):
             lines = text.split("\n")
             for line in lines:
-                # Example pattern for invoice transaction
                 pattern = r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)"
                 match = re.match(pattern, line.strip())

             lines = text.split("\n")
             for line in lines:
                 pattern = r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)"
                 match = re.match(pattern, line.strip())

utils/llm_analyzer.py CHANGED Viewed

@@ -57,7 +57,8 @@ class LLMAnalyzer:
             f"Category-wise Spending: {category_summary}"
         )
-        summary = self.summarizer(analysis_text, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
         return summary
     def generate_budget_recommendations(self, spending_analysis):
@@ -67,5 +68,6 @@ class LLMAnalyzer:
             f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
         )
-        recommendations = self.text_generator(prompt, max_length=200, num_return_sequences=1, temperature=0.7)[0]["generated_text"]
         return recommendations

             f"Category-wise Spending: {category_summary}"
         )
+        # Adjust max_length and enable truncation
+        summary = self.summarizer(analysis_text, max_length=50, min_length=20, do_sample=False, truncation=True)[0]["summary_text"]
         return summary
     def generate_budget_recommendations(self, spending_analysis):
             f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
         )
+        # Enable sampling with temperature for better generation
+        recommendations = self.text_generator(prompt, max_length=150, num_return_sequences=1, do_sample=True, temperature=0.7)[0]["generated_text"]
         return recommendations

utils/report_generator.py CHANGED Viewed

@@ -1,55 +1,53 @@
 import matplotlib.pyplot as plt
 import pandas as pd
-import io
-import base64
 def generate_report(df, spending_analysis, recommendations):
     report = f"""
     ## Invoice Reader & Budget Categorizer Report
     ### Categorized Transactions
-    {df.to_markdown(index=False)}
     ### Spending Insights
     {spending_analysis}
     ### Budget Recommendations
     {recommendations}
     """
-    # Generate visualizations
     if not df.empty:
         # Category-wise spending pie chart
         category_spending = df.groupby("category")["amount"].sum()
         plt.figure(figsize=(6, 6))
         plt.pie(category_spending, labels=category_spending.index, autopct="%1.1f%%")
         plt.title("Category-wise Spending")
-        buf = io.BytesIO()
-        plt.savefig(buf, format="png")
-        buf.seek(0)
-        category_plot = base64.b64encode(buf.getvalue()).decode("utf-8")
         plt.close()
-        # Monthly spending trend
         monthly_spending = df.groupby("month")["amount"].sum()
-        plt.figure(figsize=(8, 4))
-        monthly_spending.plot(kind="line", marker="o")
-        plt.title("Monthly Spending Trend")
-        plt.xlabel("Month")
-        plt.ylabel("Amount")
-        buf = io.BytesIO()
-        plt.savefig(buf, format="png")
-        buf.seek(0)
-        monthly_plot = base64.b64encode(buf.getvalue()).decode("utf-8")
-        plt.close()
-        report += f"""
-        ### Visualizations
-        #### Category-wise Spending
-        ![Category-wise Spending](data:image/png;base64,{category_plot})
-        #### Monthly Spending Trend
-        ![Monthly Spending Trend](data:image/png;base64,{monthly_plot})
-        """
     return report

 import matplotlib.pyplot as plt
 import pandas as pd
+import os
 def generate_report(df, spending_analysis, recommendations):
+    # Create a directory for images if it doesn't exist
+    images_dir = "images"
+    if not os.path.exists(images_dir):
+        os.makedirs(images_dir)
     report = f"""
     ## Invoice Reader & Budget Categorizer Report
     ### Categorized Transactions
+    {df.groupby('category').apply(lambda x: x.to_markdown(index=False)).to_string()}
     ### Spending Insights
     {spending_analysis}
     ### Budget Recommendations
     {recommendations}
+    ### Visualizations
+    #### Category-wise Spending
+    ![Category-wise Spending]({os.path.join(images_dir, 'category_spending.png')})
+    #### Monthly Spending Trend
+    ![Monthly Spending Trend]({os.path.join(images_dir, 'monthly_spending.png')})
     """
     if not df.empty:
         # Category-wise spending pie chart
         category_spending = df.groupby("category")["amount"].sum()
         plt.figure(figsize=(6, 6))
         plt.pie(category_spending, labels=category_spending.index, autopct="%1.1f%%")
         plt.title("Category-wise Spending")
+        plt.savefig(os.path.join(images_dir, "category_spending.png"))
         plt.close()
+        # Monthly spending trend with validation
         monthly_spending = df.groupby("month")["amount"].sum()
+        if len(monthly_spending) > 1:  # Ensure multiple months
+            plt.figure(figsize=(8, 4))
+            monthly_spending.plot(kind="line", marker="o")
+            plt.title("Monthly Spending Trend")
+            plt.xlabel("Month")
+            plt.ylabel("Amount")
+            plt.savefig(os.path.join(images_dir, "monthly_spending.png"))
+            plt.close()
+        else:
+            report += "\n#### Monthly Spending Trend\n(No trend available: insufficient data across multiple months)"
     return report