varun321 committed on
Commit
0004b69
·
1 Parent(s): e20fc4d

Switch to saving images as files instead of base64, optimize LLM parameters

Browse files
app.py CHANGED
@@ -3,42 +3,49 @@ from utils.invoice_parser import parse_invoice
3
  from utils.data_processor import process_data
4
  from utils.llm_analyzer import LLMAnalyzer
5
  from utils.report_generator import generate_report
 
6
 
7
  # Initialize the LLM analyzer
8
  analyzer = LLMAnalyzer()
9
-
10
  def process_invoice(pdf_file):
11
  try:
12
  if not pdf_file:
13
- return "No file uploaded."
14
 
15
- transactions = parse_invoice(pdf_file) # Note: pdf_file is now a path string
16
  if not transactions:
17
- return "No transactions found in the invoice."
18
 
19
  df = process_data(transactions)
20
  if df.empty:
21
- return "No valid transactions after processing."
22
 
23
  df_categorized = analyzer.categorize_transactions(df)
24
  spending_analysis = analyzer.analyze_spending_patterns(df_categorized)
25
-
26
  recommendations = analyzer.generate_budget_recommendations(spending_analysis)
27
-
28
  report = generate_report(df_categorized, spending_analysis, recommendations)
29
 
30
- return report
 
 
 
 
31
 
32
  except Exception as e:
33
- return f"An error occurred: {str(e)}"
34
 
35
  interface = gr.Interface(
36
  fn=process_invoice,
37
- inputs=gr.File(label="Upload Invoice (PDF)", type="filepath"), # Changed to "filepath"
38
- outputs=gr.Markdown(label="Invoice Analysis Report"),
 
 
 
 
39
  title="Invoice Reader & Budget Categorizer",
40
  description="Upload your invoice PDF to categorize transactions, analyze spending patterns, and get budget optimization recommendations."
41
  )
42
 
43
  if __name__ == "__main__":
44
- interface.launch(share=True) # Generates a public link
 
3
  from utils.data_processor import process_data
4
  from utils.llm_analyzer import LLMAnalyzer
5
  from utils.report_generator import generate_report
6
+ import os
7
 
8
  # Initialize the LLM analyzer
9
  analyzer = LLMAnalyzer()
10
+
11
  def process_invoice(pdf_file):
12
  try:
13
  if not pdf_file:
14
+ return "No file uploaded.", None, None
15
 
16
+ transactions = parse_invoice(pdf_file)
17
  if not transactions:
18
+ return "No transactions found in the invoice.", None, None
19
 
20
  df = process_data(transactions)
21
  if df.empty:
22
+ return "No valid transactions after processing.", None, None
23
 
24
  df_categorized = analyzer.categorize_transactions(df)
25
  spending_analysis = analyzer.analyze_spending_patterns(df_categorized)
 
26
  recommendations = analyzer.generate_budget_recommendations(spending_analysis)
 
27
  report = generate_report(df_categorized, spending_analysis, recommendations)
28
 
29
+ # Get paths to generated images
30
+ category_image_path = os.path.join("images", "category_spending.png") if os.path.exists(os.path.join("images", "category_spending.png")) else None
31
+ monthly_image_path = os.path.join("images", "monthly_spending.png") if os.path.exists(os.path.join("images", "monthly_spending.png")) else None
32
+
33
+ return report, category_image_path, monthly_image_path
34
 
35
  except Exception as e:
36
+ return f"An error occurred: {str(e)}", None, None
37
 
38
  interface = gr.Interface(
39
  fn=process_invoice,
40
+ inputs=gr.File(label="Upload Invoice (PDF)", type="filepath"),
41
+ outputs=[
42
+ gr.Markdown(label="Report"),
43
+ gr.Image(label="Category-wise Spending", type="filepath"),
44
+ gr.Image(label="Monthly Spending Trend", type="filepath")
45
+ ],
46
  title="Invoice Reader & Budget Categorizer",
47
  description="Upload your invoice PDF to categorize transactions, analyze spending patterns, and get budget optimization recommendations."
48
  )
49
 
50
  if __name__ == "__main__":
51
+ interface.launch() # Removed share=True for Spaces compatibility
images/category_spending.png ADDED
utils/__pycache__/data_processor.cpython-39.pyc CHANGED
Binary files a/utils/__pycache__/data_processor.cpython-39.pyc and b/utils/__pycache__/data_processor.cpython-39.pyc differ
 
utils/__pycache__/invoice_parser.cpython-39.pyc CHANGED
Binary files a/utils/__pycache__/invoice_parser.cpython-39.pyc and b/utils/__pycache__/invoice_parser.cpython-39.pyc differ
 
utils/__pycache__/llm_analyzer.cpython-39.pyc CHANGED
Binary files a/utils/__pycache__/llm_analyzer.cpython-39.pyc and b/utils/__pycache__/llm_analyzer.cpython-39.pyc differ
 
utils/__pycache__/report_generator.cpython-39.pyc CHANGED
Binary files a/utils/__pycache__/report_generator.cpython-39.pyc and b/utils/__pycache__/report_generator.cpython-39.pyc differ
 
utils/data_processor.py CHANGED
@@ -1,5 +1,7 @@
1
  import pandas as pd
 
2
  def process_data(transactions):
 
3
  df = pd.DataFrame(transactions)
4
 
5
  if df.empty:
 
1
  import pandas as pd
2
+
3
  def process_data(transactions):
4
+ # Convert to DataFrame
5
  df = pd.DataFrame(transactions)
6
 
7
  if df.empty:
utils/invoice_parser.py CHANGED
@@ -11,7 +11,6 @@ def parse_invoice(pdf_file):
11
  lines = text.split("\n")
12
 
13
  for line in lines:
14
- # Example pattern for invoice transaction
15
  pattern = r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)"
16
  match = re.match(pattern, line.strip())
17
 
 
11
  lines = text.split("\n")
12
 
13
  for line in lines:
 
14
  pattern = r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)"
15
  match = re.match(pattern, line.strip())
16
 
utils/llm_analyzer.py CHANGED
@@ -57,7 +57,8 @@ class LLMAnalyzer:
57
  f"Category-wise Spending: {category_summary}"
58
  )
59
 
60
- summary = self.summarizer(analysis_text, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
 
61
  return summary
62
 
63
  def generate_budget_recommendations(self, spending_analysis):
@@ -67,5 +68,6 @@ class LLMAnalyzer:
67
  f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
68
  )
69
 
70
- recommendations = self.text_generator(prompt, max_length=200, num_return_sequences=1, temperature=0.7)[0]["generated_text"]
 
71
  return recommendations
 
57
  f"Category-wise Spending: {category_summary}"
58
  )
59
 
60
+ # Adjust max_length and enable truncation
61
+ summary = self.summarizer(analysis_text, max_length=50, min_length=20, do_sample=False, truncation=True)[0]["summary_text"]
62
  return summary
63
 
64
  def generate_budget_recommendations(self, spending_analysis):
 
68
  f"Each recommendation should be concise, specific, and practical (e.g., 'Reduce dining out by 20%')."
69
  )
70
 
71
+ # Enable sampling with temperature for better generation
72
+ recommendations = self.text_generator(prompt, max_length=150, num_return_sequences=1, do_sample=True, temperature=0.7)[0]["generated_text"]
73
  return recommendations
utils/report_generator.py CHANGED
@@ -1,55 +1,53 @@
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
- import io
4
- import base64
5
 
6
  def generate_report(df, spending_analysis, recommendations):
 
 
 
 
 
7
  report = f"""
8
  ## Invoice Reader & Budget Categorizer Report
9
 
10
  ### Categorized Transactions
11
- {df.to_markdown(index=False)}
12
 
13
  ### Spending Insights
14
  {spending_analysis}
15
 
16
  ### Budget Recommendations
17
  {recommendations}
 
 
 
 
 
 
 
18
  """
19
 
20
- # Generate visualizations
21
  if not df.empty:
22
  # Category-wise spending pie chart
23
  category_spending = df.groupby("category")["amount"].sum()
24
  plt.figure(figsize=(6, 6))
25
  plt.pie(category_spending, labels=category_spending.index, autopct="%1.1f%%")
26
  plt.title("Category-wise Spending")
27
- buf = io.BytesIO()
28
- plt.savefig(buf, format="png")
29
- buf.seek(0)
30
- category_plot = base64.b64encode(buf.getvalue()).decode("utf-8")
31
  plt.close()
32
-
33
- # Monthly spending trend
34
  monthly_spending = df.groupby("month")["amount"].sum()
35
- plt.figure(figsize=(8, 4))
36
- monthly_spending.plot(kind="line", marker="o")
37
- plt.title("Monthly Spending Trend")
38
- plt.xlabel("Month")
39
- plt.ylabel("Amount")
40
- buf = io.BytesIO()
41
- plt.savefig(buf, format="png")
42
- buf.seek(0)
43
- monthly_plot = base64.b64encode(buf.getvalue()).decode("utf-8")
44
- plt.close()
45
-
46
- report += f"""
47
- ### Visualizations
48
- #### Category-wise Spending
49
- ![Category-wise Spending](data:image/png;base64,{category_plot})
50
-
51
- #### Monthly Spending Trend
52
- ![Monthly Spending Trend](data:image/png;base64,{monthly_plot})
53
- """
54
-
55
  return report
 
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
+ import os
 
4
 
5
  def generate_report(df, spending_analysis, recommendations):
6
+ # Create a directory for images if it doesn't exist
7
+ images_dir = "images"
8
+ if not os.path.exists(images_dir):
9
+ os.makedirs(images_dir)
10
+
11
  report = f"""
12
  ## Invoice Reader & Budget Categorizer Report
13
 
14
  ### Categorized Transactions
15
+ {df.groupby('category').apply(lambda x: x.to_markdown(index=False)).to_string()}
16
 
17
  ### Spending Insights
18
  {spending_analysis}
19
 
20
  ### Budget Recommendations
21
  {recommendations}
22
+
23
+ ### Visualizations
24
+ #### Category-wise Spending
25
+ ![Category-wise Spending]({os.path.join(images_dir, 'category_spending.png')})
26
+
27
+ #### Monthly Spending Trend
28
+ ![Monthly Spending Trend]({os.path.join(images_dir, 'monthly_spending.png')})
29
  """
30
 
 
31
  if not df.empty:
32
  # Category-wise spending pie chart
33
  category_spending = df.groupby("category")["amount"].sum()
34
  plt.figure(figsize=(6, 6))
35
  plt.pie(category_spending, labels=category_spending.index, autopct="%1.1f%%")
36
  plt.title("Category-wise Spending")
37
+ plt.savefig(os.path.join(images_dir, "category_spending.png"))
 
 
 
38
  plt.close()
39
+
40
+ # Monthly spending trend with validation
41
  monthly_spending = df.groupby("month")["amount"].sum()
42
+ if len(monthly_spending) > 1: # Ensure multiple months
43
+ plt.figure(figsize=(8, 4))
44
+ monthly_spending.plot(kind="line", marker="o")
45
+ plt.title("Monthly Spending Trend")
46
+ plt.xlabel("Month")
47
+ plt.ylabel("Amount")
48
+ plt.savefig(os.path.join(images_dir, "monthly_spending.png"))
49
+ plt.close()
50
+ else:
51
+ report += "\n#### Monthly Spending Trend\n(No trend available: insufficient data across multiple months)"
52
+
 
 
 
 
 
 
 
 
 
53
  return report