File size: 1,174 Bytes
1d54def
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import pdfplumber
import re
from datetime import datetime

# One transaction row: "DD/MM/YYYY <vendor> <amount> <description>", anchored
# at the start of the (stripped) line. Compiled once rather than per line.
_TRANSACTION_LINE = re.compile(
    r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)"
)


def parse_invoice(pdf_file):
    """Extract transaction rows from the text of a PDF invoice.

    Every page is converted to text and scanned line by line. A line is
    treated as a transaction when it starts with a ``DD/MM/YYYY`` date
    followed by a vendor, a numeric amount and a description, separated by
    whitespace. Lines that do not match are ignored.

    Args:
        pdf_file: Passed straight to ``pdfplumber.open``; whatever that
            function accepts.

    Returns:
        A list of dicts, in document order, each with the keys:
        ``"date"`` (``datetime``), ``"vendor"`` (``str``),
        ``"amount"`` (``float``) and ``"description"`` (``str``).
        Lines that match the layout but hold an invalid date or amount are
        reported on stdout and skipped.
    """
    transactions = []

    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            # Pages without a text layer (e.g. scanned images) can yield
            # None or an empty string; there is nothing to parse on them.
            if not text:
                continue

            for line in text.split("\n"):
                match = _TRANSACTION_LINE.match(line.strip())
                if not match:
                    continue

                date_str, vendor, amount_str, description = match.groups()
                try:
                    date = datetime.strptime(date_str, "%d/%m/%Y")
                    # Commas are dropped as thousands separators.
                    # NOTE(review): a decimal-comma amount such as
                    # "1.234,56" would be misread -- confirm invoice format.
                    amount = float(amount_str.replace(",", ""))
                except ValueError as e:
                    # The regex only checks the shape: "31/02/2024" or an
                    # amount like "1.2.3" still gets here. Skip the row but
                    # keep going with the rest of the document.
                    print(f"Error parsing line: {line}, Error: {e}")
                    continue

                transactions.append({
                    "date": date,
                    "vendor": vendor.strip(),
                    "amount": amount,
                    "description": description.strip(),
                })

    return transactions