File size: 1,174 Bytes
1d54def |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import pdfplumber
import re
from datetime import datetime
# Matches "<dd/mm/yyyy> <vendor> <amount> <description>" at the start of a line.
# Compiled once here instead of being rebuilt for every line of every page.
_TRANSACTION_LINE = re.compile(r"(\d{2}/\d{2}/\d{4})\s+(.+?)\s+([\d,.]+)\s+(.+)")


def parse_invoice(pdf_file):
    """Extract transaction rows from the text of a PDF invoice.

    Every page is converted to text and scanned line by line. A line is
    treated as a transaction when, after stripping surrounding whitespace,
    it starts with a ``dd/mm/yyyy`` date followed by a vendor, a numeric
    amount and a description, each separated by whitespace.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts as its first argument.

    Returns:
        A list of dicts, in document order, with the keys ``"date"``
        (``datetime``), ``"vendor"`` (str), ``"amount"`` (float, commas
        removed before conversion) and ``"description"`` (str).

    Lines that match the pattern but carry an invalid date or amount are
    reported on stdout and skipped; they do not abort the parse.
    """
    transactions = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            # A page without a text layer (e.g. a scanned image) may produce
            # None instead of a string; treat it as an empty page rather than
            # crashing on None.split().
            text = page.extract_text() or ""
            for line in text.split("\n"):
                match = _TRANSACTION_LINE.match(line.strip())
                if not match:
                    continue
                date_str, vendor, amount, description = match.groups()
                try:
                    # Both conversions signal bad input with ValueError
                    # (e.g. "31/02/2024" or an amount such as "1.2.3").
                    date = datetime.strptime(date_str, "%d/%m/%Y")
                    amount = float(amount.replace(",", ""))
                except ValueError as e:
                    print(f"Error parsing line: {line}, Error: {e}")
                    continue
                transactions.append({
                    "date": date,
                    "vendor": vendor.strip(),
                    "amount": amount,
                    "description": description.strip(),
                })
    return transactions