PersonalUPI / extract.py
Ujeshhh's picture
Create extract.py
009244c verified
raw
history blame
690 Bytes
import pdfplumber
import pandas as pd
import re
def extract_upi_transactions(pdf_path) -> pd.DataFrame:
    """Extract UPI transaction rows from the text of a PDF statement.

    Each page's text is obtained with ``page.extract_text()`` and scanned
    line by line for the shape ``NN-NN-NNNN <description> ₹<amount>``.
    Lines that do not fit that shape are ignored.

    Args:
        pdf_path: Handed unchanged to ``pdfplumber.open``.

    Returns:
        A DataFrame with the columns ``Date`` (the date string exactly as
        matched), ``Description`` (surrounding whitespace removed) and
        ``Amount`` (float, thousands separators removed). The three columns
        are present even when no transaction line was found.
    """
    columns = ["Date", "Description", "Amount"]

    # Compiled once instead of being looked up for every line.
    # NOTE(review): the description group only accepts word characters and
    # whitespace, so a line whose description contains e.g. "@" or "/" is
    # not matched -- confirm this suits the statements being parsed.
    line_pattern = re.compile(
        r'(\d{2}-\d{2}-\d{4})\s+([\w\s]+)\s+₹([\d,.]+)'
    )

    transactions = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                # Nothing extractable on this page.
                continue

            for line in text.split("\n"):
                match = line_pattern.search(line)
                if match is None:
                    continue

                date, description, amount = match.groups()

                # The amount class also accepts tokens that are not numbers
                # (e.g. "1.2.3" or a lone "."). Drop trailing separators,
                # then skip the line rather than abort the whole extraction.
                cleaned = amount.rstrip(".,").replace(",", "")
                try:
                    value = float(cleaned)
                except ValueError:
                    continue

                transactions.append(
                    {
                        "Date": date,
                        # The greedy description group keeps surplus spaces
                        # when columns are separated by more than one blank.
                        "Description": description.strip(),
                        "Amount": value,
                    }
                )

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(transactions, columns=columns)