Spaces:
Sleeping
Sleeping
import pdfplumber | |
import pandas as pd | |
import re | |
# Pattern for one transaction inside a text line: an NN-NN-NNNN date, a
# description made of word characters and whitespace, then "₹" followed by
# digits/commas/periods. Compiled once here instead of per line.
_UPI_LINE_RE = re.compile(r'(\d{2}-\d{2}-\d{4})\s+([\w\s]+)\s+₹([\d,.]+)')

# Explicit column order so an empty result still exposes the same columns.
_UPI_COLUMNS = ["Date", "Description", "Amount"]


def _parse_upi_line(line):
    """Return the transaction dict found in *line*, or None if there is none."""
    match = _UPI_LINE_RE.search(line)
    if match is None:
        return None

    date, description, amount = match.groups()
    try:
        # Drop thousands separators; a trailing "." (e.g. sentence-final
        # punctuation captured by the amount group) would break float().
        value = float(amount.replace(",", "").rstrip("."))
    except ValueError:
        # The amount group can capture non-numbers such as "." or "1.2.3";
        # treat those lines like any other non-matching line.
        return None

    return {
        "Date": date,
        # The greedy description group can keep trailing spaces when several
        # separate it from the rupee sign.
        "Description": description.strip(),
        "Amount": value,
    }


def extract_upi_transactions(pdf_path):
    """Extract transactions from the text of a PDF into a DataFrame.

    Each page's extracted text is split on newlines and every line is
    searched for a date, a description and a "₹"-prefixed amount. Lines
    without such a match, or whose amount is not a valid number, are skipped.

    Args:
        pdf_path: Value passed straight to ``pdfplumber.open``.

    Returns:
        A ``pandas.DataFrame`` with columns ``Date`` (the matched date text,
        unparsed), ``Description`` (str) and ``Amount`` (float), one row per
        matched line in page order. The columns are present even when no
        line matched.
    """
    transactions = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                # Nothing extractable on this page.
                continue
            for line in text.split("\n"):
                record = _parse_upi_line(line)
                if record is not None:
                    transactions.append(record)
    return pd.DataFrame(transactions, columns=_UPI_COLUMNS)