Ujeshhh committed on
Commit
009244c
·
verified ·
1 Parent(s): dab9ea3

Create extract.py

Browse files
Files changed (1) hide show
  1. extract.py +19 -0
extract.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+ import pandas as pd
3
+ import re
4
+
5
# Matches "<NN-NN-NNNN> <description> ₹<amount>" anywhere in a line.
# The description is matched lazily with ``.+?`` so that punctuation such as
# "@", "/", "-" and "." is accepted while the match still stops at the FIRST
# "₹<amount>" on the line (a later amount on the same line is left alone).
_UPI_LINE_RE = re.compile(r'(\d{2}-\d{2}-\d{4})\s+(.+?)\s+₹([\d,.]+)')

# Column order of the returned frame; also applied when nothing matched so
# that callers always see the same schema.
_UPI_COLUMNS = ["Date", "Description", "Amount"]


def _parse_upi_line(line):
    """Return a transaction dict parsed from *line*, or None if it has none."""
    match = _UPI_LINE_RE.search(line)
    if match is None:
        return None

    date, description, amount = match.groups()
    try:
        value = float(amount.replace(",", ""))
    except ValueError:
        # The amount class also accepts strings such as "." or "1.2.3" that
        # are not numbers; skip that line instead of aborting the whole run.
        return None

    return {
        "Date": date,
        "Description": description.strip(),
        "Amount": value,
    }


def extract_upi_transactions(pdf_path):
    """Extract transaction rows from the text of a PDF.

    Every page of the PDF is converted to text with pdfplumber and scanned
    line by line. A line contributes one row when it contains a date of the
    form ``NN-NN-NNNN``, followed by a description, followed by an amount
    prefixed with ``₹``. Only the first such match per line is used.

    Args:
        pdf_path: Passed unchanged to ``pdfplumber.open``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date`` (the matched date
        string, not parsed), ``Description`` (surrounding whitespace removed)
        and ``Amount`` (float, thousands separators "," removed). The columns
        are present even when no line matched.
    """
    transactions = []

    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                # Pages without extractable text yield nothing to scan.
                continue
            for line in text.split("\n"):
                record = _parse_upi_line(line)
                if record is not None:
                    transactions.append(record)

    return pd.DataFrame(transactions, columns=_UPI_COLUMNS)