Spaces:
Sleeping
Sleeping
Create extract.py
Browse files- extract.py +19 -0
extract.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pdfplumber
|
2 |
+
import pandas as pd
|
3 |
+
import re
|
4 |
+
|
5 |
+
def extract_upi_transactions(pdf_path) -> pd.DataFrame:
    """Extract dated rupee transactions from the text of a PDF.

    Each page is converted to text with pdfplumber and scanned line by
    line. A line is recorded when it contains a hyphen-separated date of
    the form 2 digits-2 digits-4 digits (e.g. ``05-01-2024``), followed by
    a description, followed by a ``₹`` amount. Pages that yield no text and
    lines that do not match are skipped.

    Args:
        pdf_path: Passed unchanged to ``pdfplumber.open``.

    Returns:
        A DataFrame with the columns ``Date`` (the date string as written
        in the PDF), ``Description`` (str) and ``Amount`` (float, thousands
        separators removed). The three columns are present even when no
        transaction was found.
    """
    # Compiled once, outside the page/line loops.
    # - The description is a lazy ``.+?`` instead of ``[\w\s]+`` so that
    #   descriptions containing "@", "/", "-", "." etc. still match; being
    #   lazy, it stops at the first "₹" on the line.
    # - ``\s*`` after "₹" tolerates a space between the symbol and the digits.
    pattern = re.compile(r"(\d{2}-\d{2}-\d{4})\s+(.+?)\s+₹\s*([\d,.]+)")
    columns = ["Date", "Description", "Amount"]
    transactions = []

    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                continue

            for line in text.splitlines():
                match = pattern.search(line)
                if match is None:
                    continue

                date, description, amount = match.groups()
                try:
                    value = float(amount.replace(",", ""))
                except ValueError:
                    # The amount class also accepts text such as "1.2.3"
                    # or a lone ","; treat such a line as a non-match
                    # rather than aborting the whole extraction.
                    continue

                transactions.append(
                    {
                        "Date": date,
                        "Description": description.strip(),
                        "Amount": value,
                    }
                )

    # Explicit columns keep the frame's schema stable when nothing matched.
    return pd.DataFrame(transactions, columns=columns)
|