HemanM committed on
Commit
b6e3c51
·
verified ·
1 Parent(s): 80cdad4

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +12 -0
utils.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz # PyMuPDF
3
+
4
def extract_text_from_file(file_obj):
    """Extract plain text from an uploaded ``.pdf`` or ``.txt`` file object.

    The file type is chosen from the extension of ``file_obj.name``,
    compared case-insensitively.

    Args:
        file_obj: A file-like object exposing a ``name`` attribute and a
            ``read()`` method.

    Returns:
        For PDFs, the text of every page joined with newlines. For text
        files, the content decoded as UTF-8 (or returned unchanged when
        ``read()`` already yields ``str``). An empty string for any other
        extension.

    Raises:
        UnicodeDecodeError: If a ``.txt`` payload is not valid UTF-8.
    """
    # Lower-case once so "REPORT.PDF" and "Notes.TXT" are recognised too.
    name = str(file_obj.name).lower()

    if name.endswith(".pdf"):
        # The context manager closes the PyMuPDF document once the text has
        # been collected, instead of leaving it open until garbage collection.
        with fitz.open(stream=file_obj.read(), filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)

    if name.endswith(".txt"):
        data = file_obj.read()
        # Binary handles return bytes; text-mode handles already return str.
        if isinstance(data, (bytes, bytearray)):
            return data.decode("utf-8")
        return data

    # Unsupported extension: callers get an empty string.
    return ""