Spaces:
Sleeping
Sleeping
Create utils.py
Browse files
utils.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import fitz # PyMuPDF
|
| 3 |
+
|
| 4 |
+
def extract_text_from_file(file_obj):
    """Return the text content of an uploaded ``.pdf`` or ``.txt`` file.

    The file type is chosen from the extension of ``file_obj.name``,
    compared case-insensitively so ``REPORT.PDF`` is handled like
    ``report.pdf``.

    Args:
        file_obj: Object exposing a ``name`` attribute (the file name or
            path) and a ``read()`` method returning the file content.

    Returns:
        For PDFs, the text of every page joined with newlines. For text
        files, the content decoded as UTF-8 (or returned unchanged when
        ``read()`` already yields ``str``). An empty string for any other
        extension.

    Raises:
        UnicodeDecodeError: If a ``.txt`` file's bytes are not valid UTF-8.
    """
    lowered_name = file_obj.name.lower()

    if lowered_name.endswith(".pdf"):
        # The context manager closes the document once the text has been
        # collected; the join must therefore happen inside the block.
        with fitz.open(stream=file_obj.read(), filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)

    if lowered_name.endswith(".txt"):
        content = file_obj.read()
        # A file opened in text mode already yields str, which has no
        # .decode(); only binary content needs decoding.
        if isinstance(content, str):
            return content
        return content.decode("utf-8")

    # Unsupported extension: callers receive an empty string, not an error.
    return ""
|