Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,16 +11,20 @@ import os
|
|
11 |
import nltk
|
12 |
nltk.download('punkt')
|
13 |
|
14 |
-
# Install Poppler in the runtime environment
|
15 |
-
os.system("apt-get update && apt-get install -y poppler-utils")
|
16 |
|
17 |
secret = os.getenv('Groq_api')
|
18 |
|
19 |
working_dir = os.path.dirname(os.path.abspath(__file__))
|
20 |
|
21 |
def load_documents(file_path):
|
22 |
-
# Specify poppler_path to ensure compatibility
|
23 |
-
loader = UnstructuredPDFLoader(
|
|
|
|
|
|
|
|
|
24 |
documents = loader.load()
|
25 |
return documents
|
26 |
|
|
|
11 |
import nltk
|
12 |
# Download the NLTK 'punkt' resource at startup.
nltk.download('punkt')

# Install Poppler and Tesseract in the runtime environment.
# os.system() hands back the shell's exit status; check it so that a failed
# install is reported here instead of surfacing later as an obscure error
# while loading a PDF. The install stays best-effort: a failure only warns.
# NOTE(review): if this runs on Hugging Face Spaces, system packages are
# normally declared in a packages.txt file rather than installed at import
# time -- confirm whether apt-get can succeed in this runtime at all.
_apt_status = os.system("apt-get update && apt-get install -y poppler-utils tesseract-ocr")
if _apt_status != 0:
    print(
        "WARNING: installing poppler-utils/tesseract-ocr failed "
        f"(status {_apt_status}); PDF loading may not work."
    )

# Value of the 'Groq_api' environment variable; None when it is not set.
secret = os.getenv('Groq_api')

# Absolute path of the directory that contains this file.
working_dir = os.path.dirname(os.path.abspath(__file__))
|
20 |
|
21 |
def load_documents(file_path):
    """Load the PDF at ``file_path`` and return the loaded documents.

    Builds an ``UnstructuredPDFLoader`` for the file, pointing it at the
    Poppler directory and the Tesseract executable, and returns whatever
    its ``load()`` call produces.

    NOTE(review): presumably the loader forwards ``poppler_path`` and
    ``tesseract_path`` to the underlying PDF partitioner -- confirm that
    these keyword arguments are actually honoured.
    """
    return UnstructuredPDFLoader(
        file_path,
        poppler_path="/usr/bin",
        tesseract_path="/usr/bin/tesseract",
    ).load()
|
30 |
|