Manasa1 committed on
Commit
57a73aa
·
verified ·
1 Parent(s): 64a2736

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -11,16 +11,20 @@ import os
11
  import nltk
12
  nltk.download('punkt')
13
 
14
- # Install Poppler in the runtime environment
15
- os.system("apt-get update && apt-get install -y poppler-utils")
16
 
17
  secret = os.getenv('Groq_api')
18
 
19
  working_dir = os.path.dirname(os.path.abspath(__file__))
20
 
21
  def load_documents(file_path):
22
- # Specify poppler_path to ensure compatibility in Spaces
23
- loader = UnstructuredPDFLoader(file_path, poppler_path="/usr/bin")
 
 
 
 
24
  documents = loader.load()
25
  return documents
26
 
 
11
  import nltk
12
  nltk.download('punkt')
13
 
14
+ # Install Poppler and Tesseract in the runtime environment
15
+ os.system("apt-get update && apt-get install -y poppler-utils tesseract-ocr")
16
 
17
  secret = os.getenv('Groq_api')
18
 
19
  working_dir = os.path.dirname(os.path.abspath(__file__))
20
 
21
  def load_documents(file_path):
22
+ # Specify poppler_path and tesseract_path to ensure compatibility
23
+ loader = UnstructuredPDFLoader(
24
+ file_path,
25
+ poppler_path="/usr/bin",
26
+ tesseract_path="/usr/bin/tesseract"
27
+ )
28
  documents = loader.load()
29
  return documents
30