Spaces:

drewThomasson
/

PDF-to-TXT-OCR

Running

drewThomasson committed on Oct 9, 2024

Commit

225af8d

verified ·

1 Parent(s): 51a779d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ def ocr_pdf(file_path):
         extracted_text = ""
         for i, image in enumerate(images):
             text = pytesseract.image_to_string(image)
-            extracted_text += f"--- Page {i+1} ---\n{text}\n\n"
         # Save the extracted text to a .txt file in a persistent location
         output_txt_path = os.path.join(temp_dir, "extracted_text.txt")

         extracted_text = ""
         for i, image in enumerate(images):
             text = pytesseract.image_to_string(image)
+            extracted_text += f"\n{text}\n\n"
         # Save the extracted text to a .txt file in a persistent location
         output_txt_path = os.path.join(temp_dir, "extracted_text.txt")