Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -19,21 +19,26 @@ def ocr_pdf(file_path):
|
|
19 |
|
20 |
# Save the extracted text to a .txt file in a persistent location
|
21 |
output_txt_path = os.path.join(temp_dir, "extracted_text.txt")
|
22 |
-
with open(output_txt_path, "w") as f:
|
23 |
f.write(extracted_text)
|
24 |
|
25 |
# Create a persistent file to serve for download
|
26 |
final_output_path = "/tmp/extracted_text.txt"
|
27 |
shutil.copy(output_txt_path, final_output_path) # Copy to a persistent location
|
28 |
|
29 |
-
|
|
|
30 |
|
31 |
# Gradio Interface
|
32 |
iface = gr.Interface(
|
33 |
fn=lambda file: ocr_pdf(file.name), # Pass file path instead of file object
|
34 |
inputs=gr.File(label="Upload PDF File"),
|
35 |
-
outputs=
|
36 |
-
|
|
|
|
|
|
|
|
|
37 |
)
|
38 |
|
39 |
if __name__ == "__main__":
|
|
|
19 |
|
20 |
# Save the extracted text to a .txt file in a persistent location
|
21 |
output_txt_path = os.path.join(temp_dir, "extracted_text.txt")
|
22 |
+
with open(output_txt_path, "w", encoding="utf-8") as f:
|
23 |
f.write(extracted_text)
|
24 |
|
25 |
# Create a persistent file to serve for download
|
26 |
final_output_path = "/tmp/extracted_text.txt"
|
27 |
shutil.copy(output_txt_path, final_output_path) # Copy to a persistent location
|
28 |
|
29 |
+
# Return both: actual text and path (for download)
|
30 |
+
return extracted_text, final_output_path
|
31 |
|
32 |
# Gradio Interface
|
33 |
iface = gr.Interface(
|
34 |
fn=lambda file: ocr_pdf(file.name), # Pass file path instead of file object
|
35 |
inputs=gr.File(label="Upload PDF File"),
|
36 |
+
outputs=[
|
37 |
+
gr.Textbox(label="Extracted Text"), # Shows text directly
|
38 |
+
gr.File(label="Download Extracted Text (.txt)") # Optional download
|
39 |
+
],
|
40 |
+
title="PDF to Text OCR",
|
41 |
+
allow_flagging="never"
|
42 |
)
|
43 |
|
44 |
if __name__ == "__main__":
|