Spaces:
Runtime error
Runtime error
Witold Wydmański
committed on
Commit
·
5415ed9
1
Parent(s):
c914e02
feat: return file instead of text
Browse files
app.py
CHANGED
@@ -27,17 +27,17 @@ def tesseract_ocr(image, progress=gr.Progress()):
|
|
27 |
img.load()
|
28 |
text = pytesseract.image_to_string(img)
|
29 |
text_res.append(text)
|
30 |
-
return text
|
31 |
|
|
|
|
|
|
|
32 |
|
33 |
-
if __name__=="__main__":
|
34 |
-
#make sure that flagged/ dir is created
|
35 |
-
os.chdir("/code")
|
36 |
|
|
|
37 |
iface = gr.Interface(
|
38 |
fn=tesseract_ocr,
|
39 |
inputs=[gr.File(label="PDF file")],
|
40 |
-
outputs=gr.
|
41 |
title="PDF to Text Converter",
|
42 |
description="Converts a PDF file to text using Tesseract OCR.",
|
43 |
).queue(concurrency_count=10)
|
|
|
27 |
img.load()
|
28 |
text = pytesseract.image_to_string(img)
|
29 |
text_res.append(text)
|
|
|
30 |
|
31 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as file:
|
32 |
+
file.write("\n".join(text_res))
|
33 |
+
return file.name
|
34 |
|
|
|
|
|
|
|
35 |
|
36 |
+
if __name__=="__main__":
|
37 |
iface = gr.Interface(
|
38 |
fn=tesseract_ocr,
|
39 |
inputs=[gr.File(label="PDF file")],
|
40 |
+
outputs=gr.File(label="Text file", type="file", encoding="utf-8"),
|
41 |
title="PDF to Text Converter",
|
42 |
description="Converts a PDF file to text using Tesseract OCR.",
|
43 |
).queue(concurrency_count=10)
|