Spaces:

andreeabodea
/

extract_project_report

Runtime error

andreeabodea committed on Mar 27, 2024

Commit

87af43d

verified ·

1 Parent(s): 29399bd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,8 +6,8 @@ import re
 import fitz  # PyMuPDF
 import json
-files = [f for f in os.listdir("/Users/andreeabodea/") if f.endswith(".pdf")]
-print(files)
 """
 Extract the text from a section of a PDF file  between 'wanted_section' and 'next_section'.
@@ -48,12 +48,8 @@ def get_section(path, wanted_section, next_section):
         final_text = new_text.replace(special_char, special_char_replacement_list[index])
     return final_text
-for file in files:
-    print("for each pdf file...")
-    path = "/Users/andreeabodea/" + file
-    pdf = pdfplumber.open(path)
-    print(path)
     results_dict = {}
     results_dict["2.1 Aktualisierte Einordnung des Moduls in das EZ-Programm"] = \
@@ -88,5 +84,11 @@ for file in files:
     #json_string = json.dumps(results_dict, indent=4)
     #print(json_string)
-# iface = gr.Interface(fn=get_section, inputs="text", outputs="text")
-# iface.launch()

 import fitz  # PyMuPDF
 import json
+#files = [f for f in os.listdir("/Users/andreeabodea/") if f.endswith(".pdf")]
+#print(files)
 """
 Extract the text from a section of a PDF file  between 'wanted_section' and 'next_section'.
         final_text = new_text.replace(special_char, special_char_replacement_list[index])
     return final_text
+def process_pdf(path):
     results_dict = {}
     results_dict["2.1 Aktualisierte Einordnung des Moduls in das EZ-Programm"] = \
     #json_string = json.dumps(results_dict, indent=4)
     #print(json_string)
+# Define the Gradio interface
+iface = gr.Interface(fn=process_pdf,
+                     inputs=gr.inputs.File(type="file", label="Upload PDF"),
+                     outputs="text",
+                     title="PDF Text Extractor",
+                     description="Upload a PDF file to extract all its text.")
+iface.launch()