Spaces:
Sleeping
Sleeping
Update lib/read_pdf.py
Browse files
- lib/read_pdf.py +4 -3
lib/read_pdf.py
CHANGED
@@ -104,10 +104,11 @@ def extract_and_format_paragraphs(pdf_path):
|
|
104 |
width = page.width
|
105 |
height = page.height
|
106 |
|
107 |
-
#
|
108 |
-
|
109 |
-
right_bbox = (width / 2, 0, width, height) # Right column
|
110 |
|
|
|
|
|
111 |
# Extract text from the left column
|
112 |
left_column_text = page.within_bbox(left_bbox).extract_text() or ""
|
113 |
# Clean the left column text
|
|
|
104 |
width = page.width
|
105 |
height = page.height
|
106 |
|
107 |
+
header_height = height * 0.1 # Adjust this value based on your PDF
|
108 |
+
#footer_height = height * 0.1 # Adjust this value based on your PDF
|
|
|
109 |
|
110 |
+
left_bbox = (0, header_height, width / 2, height - footer_height) # Left column
|
111 |
+
right_bbox = (width / 2, header_height, width, height)
|
112 |
# Extract text from the left column
|
113 |
left_column_text = page.within_bbox(left_bbox).extract_text() or ""
|
114 |
# Clean the left column text
|