Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -205,27 +205,20 @@ def generate_pdf_conversion_stream(pdf_input_source_path_or_url):
|
|
205 |
|
206 |
page_text = page.extract_text(layout=True, x_density=1, y_density=1) or ""
|
207 |
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
yield yield_message("status", {"message": f" Processing table {table_idx+1} on page {i+1}..."})
|
214 |
-
header_cells = table_data[0]
|
215 |
-
header = [" | ".join(str(cell) if cell is not None else "" for cell in header_cells)]
|
216 |
-
separator = [" | ".join(["---"] * len(header_cells))]
|
217 |
-
body = [" | ".join(str(cell) if cell is not None else "" for cell in row) for row in table_data[1:]]
|
218 |
-
table_md_lines = header + separator + body
|
219 |
-
page_tables_md += f"**Table (Page {i+1}):**\n" + "\n".join(table_md_lines) + "\n\n"
|
220 |
|
221 |
formatted_page_text_md = format_page_text_to_markdown_chunk(page_text)
|
222 |
|
223 |
yield yield_message("markdown_chunk", {"content": formatted_page_text_md})
|
224 |
-
if page_tables_md:
|
225 |
-
|
226 |
time.sleep(0.01)
|
227 |
except Exception as e:
|
228 |
-
logger.error(f"Error during PDF text
|
229 |
yield yield_message("error", {"message": f"Error during text extraction: {str(e)}"})
|
230 |
|
231 |
if not check_poppler():
|
|
|
205 |
|
206 |
page_text = page.extract_text(layout=True, x_density=1, y_density=1) or ""
|
207 |
|
208 |
+
# Removed table extraction logic here
|
209 |
+
# page_tables_md = "" # No longer needed
|
210 |
+
# tables = page.extract_tables() # No longer needed
|
211 |
+
# if tables: # No longer needed
|
212 |
+
# ... (table processing code removed) ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
|
214 |
formatted_page_text_md = format_page_text_to_markdown_chunk(page_text)
|
215 |
|
216 |
yield yield_message("markdown_chunk", {"content": formatted_page_text_md})
|
217 |
+
# if page_tables_md: # No longer needed, as page_tables_md is not created
|
218 |
+
# yield yield_message("markdown_chunk", {"content": page_tables_md})
|
219 |
time.sleep(0.01)
|
220 |
except Exception as e:
|
221 |
+
logger.error(f"Error during PDF text extraction: {str(e)}", exc_info=True) # Updated log message
|
222 |
yield yield_message("error", {"message": f"Error during text extraction: {str(e)}"})
|
223 |
|
224 |
if not check_poppler():
|