Spaces:
Running
Running
import fitz # PyMuPDF | |
def extract_text_chunks_from_pdf(pdf_input, start=1, end=None, max_chars=4000):
    """Extract the text of a PDF page range and group it into chunks.

    Pages are read in order and their text is accumulated (joined with a
    newline) until adding the next page would reach ``max_chars``; the
    accumulated text is then emitted as one chunk and a new chunk starts
    with that page.

    Args:
        pdf_input: Either an object with a ``read()`` method, whose content
            is handed to ``fitz.open`` as a PDF stream, or any value that
            ``fitz.open`` accepts directly (presumably a file path --
            confirm against callers).
        start: 1-based number of the first page to read. Values below 1
            are raised to 1.
        end: 1-based number of the last page to read (inclusive). ``None``
            or a value past the last page means "through the last page";
            a value below ``start`` is raised to ``start``.
        max_chars: Size threshold used when grouping pages into a chunk.

    Returns:
        A list of whitespace-stripped, non-empty text chunks. If anything
        goes wrong, a single-element list holding an ``[ERROR] ...``
        message is returned instead of raising.

    Note:
        A page is never split: a single page whose text is ``max_chars``
        or longer becomes a chunk of its own, so a chunk can exceed
        ``max_chars``.
    """
    try:
        if hasattr(pdf_input, "read"):
            doc = fitz.open(stream=pdf_input.read(), filetype="pdf")
        else:
            doc = fitz.open(pdf_input)

        # From here on the document is open; the inner ``finally`` makes
        # sure it is closed even when reading a page fails.
        try:
            total_pages = len(doc)

            # Page-range bounds checks.
            if start < 1:
                start = 1
            if end is None or end > total_pages:
                end = total_pages
            if end < start:
                end = start

            chunks = []
            current_chunk = ""
            for page_index in range(start - 1, end):
                text = doc[page_index].get_text()
                if len(current_chunk) + len(text) < max_chars:
                    current_chunk += "\n" + text
                else:
                    # Flush what has been gathered so far, skipping blank
                    # text so that no empty chunk is emitted (e.g. when
                    # the very first page already reaches the threshold).
                    finished = current_chunk.strip()
                    if finished:
                        chunks.append(finished)
                    current_chunk = text

            # Flush the trailing chunk, again ignoring whitespace-only text.
            finished = current_chunk.strip()
            if finished:
                chunks.append(finished)
            return chunks
        finally:
            doc.close()
    except Exception as e:
        # Errors are reported in-band as a one-item list rather than raised.
        return [f"[ERROR] PDF bölme hatası: {str(e)}"]