Spaces:
Running
Running
File size: 1,078 Bytes
cc21f11 3c3759a cc21f11 3c3759a a8d7146 3c3759a 2ca41ce 0303b9b a8d7146 cc21f11 3c3759a a8d7146 0303b9b a8d7146 0303b9b a8d7146 cc21f11 a8d7146 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import fitz # PyMuPDF
def extract_text_chunks_from_pdf(pdf_input, start=1, end=None, max_chars=4000):
    """Extract the text of a PDF page range and group it into chunks.

    Pages are read in order and their text is accumulated into the current
    chunk until adding the next page would reach ``max_chars``; at that point
    the current chunk is emitted and the page starts a new one. Pages are
    never split, so a single page whose text is longer than ``max_chars``
    yields one oversized chunk.

    Args:
        pdf_input: Either an object with a ``read()`` method (its bytes are
            opened as an in-memory PDF stream) or a value passed directly to
            ``fitz.open`` (e.g. a file path).
        start: 1-based number of the first page to read. Values below 1 are
            treated as 1.
        end: 1-based number of the last page to read (inclusive). ``None`` or
            a value past the last page means "through the last page". A value
            below ``start`` is raised to ``start``.
        max_chars: Size threshold, in characters, used when deciding whether
            the next page still fits into the current chunk.

    Returns:
        A list of stripped, non-empty text chunks. If anything goes wrong
        (opening, reading, closing), a single-element list holding an
        ``"[ERROR] ..."`` message is returned instead of raising.
    """
    try:
        if hasattr(pdf_input, "read"):
            doc = fitz.open(stream=pdf_input.read(), filetype="pdf")
        else:
            doc = fitz.open(pdf_input)

        # The inner try/finally guarantees the document is closed even when
        # page access or text extraction fails; any error raised by close()
        # itself is still converted to the error list by the outer handler.
        try:
            total_pages = len(doc)

            # Page range bounds checks
            if start < 1:
                start = 1
            if end is None or end > total_pages:
                end = total_pages
            if end < start:
                end = start

            chunks = []
            current_chunk = ""
            for i in range(start - 1, end):
                text = doc[i].get_text()
                if len(current_chunk) + len(text) < max_chars:
                    current_chunk += "\n" + text
                else:
                    # Flush what has been collected so far, but never emit an
                    # empty chunk (e.g. when the very first page is already
                    # at or above max_chars, or the buffer is only whitespace).
                    finished = current_chunk.strip()
                    if finished:
                        chunks.append(finished)
                    current_chunk = text

            # Flush the trailing chunk, again skipping whitespace-only text.
            finished = current_chunk.strip()
            if finished:
                chunks.append(finished)
            return chunks
        finally:
            doc.close()
    except Exception as e:
        return [f"[ERROR] PDF bölme hatası: {str(e)}"]
|