Spaces:

Vartex39
/

vizsum-pro

Running

vizsum-pro / pdf_reader.py

UI tarafına sayfa minimum değeri ve görünürlük kontrolü eklendi; PDF sayfa aralığı güvenli hale getirildi

2ca41ce 3 days ago

1.08 kB

	import fitz # PyMuPDF

	def extract_text_chunks_from_pdf(pdf_input, start=1, end=None, max_chars=4000):
	try:
	if hasattr(pdf_input, "read"):
	doc = fitz.open(stream=pdf_input.read(), filetype="pdf")
	else:
	doc = fitz.open(pdf_input)

	total_pages = len(doc)

	# Sayfa sınırı kontrolleri
	if start < 1:
	start = 1
	if end is None or end > total_pages:
	end = total_pages
	if end < start:
	end = start

	chunks = []
	current_chunk = ""

	for i in range(start - 1, end):
	page = doc[i]
	text = page.get_text()
	if len(current_chunk) + len(text) < max_chars:
	current_chunk += "\n" + text
	else:
	chunks.append(current_chunk.strip())
	current_chunk = text

	if current_chunk:
	chunks.append(current_chunk.strip())

	doc.close()
	return chunks

	except Exception as e:
	return [f"[ERROR] PDF bölme hatası: {str(e)}"]