Spaces:
Running
Running
import fitz # PyMuPDF | |
def _pack_page_texts(page_texts, max_chars):
    """Greedily pack consecutive page texts into whitespace-stripped chunks.

    Pages are appended to the current chunk (separated by a newline) while
    the accumulated length stays below ``max_chars``; otherwise the current
    chunk is flushed and the page starts a new one. Chunks that are empty
    after stripping are dropped instead of being emitted as ``""``.

    Note: a single page whose text is longer than ``max_chars`` is not
    split; it is emitted as one oversized chunk.
    """
    chunks = []
    current = ""
    for text in page_texts:
        if len(current) + len(text) < max_chars:
            current += "\n" + text
            continue
        # The page does not fit: flush what we have and start over with it.
        flushed = current.strip()
        if flushed:
            chunks.append(flushed)
        current = text
    tail = current.strip()
    if tail:
        chunks.append(tail)
    return chunks


def extract_text_chunks_from_pdf(pdf_input, max_chars=4000):
    """Extract the text of a PDF and group it, page by page, into chunks.

    Args:
        pdf_input: Either a ``str`` file path, or an object with a
            ``read()`` method whose result is passed to PyMuPDF as the
            PDF byte stream.
        max_chars: Soft upper bound on chunk size. Pages are only ever
            combined whole, so a single page longer than this value yields
            a chunk larger than ``max_chars``.

    Returns:
        A list of non-empty, whitespace-stripped text chunks in page order.
        On any failure the function does not raise; it returns a one-element
        list containing an ``"[ERROR] ..."`` message instead (the message
        text is Turkish for "PDF splitting error").
    """
    # The broad catch is deliberate: callers receive errors as a list entry
    # rather than as an exception, and that contract is preserved here.
    try:
        if isinstance(pdf_input, str):
            doc = fitz.open(pdf_input)
        else:
            doc = fitz.open(stream=pdf_input.read(), filetype="pdf")
        try:
            # The generator is fully consumed inside the helper, i.e. before
            # the document is closed in the ``finally`` clause.
            return _pack_page_texts(
                (page.get_text() for page in doc), max_chars
            )
        finally:
            # Close the document even when text extraction fails midway.
            doc.close()
    except Exception as e:
        return [f"[ERROR] PDF bölme hatası: {str(e)}"]