File size: 780 Bytes
cc21f11
 
a8d7146
cc21f11
a8d7146
cc21f11
 
a8d7146
0303b9b
a8d7146
 
cc21f11
a8d7146
 
 
 
 
 
 
0303b9b
a8d7146
 
0303b9b
a8d7146
 
cc21f11
 
a8d7146
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import fitz  # PyMuPDF

def extract_text_chunks_from_pdf(pdf_input, max_chars: int = 4000):
    """Extract the text of a PDF and group whole pages into chunks.

    Pages are never split: consecutive pages are packed into the same chunk
    while the running size stays below ``max_chars``. A single page whose
    text is longer than ``max_chars`` therefore becomes its own, oversized
    chunk.

    Args:
        pdf_input: Either a filesystem path (``str``) or an object with a
            ``read()`` method whose result is passed to ``fitz.open`` as the
            PDF byte stream.
        max_chars: Size budget per chunk, in characters.

    Returns:
        A list of non-empty, whitespace-stripped text chunks in page order.
        If anything fails, a single-element list holding an
        ``"[ERROR] ..."`` message is returned instead of raising.
    """
    try:
        if isinstance(pdf_input, str):
            doc = fitz.open(pdf_input)
        else:
            doc = fitz.open(stream=pdf_input.read(), filetype="pdf")

        chunks = []
        page_texts = []  # texts of the pages in the chunk being built
        size = 0  # characters used so far, incl. one separator per page

        def flush():
            # Blank / whitespace-only content must not yield an empty chunk.
            chunk = "\n".join(page_texts).strip()
            if chunk:
                chunks.append(chunk)

        try:
            for page in doc:
                text = page.get_text()
                if size + len(text) < max_chars:
                    page_texts.append(text)
                    size += len(text) + 1  # +1 for the "\n" separator
                else:
                    # Page does not fit: emit what we have and start a new
                    # chunk with this page.
                    flush()
                    page_texts = [text]
                    size = len(text)
            flush()
        finally:
            # Release the document even if text extraction fails midway.
            doc.close()

        return chunks

    except Exception as e:
        # Best-effort contract: callers receive the error as a chunk.
        return [f"[ERROR] PDF bölme hatası: {e}"]