File size: 1,078 Bytes
cc21f11
 
3c3759a
cc21f11
3c3759a
a8d7146
3c3759a
 
 
2ca41ce
 
 
 
 
 
 
 
 
0303b9b
a8d7146
 
cc21f11
3c3759a
 
a8d7146
 
 
 
 
 
0303b9b
a8d7146
 
0303b9b
a8d7146
 
cc21f11
 
a8d7146
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import fitz  # PyMuPDF

def extract_text_chunks_from_pdf(pdf_input, start=1, end=None, max_chars=4000):
    """Extract the text of a PDF page range and group it into chunks.

    Pages are read in order and their text is joined with newlines. A new
    chunk is started whenever adding the next page would make the current
    chunk longer than ``max_chars`` characters. Pages are never split, so a
    single page whose text alone exceeds ``max_chars`` becomes a chunk of
    its own that is longer than the limit.

    Args:
        pdf_input: Either an object with a ``read()`` method (its content is
            handed to ``fitz.open`` as a PDF stream) or a value passed
            directly to ``fitz.open`` (e.g. a file path).
        start: 1-based number of the first page to read. Values below 1 are
            treated as 1.
        end: 1-based number of the last page to read (inclusive). ``None``
            or a value past the last page means "up to the last page";
            a value below ``start`` is raised to ``start``.
        max_chars: Target upper bound for the length of a chunk.

    Returns:
        A list of non-empty, whitespace-stripped text chunks. If anything
        raises while opening or reading the document, a one-element list
        holding an ``"[ERROR] ..."`` message is returned instead.
    """
    try:
        if hasattr(pdf_input, "read"):
            doc = fitz.open(stream=pdf_input.read(), filetype="pdf")
        else:
            doc = fitz.open(pdf_input)

        # From here on the document is open; make sure it is closed even
        # when reading a page fails.
        try:
            total_pages = len(doc)

            # Page-range bounds checks.
            start = max(start, 1)
            if end is None or end > total_pages:
                end = total_pages
            end = max(end, start)

            chunks = []
            pending = []      # page texts belonging to the chunk being built
            pending_len = 0   # len("\n".join(pending))

            def flush():
                # Skip chunks that are empty after stripping (e.g. pages
                # without extractable text) instead of emitting "".
                chunk = "\n".join(pending).strip()
                if chunk:
                    chunks.append(chunk)

            for page_index in range(start - 1, end):
                text = doc[page_index].get_text()

                if not pending:
                    # First page of a chunk is always accepted, even if it
                    # is longer than max_chars on its own.
                    pending.append(text)
                    pending_len = len(text)
                    continue

                joined_len = pending_len + 1 + len(text)  # +1 for the "\n"
                if joined_len <= max_chars:
                    pending.append(text)
                    pending_len = joined_len
                else:
                    flush()
                    pending = [text]
                    pending_len = len(text)

            if pending:
                flush()

            return chunks
        finally:
            doc.close()

    except Exception as e:
        # Callers receive errors in-band as a single pseudo-chunk.
        return [f"[ERROR] PDF bölme hatası: {str(e)}"]