Spaces:
Running
Running
luanpoppe
committed on
Commit
·
32df555
1
Parent(s):
605a49c
fix: minor fix
Browse files
_utils/splitters/Splitter_class.py
CHANGED
|
@@ -39,6 +39,7 @@ class Splitter:
|
|
| 39 |
# pages = get_pdf_from_bubble(
|
| 40 |
# pdf_path
|
| 41 |
# ) # Gera uma lista de objetos Document, sendo cada item da lista referente a UMA PÁGINA inteira do PDF.
|
|
|
|
| 42 |
|
| 43 |
initial_chunks: List[str] = []
|
| 44 |
|
|
@@ -48,7 +49,6 @@ class Splitter:
|
|
| 48 |
page_boundaries, combined_text = (
|
| 49 |
combine_documents_without_losing_pagination(pages)
|
| 50 |
)
|
| 51 |
-
full_text_as_string = ""
|
| 52 |
for page in pages:
|
| 53 |
full_text_as_string = full_text_as_string + page.page_content
|
| 54 |
initial_chunks = initial_chunks + self.text_splitter.split_text(
|
|
|
|
| 39 |
# pages = get_pdf_from_bubble(
|
| 40 |
# pdf_path
|
| 41 |
# ) # Gera uma lista de objetos Document, sendo cada item da lista referente a UMA PÁGINA inteira do PDF.
|
| 42 |
+
full_text_as_string = ""
|
| 43 |
|
| 44 |
initial_chunks: List[str] = []
|
| 45 |
|
|
|
|
| 49 |
page_boundaries, combined_text = (
|
| 50 |
combine_documents_without_losing_pagination(pages)
|
| 51 |
)
|
|
|
|
| 52 |
for page in pages:
|
| 53 |
full_text_as_string = full_text_as_string + page.page_content
|
| 54 |
initial_chunks = initial_chunks + self.text_splitter.split_text(
|