DHEIVER committed on
Commit
0f41986
·
verified ·
1 Parent(s): ebafa8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -3
app.py CHANGED
@@ -20,7 +20,20 @@ class AdvancedPDFKnowledgeBase:
20
  self.bm25 = None
21
 
22
  def _split_into_chunks(self, text: str, chunk_size: int = 500) -> List[str]:
23
- words = text.split()
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  return [' '.join(words[i:i + chunk_size])
25
  for i in range(0, len(words), chunk_size)]
26
 
@@ -76,6 +89,7 @@ class AdvancedPDFKnowledgeBase:
76
  # Inicializa a base de conhecimento
77
  knowledge_base = AdvancedPDFKnowledgeBase()
78
 
 
79
  def respond(
80
  message: str,
81
  history: List[Tuple[str, str]],
@@ -126,7 +140,7 @@ Responda com base no contexto quando relevante."""
126
  yield response, context, ""
127
  except Exception as e:
128
  yield f"Erro ao gerar resposta: {str(e)}", context, ""
129
-
130
  # Função para carregar PDFs
131
  def load_pdfs(pdf_files: List[gr.File]):
132
  status = knowledge_base.load_pdfs(pdf_files)
@@ -154,7 +168,7 @@ with gr.Blocks(title="RAG Avançado com PDFs", theme=gr.themes.Soft()) as demo:
154
  with gr.Accordion("Configurações", open=False):
155
  with gr.Row():
156
  with gr.Column():
157
- pdf_upload = gr.File(label="Carregar PDFs", file_types=[".pdf"], file_count="multiple", interactive=True)
158
  load_btn = gr.Button("Carregar PDFs")
159
 
160
  with gr.Column():
 
20
  self.bm25 = None
21
 
22
  def _split_into_chunks(self, text: str, chunk_size: int = 500) -> List[str]:
23
+ # Remove linhas duplicadas ou muito semelhantes
24
+ lines = text.split("\n")
25
+ unique_lines = []
26
+ seen = set()
27
+ for line in lines:
28
+ if line.strip() and line not in seen:
29
+ unique_lines.append(line)
30
+ seen.add(line)
31
+
32
+ # Junta as linhas únicas em um único texto
33
+ cleaned_text = "\n".join(unique_lines)
34
+
35
+ # Divide o texto limpo em chunks
36
+ words = cleaned_text.split()
37
  return [' '.join(words[i:i + chunk_size])
38
  for i in range(0, len(words), chunk_size)]
39
 
 
89
  # Inicializa a base de conhecimento
90
  knowledge_base = AdvancedPDFKnowledgeBase()
91
 
92
+ # Função principal de resposta
93
  def respond(
94
  message: str,
95
  history: List[Tuple[str, str]],
 
140
  yield response, context, ""
141
  except Exception as e:
142
  yield f"Erro ao gerar resposta: {str(e)}", context, ""
143
+
144
  # Função para carregar PDFs
145
  def load_pdfs(pdf_files: List[gr.File]):
146
  status = knowledge_base.load_pdfs(pdf_files)
 
168
  with gr.Accordion("Configurações", open=False):
169
  with gr.Row():
170
  with gr.Column():
171
+ pdf_upload = gr.File(label="Carregar PDFs", file_types=[".pdf", ".txt"], file_count="multiple", interactive=True)
172
  load_btn = gr.Button("Carregar PDFs")
173
 
174
  with gr.Column():