lozanopastor committed on
Commit
6ce66ba
verified
1 Parent(s): 9ef5861

Update app.py

Files changed (1)
  1. app.py +55 -30
app.py CHANGED
@@ -15,7 +15,7 @@ load_dotenv()
 os.getenv("GROQ_API_KEY")
 
 def get_pdf_text(pdf_docs):
-    """Extracts text from uploaded PDF files."""
+    """Extrae texto de los archivos PDF cargados."""
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
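Note: this hunk shows only the top of get_pdf_text; the page-iteration lines that complete it fall outside the diff context. A minimal sketch of the presumable full function, assuming the standard PyPDF2 pages / extract_text() pattern:

    from PyPDF2 import PdfReader

    def get_pdf_text(pdf_docs):
        """Extrae texto de los archivos PDF cargados."""
        text = ""
        for pdf in pdf_docs:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # extract_text() may return None for image-only pages
                text += page.extract_text() or ""
        return text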
@@ -24,30 +24,30 @@ def get_pdf_text(pdf_docs):
     return text
 
 def get_text_chunks(text):
-    """Splits extracted text into manageable chunks."""
+    """Divide el texto extraído en fragmentos manejables."""
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
     chunks = text_splitter.split_text(text)
     return chunks
 
 def get_vector_store(text_chunks):
-    """Creates and saves a FAISS vector store from text chunks."""
+    """Crea y guarda un almacén de vectores FAISS a partir de fragmentos de texto."""
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
     vector_store.save_local("faiss_index")
 
 def get_conversational_chain():
-    """Sets up a conversational chain using Groq LLM."""
+    """Configura una cadena conversacional usando el modelo Groq LLM."""
     prompt_template = """
-    Answer the question as detailed as possible from the provided context. If the answer is not in
-    the provided context, just say, "answer is not available in the context." Do not provide incorrect answers.
+    Responde la pregunta de la manera más detallada posible a partir del contexto proporcionado. Si la respuesta no está en
+    el contexto proporcionado, simplemente di, "la respuesta no está disponible en el contexto." No proporciones respuestas incorrectas.
 
-    Context:
+    Contexto:
     {context}?
 
-    Question:
+    Pregunta:
     {question}
 
-    Answer:
+    Respuesta:
     """
 
     model = ChatGroq(
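The three functions in this hunk form the app's indexing pipeline. A self-contained sketch of how they chain together (the import paths are assumptions, since they vary across langchain releases; faiss-cpu and sentence-transformers must be installed):

    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS

    raw_text = "..."  # output of get_pdf_text()

    # ~10,000-character chunks with 1,000 characters of overlap, as configured above
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=10000, chunk_overlap=1000
    ).split_text(raw_text)

    # Embed each chunk and persist the FAISS index to ./faiss_index
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    FAISS.from_texts(chunks, embedding=embeddings).save_local("faiss_index")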
@@ -65,7 +65,7 @@ def eliminar_texto_entre_tags(texto):
     return texto_limpio
 
 def user_input(user_question):
-    """Handles user queries by retrieving answers from the vector store."""
+    """Maneja las consultas del usuario recuperando respuestas del almacén de vectores."""
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
     new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
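A note on allow_dangerous_deserialization=True in the context line above: indexes written by save_local include a pickle file, so recent langchain versions refuse to load them without this explicit opt-in. That is acceptable here only because the app reloads an index it created itself. A sketch of the retrieval step this feeds (docs is then handed to the QA chain):

    # Load the locally created index and fetch the chunks most similar to the question
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)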
@@ -78,65 +78,90 @@ def user_input(user_question):
         return_only_outputs=True
     )
 
-    # Debugging: Print the original response
+    # Depuración: Imprimir la respuesta original
     original_response = response['output_text']
     print("Original Response:", original_response)
 
-    # Extract the thought process
+    # Extraer el proceso de pensamiento
     thought_process = ""
     if "<think>" in response['output_text'] and "</think>" in response['output_text']:
         thought_process_match = re.search(r"<think>(.*?)</think>", response['output_text'], re.DOTALL)
         if thought_process_match:
             thought_process = thought_process_match.group(1).strip()
 
-    # Remove the thought process from the main response
+    # Eliminar el proceso de pensamiento de la respuesta principal
     clean_response = eliminar_texto_entre_tags(original_response)
 
-    # Debugging: Print the cleaned response
+    # Depuración: Imprimir la respuesta limpia
     print("Cleaned Response:", clean_response)
 
-    # Display the model's thought process in the expander
-    with st.expander("Model Thought Process"):
+    # Mostrar el proceso de pensamiento del modelo en el expander
+    with st.expander("Proceso de Pensamiento del Modelo"):
         st.write(thought_process)
 
-    st.markdown(f"### Reply:\n{clean_response}")
+    st.markdown(f"### Respuesta:\n{clean_response}")
 
 def main():
-    """Main function to run the Streamlit app."""
+    """Función principal para ejecutar la aplicación Streamlit."""
     st.set_page_config(page_title="Chat PDF", page_icon=":books:", layout="wide")
-    st.title("Chat with PDF using DeepSeek Ai")
 
-    st.sidebar.header("Upload & Process PDF Files")
+    # Configuración de la apariencia de la aplicación
+    st.markdown(
+        """
+        <style>
+        body {
+            background-color: #1E90FF;
+            color: white;
+        }
+        .sidebar .sidebar-content {
+            background-color: #00008B;
+        }
+        .main {
+            background-color: #00008B;
+            color: white;
+        }
+        .stButton>button {
+            background-color: #1E90FF;
+            color: white;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True
+    )
+
+    st.title("Chat con PDF usando DeepSeek AI")
+
+    st.sidebar.header("Subir y Procesar Archivos PDF")
     st.sidebar.markdown(
-        "Using DeepSeek R1 model for advanced conversational capabilities."
+        "Usando el modelo DeepSeek R1 para capacidades conversacionales avanzadas."
     )
 
     with st.sidebar:
         pdf_docs = st.file_uploader(
-            "Upload your PDF files:",
+            "Sube tus archivos PDF:",
             accept_multiple_files=True,
             type=["pdf"]
         )
-        if st.button("Submit & Process"):
-            with st.spinner("Processing your files..."):
+        if st.button("Enviar y Procesar"):
+            with st.spinner("Procesando tus archivos..."):
                 raw_text = get_pdf_text(pdf_docs)
                 text_chunks = get_text_chunks(raw_text)
                 get_vector_store(text_chunks)
-                st.success("PDFs processed and indexed successfully!")
+                st.success("PDFs procesados e indexados exitosamente!")
 
     st.markdown(
-        "### Ask Questions from Your PDF Files :mag:\n"
-        "Once you upload and process your PDFs, type your questions below."
+        "### Haz Preguntas sobre tus Archivos PDF :mag:\n"
+        "Una vez que subas y proceses tus PDFs, escribe tus preguntas a continuación."
    )
 
-    user_question = st.text_input("Enter your question:", placeholder="What do you want to know?")
+    user_question = st.text_input("Introduce tu pregunta:", placeholder="¿Qué quieres saber?")
 
     if user_question:
-        with st.spinner("Fetching your answer..."):
+        with st.spinner("Obteniendo tu respuesta..."):
             user_input(user_question)
 
     st.sidebar.info(
-        "**Note:** This app uses DeepSeek R1 model for answering questions accurately."
+        "**Nota:** Esta aplicación usa el modelo DeepSeek R1 para responder preguntas de manera precisa."
    )
 
 if __name__ == "__main__":
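The diff never shows the body of eliminar_texto_entre_tags, only its return texto_limpio line, so the following is a hypothetical implementation consistent with that context and with the think-tag extraction above (DeepSeek R1 emits its chain of thought between <think> and </think>):

    import re

    def eliminar_texto_entre_tags(texto):
        # Hypothetical body: drop the <think>...</think> reasoning block, keep the answer
        texto_limpio = re.sub(r"<think>.*?</think>", "", texto, flags=re.DOTALL)
        return texto_limpio.strip()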
 
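To try this revision locally, the usual steps would be: put GROQ_API_KEY in a .env file (the module calls load_dotenv() and os.getenv("GROQ_API_KEY") at import time) and start the app with "streamlit run app.py".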