lozanopastor committed on
Commit
6ce66ba
verified
1 Parent(s): 9ef5861

Update app.py

Files changed (1)
  1. app.py +55 -30
app.py CHANGED
@@ -15,7 +15,7 @@ load_dotenv()
 os.getenv("GROQ_API_KEY")
 
 def get_pdf_text(pdf_docs):
-    """Extracts text from uploaded PDF files."""
+    """Extrae texto de los archivos PDF cargados."""
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
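Note: this hunk shows only the top of get_pdf_text; the page-iteration lines that complete it fall outside the diff context. A minimal sketch of the presumable full function, assuming the standard PyPDF2 pages / extract_text() pattern:

    from PyPDF2 import PdfReader

    def get_pdf_text(pdf_docs):
        """Extrae texto de los archivos PDF cargados."""
        text = ""
        for pdf in pdf_docs:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # extract_text() may return None for image-only pages
                text += page.extract_text() or ""
        return text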
@@ -24,30 +24,30 @@ def get_pdf_text(pdf_docs):
     return text
 
 def get_text_chunks(text):
-    """Splits extracted text into manageable chunks."""
+    """Divide el texto extraído en fragmentos manejables."""
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
     chunks = text_splitter.split_text(text)
     return chunks
 
 def get_vector_store(text_chunks):
-    """Creates and saves a FAISS vector store from text chunks."""
+    """Crea y guarda un almacén de vectores FAISS a partir de fragmentos de texto."""
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
     vector_store.save_local("faiss_index")
 
 def get_conversational_chain():
-    """Sets up a conversational chain using Groq LLM."""
+    """Configura una cadena conversacional usando el modelo Groq LLM."""
     prompt_template = """
-    Answer the question as detailed as possible from the provided context. If the answer is not in
-    the provided context, just say, "answer is not available in the context." Do not provide incorrect answers.
+    Responde la pregunta de la manera más detallada posible a partir del contexto proporcionado. Si la respuesta no está en
+    el contexto proporcionado, simplemente di, "la respuesta no está disponible en el contexto." No proporciones respuestas incorrectas.
 
-    Context:
+    Contexto:
     {context}?
 
-    Question:
+    Pregunta:
     {question}
 
-    Answer:
+    Respuesta:
     """
 
     model = ChatGroq(
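The three functions in this hunk form the app's indexing pipeline. A self-contained sketch of how they chain together (the import paths are assumptions, since they vary across langchain releases; faiss-cpu and sentence-transformers must be installed):

    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS

    raw_text = "..."  # output of get_pdf_text()

    # ~10,000-character chunks with 1,000 characters of overlap, as configured above
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=10000, chunk_overlap=1000
    ).split_text(raw_text)

    # Embed each chunk and persist the FAISS index to ./faiss_index
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    FAISS.from_texts(chunks, embedding=embeddings).save_local("faiss_index")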
@@ -65,7 +65,7 @@ def eliminar_texto_entre_tags(texto):
     return texto_limpio
 
 def user_input(user_question):
-    """Handles user queries by retrieving answers from the vector store."""
+    """Maneja las consultas del usuario recuperando respuestas del almacén de vectores."""
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
     new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
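A note on allow_dangerous_deserialization=True in the context line above: indexes written by save_local include a pickle file, so recent langchain versions refuse to load them without this explicit opt-in. That is acceptable here only because the app reloads an index it created itself. A sketch of the retrieval step this feeds (docs is then handed to the QA chain):

    # Load the locally created index and fetch the chunks most similar to the question
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)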
@@ -78,65 +78,90 @@ def user_input(user_question):
         return_only_outputs=True
     )
 
-    # Debugging: Print the original response
+    # Depuración: Imprimir la respuesta original
     original_response = response['output_text']
     print("Original Response:", original_response)
 
-    # Extract the thought process
+    # Extraer el proceso de pensamiento
     thought_process = ""
     if "<think>" in response['output_text'] and "</think>" in response['output_text']:
         thought_process_match = re.search(r"<think>(.*?)</think>", response['output_text'], re.DOTALL)
         if thought_process_match:
             thought_process = thought_process_match.group(1).strip()
 
-    # Remove the thought process from the main response
+    # Eliminar el proceso de pensamiento de la respuesta principal
     clean_response = eliminar_texto_entre_tags(original_response)
 
-    # Debugging: Print the cleaned response
+    # Depuración: Imprimir la respuesta limpia
     print("Cleaned Response:", clean_response)
 
-    # Display the model's thought process in the expander
-    with st.expander("Model Thought Process"):
+    # Mostrar el proceso de pensamiento del modelo en el expander
+    with st.expander("Proceso de Pensamiento del Modelo"):
         st.write(thought_process)
 
-    st.markdown(f"### Reply:\n{clean_response}")
+    st.markdown(f"### Respuesta:\n{clean_response}")
 
 def main():
-    """Main function to run the Streamlit app."""
+    """Función principal para ejecutar la aplicación Streamlit."""
     st.set_page_config(page_title="Chat PDF", page_icon=":books:", layout="wide")
-    st.title("Chat with PDF using DeepSeek Ai")
 
-    st.sidebar.header("Upload & Process PDF Files")
+    # Configuración de la apariencia de la aplicación
+    st.markdown(
+        """
+        <style>
+        body {
+            background-color: #1E90FF;
+            color: white;
+        }
+        .sidebar .sidebar-content {
+            background-color: #00008B;
+        }
+        .main {
+            background-color: #00008B;
+            color: white;
+        }
+        .stButton>button {
+            background-color: #1E90FF;
+            color: white;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True
+    )
+
+    st.title("Chat con PDF usando DeepSeek AI")
+
+    st.sidebar.header("Subir y Procesar Archivos PDF")
     st.sidebar.markdown(
-        "Using DeepSeek R1 model for advanced conversational capabilities."
+        "Usando el modelo DeepSeek R1 para capacidades conversacionales avanzadas."
     )
 
     with st.sidebar:
         pdf_docs = st.file_uploader(
-            "Upload your PDF files:",
+            "Sube tus archivos PDF:",
             accept_multiple_files=True,
             type=["pdf"]
         )
-        if st.button("Submit & Process"):
-            with st.spinner("Processing your files..."):
+        if st.button("Enviar y Procesar"):
+            with st.spinner("Procesando tus archivos..."):
                 raw_text = get_pdf_text(pdf_docs)
                 text_chunks = get_text_chunks(raw_text)
                 get_vector_store(text_chunks)
-                st.success("PDFs processed and indexed successfully!")
+                st.success("PDFs procesados e indexados exitosamente!")
 
     st.markdown(
-        "### Ask Questions from Your PDF Files :mag:\n"
-        "Once you upload and process your PDFs, type your questions below."
+        "### Haz Preguntas sobre tus Archivos PDF :mag:\n"
+        "Una vez que subas y proceses tus PDFs, escribe tus preguntas a continuación."
    )
 
-    user_question = st.text_input("Enter your question:", placeholder="What do you want to know?")
+    user_question = st.text_input("Introduce tu pregunta:", placeholder="¿Qué quieres saber?")
 
     if user_question:
-        with st.spinner("Fetching your answer..."):
+        with st.spinner("Obteniendo tu respuesta..."):
             user_input(user_question)
 
     st.sidebar.info(
-        "**Note:** This app uses DeepSeek R1 model for answering questions accurately."
+        "**Nota:** Esta aplicación usa el modelo DeepSeek R1 para responder preguntas de manera precisa."
    )
 
 if __name__ == "__main__":
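The diff never shows the body of eliminar_texto_entre_tags, only its return texto_limpio line, so the following is a hypothetical implementation consistent with that context and with the think-tag extraction above (DeepSeek R1 emits its chain of thought between <think> and </think>):

    import re

    def eliminar_texto_entre_tags(texto):
        # Hypothetical body: drop the <think>...</think> reasoning block, keep the answer
        texto_limpio = re.sub(r"<think>.*?</think>", "", texto, flags=re.DOTALL)
        return texto_limpio.strip()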
 
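To try this revision locally, the usual steps would be: put GROQ_API_KEY in a .env file (the module calls load_dotenv() and os.getenv("GROQ_API_KEY") at import time) and start the app with "streamlit run app.py".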