Update app.py
app.py CHANGED
@@ -15,7 +15,7 @@ load_dotenv()
 os.getenv("GROQ_API_KEY")
 
 def get_pdf_text(pdf_docs):
-    """
+    """Extracts text from the uploaded PDF files."""
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
@@ -24,30 +24,30 @@ def get_pdf_text(pdf_docs):
     return text
 
 def get_text_chunks(text):
-    """
+    """Splits the extracted text into manageable chunks."""
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
     chunks = text_splitter.split_text(text)
     return chunks
 
 def get_vector_store(text_chunks):
-    """
+    """Creates and saves a FAISS vector store from the text chunks."""
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
     vector_store.save_local("faiss_index")
 
 def get_conversational_chain():
-    """
+    """Sets up a conversational chain using the Groq LLM."""
     prompt_template = """
-
-
+    Answer the question in as much detail as possible from the provided context. If the answer is not in
+    the provided context, simply say, "the answer is not available in the context." Do not provide incorrect answers.
 
-
+    Context:
     {context}?
 
-
+    Question:
     {question}
 
-
+    Answer:
     """
 
     model = ChatGroq(
@@ -65,7 +65,7 @@ def eliminar_texto_entre_tags(texto):
     return texto_limpio
 
 def user_input(user_question):
-    """
+    """Handles user queries by retrieving answers from the vector store."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
     new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
@@ -78,65 +78,90 @@ def user_input(user_question):
         return_only_outputs=True
     )
 
-    #
+    # Debugging: print the original response
     original_response = response['output_text']
     print("Original Response:", original_response)
 
-    #
+    # Extract the thought process
     thought_process = ""
     if "<think>" in response['output_text'] and "</think>" in response['output_text']:
         thought_process_match = re.search(r"<think>(.*?)</think>", response['output_text'], re.DOTALL)
         if thought_process_match:
             thought_process = thought_process_match.group(1).strip()
 
-    #
+    # Remove the thought process from the main response
     clean_response = eliminar_texto_entre_tags(original_response)
 
-    #
+    # Debugging: print the cleaned response
     print("Cleaned Response:", clean_response)
 
-    #
-    with st.expander("
+    # Show the model's thought process in an expander
+    with st.expander("Model Thought Process"):
         st.write(thought_process)
 
-    st.markdown(f"###
+    st.markdown(f"### Answer:\n{clean_response}")
 
 def main():
-    """
+    """Main function to run the Streamlit application."""
     st.set_page_config(page_title="Chat PDF", page_icon=":books:", layout="wide")
-    st.title("Chat with PDF using DeepSeek Ai")
 
-
+    # Configure the app's appearance
+    st.markdown(
+        """
+        <style>
+        body {
+            background-color: #1E90FF;
+            color: white;
+        }
+        .sidebar .sidebar-content {
+            background-color: #00008B;
+        }
+        .main {
+            background-color: #00008B;
+            color: white;
+        }
+        .stButton>button {
+            background-color: #1E90FF;
+            color: white;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True
+    )
+
+    st.title("Chat with PDF using DeepSeek AI")
+
+    st.sidebar.header("Upload and Process PDF Files")
     st.sidebar.markdown(
-        "
+        "Using the DeepSeek R1 model for advanced conversational capabilities."
    )
 
     with st.sidebar:
         pdf_docs = st.file_uploader(
-            "
+            "Upload your PDF files:",
             accept_multiple_files=True,
             type=["pdf"]
         )
-        if st.button("
-            with st.spinner("
+        if st.button("Submit and Process"):
+            with st.spinner("Processing your files..."):
                 raw_text = get_pdf_text(pdf_docs)
                 text_chunks = get_text_chunks(raw_text)
                 get_vector_store(text_chunks)
-            st.success("PDFs
+            st.success("PDFs processed and indexed successfully!")
 
     st.markdown(
-        "###
-        "
+        "### Ask Questions about your PDF Files :mag:\n"
+        "Once you upload and process your PDFs, type your questions below."
     )
 
-    user_question = st.text_input("
+    user_question = st.text_input("Enter your question:", placeholder="What do you want to know?")
 
     if user_question:
-        with st.spinner("
+        with st.spinner("Getting your answer..."):
             user_input(user_question)
 
     st.sidebar.info(
-        "**
+        "**Note:** This application uses the DeepSeek R1 model to answer questions accurately."
     )
 
if __name__ == "__main__":
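
The helper eliminar_texto_entre_tags is called above, but its body lies outside the diff context; only its signature and its final return texto_limpio are visible. A minimal sketch of what it presumably does, assuming the tags being removed are the same <think>...</think> pair that user_input() searches for (an illustration, not the committed implementation):

import re

def eliminar_texto_entre_tags(texto):
    # Assumption: strip <think>...</think> blocks, matching the tag pair
    # that user_input() extracts the thought process from.
    texto_limpio = re.sub(r"<think>.*?</think>", "", texto, flags=re.DOTALL)
    return texto_limpio.strip()

Under that assumption, clean_response keeps only the model's final answer, while the extracted thought process is displayed separately in the expander.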