Update app.py

app.py CHANGED
@@ -1,8 +1,7 @@
 import streamlit as st
 import os
 import requests
-import
-import chromadb
+import chromadb
 from langchain.document_loaders import PDFPlumberLoader
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_experimental.text_splitter import SemanticChunker
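Review note: the bare "import" on old line 4 is a syntax error, so Python never finished loading app.py, which is almost certainly what produced the Space's build error; this hunk collapses it into the single valid "import chromadb". A pre-push syntax check would have caught it. A minimal sketch using only the standard library (the filename is the one from this diff):

    import py_compile

    # Raises py_compile.PyCompileError on a syntax error such as a bare "import".
    py_compile.compile("app.py", doraise=True)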
@@ -14,13 +13,16 @@ from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt
 
 # ----------------- Streamlit UI Setup -----------------
 st.set_page_config(page_title="Blah", layout="wide")
-st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=150)
+st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=150)
 st.title("Blah-1")
 
-
 # ----------------- API Keys -----------------
 os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
 
+# ----------------- Ensure Vector Store Directory Exists -----------------
+if not os.path.exists("./chroma_langchain_db"):
+    os.makedirs("./chroma_langchain_db")
+
 # ----------------- Clear ChromaDB Cache -----------------
 chromadb.api.client.SharedSystemClient.clear_system_cache()
 
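Review note: os.makedirs accepts exist_ok=True, which folds the new check-then-create pair into one call and avoids a race if the directory appears between the check and the creation. An equivalent one-liner for the same path:

    import os

    # Same effect as the if/makedirs pair above; no error if the directory already exists.
    os.makedirs("./chroma_langchain_db", exist_ok=True)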
@@ -36,10 +38,14 @@ if "processed_chunks" not in st.session_state:
 if "vector_store" not in st.session_state:
     st.session_state.vector_store = None
 
-# ----------------- Load Models
+# ----------------- Load Models -------------------
 llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
 rag_llm = ChatGroq(model="mixtral-8x7b-32768")
 
+# Enable verbose logging for debugging
+llm_judge.verbose = True
+rag_llm.verbose = True
+
 # ----------------- PDF Selection (Upload or URL) -----------------
 st.sidebar.subheader("📂 PDF Selection")
 pdf_source = st.radio("Choose a PDF source:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
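Review note: verbose is a standard field on LangChain chat models, so it can also be passed at construction rather than assigned afterwards. A sketch of the tidier form, assuming ChatGroq comes from the langchain_groq package as usual (the import is outside this diff):

    from langchain_groq import ChatGroq

    # verbose=True at construction replaces the two post-hoc assignments above.
    llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b", verbose=True)
    rag_llm = ChatGroq(model="mixtral-8x7b-32768", verbose=True)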
@@ -79,7 +85,7 @@ if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
     loader = PDFPlumberLoader(st.session_state.pdf_path)
     docs = loader.load()
 
-    # Embedding Model
+    # Embedding Model (HF on CPU)
     model_name = "nomic-ai/modernbert-embed-base"
     embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": "cpu"})
 
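Review note: this embedding model is what the file's SemanticChunker import is for. A minimal sketch of how the two compose, assuming docs is the PDFPlumberLoader output from the lines above:

    from langchain_huggingface import HuggingFaceEmbeddings
    from langchain_experimental.text_splitter import SemanticChunker

    embedding_model = HuggingFaceEmbeddings(
        model_name="nomic-ai/modernbert-embed-base",
        model_kwargs={"device": "cpu"},  # force CPU inference
    )
    chunker = SemanticChunker(embedding_model)  # splits where embedding similarity drops
    chunks = chunker.split_documents(docs)      # docs: output of PDFPlumberLoader above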
@@ -137,15 +143,15 @@ if query:
     final_output = context_management_chain.invoke({"context": context, "retriever_query": query, "query": query})
     st.success("✅ Full pipeline executed successfully!")
 
-    # ----------------- Display All Outputs -----------------
-    st.
+    # ----------------- Display All Outputs (Formatted) -----------------
+    st.markdown("### 🔥 Context Relevancy Evaluation")
     st.json(final_output["relevancy_response"])
 
-    st.
+    st.markdown("### 📦 Picked Relevant Contexts")
     st.json(final_output["context_number"])
 
-    st.
+    st.markdown("### 🔥 Extracted Relevant Contexts")
     st.json(final_output["relevant_contexts"])
 
-    st.
+    st.markdown("## 🔥 RAG Final Response")
     st.write(final_output["final_response"])
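Review note: the four heading/output pairs added in this hunk follow one pattern, so a table-driven loop would keep future additions in sync. A sketch using the same final_output keys the diff shows:

    # One (heading, key) row per JSON section; extend the list instead of copy-pasting.
    sections = [
        ("### 🔥 Context Relevancy Evaluation", "relevancy_response"),
        ("### 📦 Picked Relevant Contexts", "context_number"),
        ("### 🔥 Extracted Relevant Contexts", "relevant_contexts"),
    ]
    for heading, key in sections:
        st.markdown(heading)
        st.json(final_output[key])

    st.markdown("## 🔥 RAG Final Response")
    st.write(final_output["final_response"])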