# OpenRAG128's picture
# Create app.py
# 195493e verified
# raw
# history blame
# 3.69 kB
import os

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
# --- Document ingestion and index construction (runs on every script rerun) ---

# Load every PDF found in the local "Dataset" directory.
loader = PyPDFDirectoryLoader("Dataset")
docx = loader.load()

# Split documents into overlapping chunks so each embedding covers a
# manageable span of text while retaining context across chunk boundaries.
text_sp = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_sp.split_documents(docx)

# Defined up front so downstream code can distinguish "indexing failed"
# from "name never created" instead of hitting a NameError.
vector_store = None
embeddings = None
if not chunks:
    st.error("No chunks were created. Please check the documents or text splitter settings.")
else:
    # Per-chunk source metadata (kept for parity with the original pipeline;
    # not consumed elsewhere in this file).
    metadatas = [{"source": f"{i}-pl"} for i in range(len(chunks))]
    # CPU-only sentence-transformer embedding model.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': "cpu"})
    try:
        # Smoke-test the embedding model on a single chunk before building
        # the full index, so model problems surface with a clear message.
        sample_embedding = embeddings.embed_documents([chunks[0].page_content])
    except Exception as e:
        st.error(f"Error in generating embeddings: {str(e)}")
    try:
        vector_store = FAISS.from_documents(chunks, embeddings)
    except IndexError as e:
        st.error("IndexError in creating vector store: " + str(e))
        st.write("Check if chunks and embeddings are non-empty and correctly formatted.")
        st.write("Chunks:", chunks)
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")

# SECURITY: the API key was previously hard-coded in source (and therefore
# leaked with the repository). Read it from the environment instead and
# stop the app with a clear message when it is missing.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=groq_api_key)
def retrieve(query, vector_store, embeddings, k=5):
    """Return the *k* documents from *vector_store* most similar to *query*.

    Parameters
    ----------
    query : str
        Natural-language question to search with.
    vector_store : object
        Vector index exposing ``similarity_search_by_vector(vector, k=...)``.
    embeddings : object
        Embedding model exposing ``embed_query(text)``.
    k : int, optional
        Number of documents to retrieve. Defaults to 5, matching the
        previously hard-coded value, so existing callers are unaffected.

    Returns
    -------
    list
        Whatever the vector store returns for the nearest-neighbour search.
    """
    # Embed the query once, then do a pure vector-similarity lookup.
    query_embedding = embeddings.embed_query(query)
    return vector_store.similarity_search_by_vector(query_embedding, k=k)
from langchain.schema import HumanMessage, SystemMessage
def generate_response(query, retrieved_docs, llm):
    """Answer *query* with *llm*, grounded in the text of *retrieved_docs*.

    The page contents of the retrieved documents are concatenated into a
    single context string, the model is primed with a system message, and
    the stripped text of its reply is returned.
    """
    combined_context = " ".join(doc.page_content for doc in retrieved_docs)
    prompt = f"Context: {combined_context}\n\nQuestion: {query}\n\nAnswer:"
    reply = llm([
        SystemMessage(content="You are an expert in prompt engineering."),
        HumanMessage(content=prompt),
    ])
    return reply.content.strip()
# --- Static UI: page title plus sidebar copy, rendered data-driven ---
st.title("PromptGuru 🚀📖")

# Sidebar sections in display order; each is passed to st.sidebar.markdown
# exactly as the original literals were.
_SIDEBAR_SECTIONS = (
    "PromptGuru By OpenRAG 🗣️",
    """
PromptGuru is a tool you can use for asking any queries related Prompt Engineering and Get it solved within a couple of minutes.
""",
    """
Note -- This tool is in a beta stage. Kindly have some patience while generating the response and give the model time to think.
""",
    """
📧 **Get in Touch**
For inquiries or collaboration proposals, please don't hesitate to reach out to us:
📩 Email: [email protected]
🔗 LinkedIn: [OpenRAG](https://www.linkedin.com/company/102036854/admin/dashboard/)
📸 Instagram: [OpenRAG](https://www.instagram.com/open.rag?igsh=MnFwMHd5cjU1OGFj)
Experience the future of Human-Chatbot Interaction with OpenRAG.
""",
)
for _section in _SIDEBAR_SECTIONS:
    st.sidebar.markdown(_section)
# --- Interactive query loop: retrieve context, then generate an answer ---
query = st.text_input("Ask your question about prompt engineering:")
if query:
    # Guard: if document loading or FAISS indexing failed earlier in the
    # script, `vector_store`/`embeddings` may never have been created.
    # Without this check the app crashed with a NameError here.
    if globals().get("vector_store") is None or globals().get("embeddings") is None:
        st.error("The document index is unavailable. Fix the errors reported above and reload the app.")
    else:
        with st.spinner("Retrieving documents..."):
            retrieved_docs = retrieve(query, vector_store, embeddings)
        with st.spinner("Generating response..."):
            response = generate_response(query, retrieved_docs, llm)
        st.write("Response:", response)