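"""PromptGuru: a Streamlit RAG app that answers prompt-engineering questions.

It loads PDFs from a local "Dataset" directory, splits them into chunks,
indexes the chunks in FAISS using sentence-transformer embeddings, and answers
user queries with a Groq-hosted Mixtral model grounded in the retrieved chunks.
"""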
import os

import streamlit as st
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.schema import HumanMessage, SystemMessage



# Load every PDF found in the local "Dataset" directory.
loader = PyPDFDirectoryLoader("Dataset")
docs = loader.load()
#st.write("Number of documents loaded:", len(docs))


# Split the documents into 1,000-character chunks with 200 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(docs)
#st.write("Number of chunks created:", len(chunks))


# Pre-initialize so the query handler below can detect a failed build.
vector_store = None

if not chunks:
    st.error("No chunks were created. Please check the documents or text splitter settings.")
else:
    # Tag each chunk with a source identifier so retrieved passages can be
    # traced back. (The original built a separate `metadatas` list that was
    # never used; attaching the tags to the chunks lets FAISS keep them.)
    for i, chunk in enumerate(chunks):
        chunk.metadata["source"] = f"{i}-pl"

    # CPU-only sentence-transformer embeddings.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={"device": "cpu"})

    # Sanity-check that the embedding model works on a single chunk.
    try:
        sample_embedding = embeddings.embed_documents([chunks[0].page_content])
        #st.write("Sample embedding length:", len(sample_embedding[0]))
    except Exception as e:
        st.error(f"Error in generating embeddings: {str(e)}")

    # Build the FAISS index over all chunks.
    try:
        vector_store = FAISS.from_documents(chunks, embeddings)
        #st.write("Vector store created successfully.")
    except IndexError as e:
        st.error("IndexError in creating vector store: " + str(e))
        st.write("Check if chunks and embeddings are non-empty and correctly formatted.")
        st.write("Chunks:", chunks)
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")

# The Groq API key is read from the environment rather than hard-coded
# (the original embedded a live key, which should never be committed).
llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=os.environ["GROQ_API_KEY"])

def retrieve(query, vector_store, embeddings):
    """Embed the query and return the k=5 most similar chunks from the index."""
    query_embedding = embeddings.embed_query(query)
    results = vector_store.similarity_search_by_vector(query_embedding, k=5)
    return results
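# Example with a hypothetical query (not from the original code):
# retrieve("What is few-shot prompting?", vector_store, embeddings)
# returns the five chunks whose embeddings lie closest to the query embedding.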


def generate_response(query, retrieved_docs, llm):
    """Stuff the retrieved chunks into the prompt and ask the LLM to answer."""
    context = " ".join([doc.page_content for doc in retrieved_docs])
    messages = [
        SystemMessage(content="You are an expert in prompt engineering."),
        HumanMessage(content=f"Context: {context}\n\nQuestion: {query}\n\nAnswer:")
    ]
    # invoke() replaces the deprecated llm(messages) call style.
    response = llm.invoke(messages)
    return response.content.strip()
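# Rough estimate, not from the original code: five ~1,000-character chunks is
# on the order of 1,500 tokens, comfortably inside mixtral-8x7b-32768's
# 32k-token context window, so the stuffed context needs no truncation here.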



st.title("PromptGuru πŸš€πŸ“–")

st.sidebar.markdown("PromptGuru By OpenRAG πŸ—£οΈ")
st.sidebar.markdown(
    """
PromptGuru is a tool you can use to ask any query related to prompt engineering and get it answered within a couple of minutes.
    """
    )

st.sidebar.markdown(
    """
Note -- This tool is in beta. Kindly be patient while the response is generated and give the model time to think.
    """
    )

st.sidebar.markdown(
    """
πŸ“§ **Get in Touch**

For inquiries or collaboration proposals, please don't hesitate to reach out to us:
πŸ“© Email: [email protected]
πŸ”— LinkedIn: [OpenRAG](https://www.linkedin.com/company/102036854/admin/dashboard/)
πŸ“Έ Instagram: [OpenRAG](https://www.instagram.com/open.rag?igsh=MnFwMHd5cjU1OGFj)

Experience the future of Human-Chatbot Interaction with OpenRAG. 
    """
    
)

query = st.text_input("Ask your question about prompt engineering:")
if query:
    if vector_store is None:
        st.error("The vector store is unavailable; see the errors above.")
    else:
        with st.spinner("Retrieving documents..."):
            retrieved_docs = retrieve(query, vector_store, embeddings)
            #st.write("Retrieved Documents:", retrieved_docs)

        with st.spinner("Generating response..."):
            response = generate_response(query, retrieved_docs, llm)
            st.write("Response:", response)