OpenRAG128 committed on
Commit 195493e · verified · 1 Parent(s): e5c88d8

Create app.py

Files changed (1)
  app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
import os

import streamlit as st
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.schema import HumanMessage, SystemMessage

# Step 1: Load every PDF in the Dataset directory.
loader = PyPDFDirectoryLoader("Dataset")
docx = loader.load()
#st.write("Number of documents loaded:", len(docx))

# Step 2: Split the documents into overlapping chunks for retrieval.
text_sp = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_sp.split_documents(docx)
#st.write("Number of chunks created:", len(chunks))

vector_store = None  # populated below once the documents are indexed

if not chunks:
    st.error("No chunks were created. Please check the documents or text splitter settings.")
else:
    # Step 3: Create metadata (currently not passed to the index below).
    metadatas = [{"source": f"{i}-pl"} for i in range(len(chunks))]

    # Step 4: Embed the chunks with a small CPU-friendly sentence-transformer model.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': "cpu"})

    # Sanity-check that embeddings can be generated for the first chunk.
    try:
        sample_embedding = embeddings.embed_documents([chunks[0].page_content])
        #st.write("Sample embedding length:", len(sample_embedding[0]))
    except Exception as e:
        st.error(f"Error in generating embeddings: {str(e)}")

    # Step 5: Build the FAISS vector store from the chunks.
    try:
        vector_store = FAISS.from_documents(chunks, embeddings)
        #st.write("Vector store created successfully.")
    except IndexError as e:
        st.error("IndexError in creating vector store: " + str(e))
        st.write("Check if chunks and embeddings are non-empty and correctly formatted.")
        st.write("Chunks:", chunks)
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")

# Read the Groq API key from the environment rather than hard-coding it in the source.
llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=os.environ["GROQ_API_KEY"])

def retrieve(query, vector_store, embeddings):
    # Embed the query and return the five most similar chunks.
    query_embedding = embeddings.embed_query(query)
    results = vector_store.similarity_search_by_vector(query_embedding, k=5)
    return results

def generate_response(query, retrieved_docs, llm):
    # Concatenate the retrieved chunks into one context string and ask the LLM.
    context = " ".join([doc.page_content for doc in retrieved_docs])
    messages = [
        SystemMessage(content="You are an expert in prompt engineering."),
        HumanMessage(content=f"Context: {context}\n\nQuestion: {query}\n\nAnswer:")
    ]
    response = llm.invoke(messages)
    return response.content.strip()

st.title("PromptGuru 🚀📖")

st.sidebar.markdown("PromptGuru By OpenRAG 🗣️")
st.sidebar.markdown(
    """
    PromptGuru is a tool you can use to ask any query related to Prompt Engineering and get it answered within a couple of minutes.
    """
)

st.sidebar.markdown(
    """
    Note -- This tool is in beta. Kindly be patient while the response is generated and give the model time to think.
    """
)

st.sidebar.markdown(
    """
    📧 **Get in Touch**

    For inquiries or collaboration proposals, please don't hesitate to reach out to us:
    📩 Email: [email protected]
    🔗 LinkedIn: [OpenRAG](https://www.linkedin.com/company/102036854/admin/dashboard/)
    📸 Instagram: [OpenRAG](https://www.instagram.com/open.rag?igsh=MnFwMHd5cjU1OGFj)

    Experience the future of Human-Chatbot Interaction with OpenRAG.
    """
)

query = st.text_input("Ask your question about prompt engineering:")
if query:
    if vector_store is None:
        st.error("The vector store is not available, so the query cannot be answered.")
    else:
        with st.spinner("Retrieving documents..."):
            retrieved_docs = retrieve(query, vector_store, embeddings)
            # st.write("Retrieved Documents:", retrieved_docs)

        with st.spinner("Generating response..."):
            response = generate_response(query, retrieved_docs, llm)
            st.write("Response:", response)
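A note on reruns: Streamlit re-executes the whole script on every interaction, so as written the PDF loading, chunking, and FAISS indexing above repeat on each query. Below is a minimal sketch (not part of this commit) of caching that setup with Streamlit's st.cache_resource; the build_vector_store helper name is ours, while the directory, chunking parameters, and model name mirror app.py above.

import streamlit as st
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

@st.cache_resource
def build_vector_store(data_dir: str = "Dataset"):
    # Hypothetical helper, not in the commit: load, chunk, embed, and index
    # once per process; Streamlit reuses the cached result across reruns.
    docs = PyPDFDirectoryLoader(data_dir).load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    ).split_documents(docs)
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )
    return FAISS.from_documents(chunks, embeddings), embeddings

vector_store, embeddings = build_vector_store()

With this in place, the app launches as usual (streamlit run app.py, after exporting GROQ_API_KEY) and only the first run pays the indexing cost.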