nisharg nargund committed on
Commit
6ae0e61
·
verified ·
1 Parent(s): 99dadc4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain_groq import ChatGroq
4
+ from langchain_google_genai import ChatGoogleGenerativeAI
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.chains.combine_documents import create_stuff_documents_chain
8
+ from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain.chains import create_retrieval_chain
10
+ from langchain_community.vectorstores import FAISS
11
+ from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
12
+ from bs4 import BeautifulSoup as Soup
13
+ import time
14
+ from langchain.embeddings import HuggingFaceEmbeddings
15
+ from streamlit_option_menu import option_menu
16
+
17
+
18
# Sidebar: static description of the app. Rendered on every Streamlit rerun.
st.sidebar.title("OpenRAG")
st.sidebar.markdown(
    """
OpenRAG is a tool that enhances the speed and efficiency of retrieving information from educational websites,
including the scrap it out component, allowing quick access to precise answers.
"""
)
st.sidebar.markdown(
    """
Whether for academic research, professional inquiries, or personal curiosity, OpenRAG's Scrap it out feature is poised
to revolutionize the way users engage with online educational resources. Experience the unparalleled convenience and effectiveness of Scrap it out
– your gateway to rapid, reliable information retrieval.
"""
)

# Fixed: the feature name was misspelled "Scarp it out" here; everywhere else
# in the app it is "Scrap it out".
st.sidebar.markdown(
    """
Enjoy Using Scrap it out!!
"""
)
39
+
40
+
41
# Main page: title and the URL of the site to crawl and index.
st.title("Scrap it out 🦅")
st.text("")
url_link = st.text_input("Input your website link here")

# (Re)build the vector index only on the first load or when the URL changes.
# The index is stored under the key "vectors"; the previous check looked for
# "vector", which never exists, so the whole site was re-crawled and
# re-embedded on every rerun (i.e. on every question typed).
needs_load = bool(url_link) and (
    "vectors" not in st.session_state
    or url_link != st.session_state.get("loaded_url")
)

if needs_load:
    with st.spinner("Loading..."):
        st.session_state.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # Crawl the site recursively and keep only the visible text of each page.
        st.session_state.loader = RecursiveUrlLoader(
            url=url_link,
            max_depth=10,
            extractor=lambda html: Soup(html, "html.parser").text,
        )
        st.session_state.docs = st.session_state.loader.load()
        st.session_state.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=100
        )
        st.session_state.final_documents = (
            st.session_state.text_splitter.split_documents(st.session_state.docs)
        )

        if not st.session_state.final_documents:
            # Nothing was crawled (bad URL, blocked request, empty pages):
            # there is nothing to index, so report it and end this run
            # instead of failing while building the vector store.
            st.error("No content could be loaded from that link.")
            st.stop()

        st.session_state.vectors = FAISS.from_documents(
            st.session_state.final_documents, st.session_state.embeddings
        )
        # Remember which URL the index belongs to so reruns can skip the crawl.
        st.session_state["loaded_url"] = url_link
        st.success("Loaded!")
56
+
57
# Chat model used to answer questions over the retrieved context.
# The API key is read from the GROQ_API_KEY environment variable (e.g. a
# Space/host secret) rather than being embedded in the source. A key was
# previously committed in this file; it is public and must be revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()

llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=groq_api_key)
60
+
61
+
62
# Prompt for the stuff-documents chain. {context} is filled with the retrieved
# chunks and {input} with the user's question.
# Fixed: the context delimiter is now closed with </context> (it was opened
# twice and never closed), and the question label "Questions;" was a typo.
prompt = ChatPromptTemplate.from_template(
    """
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question.
<context>
{context}
</context>
Question: {input}
"""
)
72
+
73
# Question answering: only available once an index exists for the entered URL.
# Checking for "vectors" avoids an AttributeError when a URL was typed but the
# index has not been built.
if url_link and "vectors" in st.session_state:
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    st.text("")
    query = st.text_input("Input your question here")

    if query:
        # perf_counter measures wall-clock time; process_time only counts CPU
        # time of this process and excludes the time spent waiting on the LLM.
        start = time.perf_counter()
        response = retrieval_chain.invoke({"input": query})
        # Measure once so the console and the page report the same value.
        elapsed = time.perf_counter() - start

        print("Response time: ", elapsed)
        st.write(response["answer"])
        st.write("Response time: ", elapsed)

        # Show the retrieved chunks the answer was based on.
        with st.expander("NOT THE EXPECTED RESPONSE? CHECK OUT HERE"):
            for doc in response["context"]:
                st.write(doc.page_content)
                st.write("----------------------------------")