shouvik27 commited on
Commit
f12ae57
·
verified ·
1 Parent(s): 363722b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import pickle
4
+ import time
5
+ from langchain import OpenAI
6
+ from langchain.chains import RetrievalQAWithSourcesChain
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.document_loaders import UnstructuredURLLoader
9
+ from langchain.embeddings import OpenAIEmbeddings
10
+ from langchain.vectorstores import FAISS
11
+
12
+ from dotenv import load_dotenv
# --- App configuration and sidebar UI ---

load_dotenv()  # take environment variables from .env (especially the OpenAI API key)

# Title emoji was mojibake ("πŸ“ˆ") in the original — repaired to the intended 📈.
st.title("RockyBot: News Research Tool 📈")
st.sidebar.title("News Article URLs")

# Collect up to three article URLs from the sidebar.
# Blank inputs come back as empty strings and are kept in the list here.
urls = []
for i in range(3):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)

process_url_clicked = st.sidebar.button("Process URLs")
file_path = "faiss_store_openai.pkl"  # where the pickled FAISS index is persisted

main_placeholder = st.empty()  # single slot reused for status messages and the query box
# NOTE(review): temperature=0.9 gives fairly creative answers — confirm this is
# intended for a research tool; max_tokens=500 caps each completion's length.
llm = OpenAI(temperature=0.9, max_tokens=500)
28
+
if process_url_clicked:
    # Robustness: ignore blank sidebar fields so UnstructuredURLLoader is not
    # asked to fetch empty URLs (blank text_inputs return "").
    valid_urls = [u.strip() for u in urls if u and u.strip()]

    # Load the article content from the provided URLs.
    loader = UnstructuredURLLoader(urls=valid_urls)
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = loader.load()

    # Split the raw documents into ~1000-character chunks, preferring to break
    # on paragraph, then line, then sentence, then clause boundaries.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)

    # Create embeddings for each chunk and index them in FAISS.
    embeddings = OpenAIEmbeddings()
    vectorstore_openai = FAISS.from_documents(docs, embeddings)
    main_placeholder.text("Embedding Vector Started Building...✅✅✅")
    time.sleep(2)  # brief pause so the status message is visible before being replaced

    # Save the FAISS index to a pickle file.
    # NOTE(review): pickling a FAISS vector store is fragile (it wraps native
    # resources) and pickle files are unsafe to load from untrusted sources —
    # consider FAISS.save_local / FAISS.load_local instead.
    with open(file_path, "wb") as f:
        pickle.dump(vectorstore_openai, f)
# Question/answer section. Reuses the main placeholder as the query input box,
# so any earlier status message is replaced by the text field.
query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(file_path):
        # NOTE(review): pickle.load executes arbitrary code embedded in the
        # file — acceptable only because the file is produced locally by this
        # same app; never load a pickle from an untrusted source.
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
        result = chain({"question": query}, return_only_outputs=True)
        # result is a dict of the form {"answer": "...", "sources": "..."}.
        st.header("Answer")
        # Use .get so a missing "answer" key renders empty instead of crashing.
        st.write(result.get("answer", ""))

        # Display sources, if available.
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            # Sources come back as a single newline-separated string.
            for source in sources.split("\n"):
                st.write(source)
    else:
        # Previously a silent no-op; tell the user why nothing happened.
        st.warning("No index found — click 'Process URLs' first.")