wadhwahitesh committed on
Commit
95d1bce
·
1 Parent(s): 88afba6

Adding files

Browse files
YoutubeChat/YoutubeChat.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import YoutubeLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.vectorstores import FAISS
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.chains import LLMChain
7
+ from langchain.memory import ConversationBufferWindowMemory
8
+ from dotenv import find_dotenv, load_dotenv
9
+ from langchain.agents import initialize_agent
10
+ from langchain.prompts.chat import (
11
+ ChatPromptTemplate,
12
+ SystemMessagePromptTemplate,
13
+ HumanMessagePromptTemplate,
14
+ )
15
+ import textwrap
16
+ from langchain.agents import Tool
17
+ from langchain.schema import SystemMessage
18
+ import os
19
+
20
class YoutubeChatBot:
    """Chat agent that answers questions about a single YouTube video.

    On construction the video transcript is loaded, split into chunks and
    indexed in a FAISS vector store. A LangChain conversational agent is
    then built with three tools: a knowledge-base lookup, a "start
    protocol" introduction, and a query rewriter.
    """

    def __init__(self, video_url, OPENAI_API_KEY=None):
        """Load the transcript, build the vector index and create the agent.

        Args:
            video_url: URL of the YouTube video to chat about.
            OPENAI_API_KEY: OpenAI key exported to the ``OPENAI_API_KEY``
                environment variable. When ``None`` the environment is
                left untouched, so a key that is already set stays in
                effect.
        """
        # os.environ only accepts strings; assigning the default None
        # raised TypeError before any useful work was done.
        if OPENAI_API_KEY is not None:
            os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
        embeddings = OpenAIEmbeddings()
        loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=True)
        transcript = loader.load()

        # Split the transcript into overlapping chunks before embedding.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        docs = text_splitter.split_documents(transcript)

        self.db = FAISS.from_documents(docs, embeddings)

        self.llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.7)

        self.conversational_memory = ConversationBufferWindowMemory(
            memory_key="chat_history",
            k=5,
            return_messages=True,
        )

        self.tools = [
            Tool(
                name='Knowledge Base',
                func=self.getKnowledge,
                description="""Use this tool to get informartion from the youtube video about the questions asked. Use the query which is outputed by the improve Query tool always. Do not use this on the query given by the user directly. The information will be from a Youtube Video.""",
            ),
            Tool(
                name='Starting protocol',
                func=self.getIntroduction,
                description="""Use this tool when you get a query saying 'start protocol' only.
                This tool will return a summary of the video transcript and 3 questions that we suggest user to ask about the video to get more information.
                There are no inputs to this tool.


                """,
            ),
            Tool(
                name="Improve Query",
                func=self.improveQuery,
                description="""Use this first after every user prompt. """,
            ),
        ]

        system_message = """
        You are an AI chatbot which converses about a youtube video.
        You have a tool to access that will give you information about the quetion asked from the video transcript.

        If you feel the question is not relevant to the youtube video return 'I dont Know'. Do not use your own knowledge.
        """
        self.agent = initialize_agent(
            agent='chat-conversational-react-description',
            tools=self.tools,
            llm=self.llm,
            verbose=True,
            max_iterations=5,
            early_stopping_method='generate',
            memory=self.conversational_memory,
            handle_parsing_errors=True,
            agent_kwargs={"system_message": system_message},
        )

    def improveQuery(self, query):
        """Ask the LLM to rewrite ``query`` into a clearer search query.

        The conversation memory object is interpolated into the prompt as
        context. Returns whatever ``self.llm.invoke`` returns.
        """
        return self.llm.invoke(f"You are an assistant whose task is make the user query better. Given this {self.conversational_memory} chat history and {query} this user query. Make the query better and more clear to improve search results. Do not return anything except the updated query.")

    def getKnowledge(self, query, k=3):
        """Return the ``k`` transcript chunks most similar to ``query``.

        Args:
            query: Text to search the vector store with.
            k: Number of chunks to retrieve.

        Returns:
            The documents produced by the vector store's similarity search.
        """
        # The original also joined the page contents into a string that was
        # never used; that dead computation has been removed.
        return self.db.similarity_search(query, k=k)

    def outputFormatter(self, output):
        """Ask the LLM to reformat the agent's raw ``output`` for display.

        Returns whatever ``self.llm.invoke`` returns; ``send`` reads its
        ``content`` attribute.
        """
        return self.llm.invoke(f""" You are an agent helping a user with a youtube video. You are returned this as the output {output} for a query. Your task is to format the output in the best possible way. This is the previous conversation {self.conversational_memory}. Instructions to format are,
        1. If there are long sentences break them using new line character.
        2. If there are numbered or bullet points, break them using new line characters.
        3. If there is no chat history, start with greeting the user followed by the formatted output.
        4. If there are questions in the output, return them as questions. Your only task is formatting.

        Do not return anything except the output or anything not about the task. Do not say this was the format asked or mention formatted output. Return the respone once. This a system prompt and not a user promopt.
        """)

    def getIntroduction(self, query):
        """Produce a video summary plus three suggested questions.

        ``query`` is accepted because the agent passes the tool an input,
        but it is not used: the search text is always "video summary".
        """
        docs = self.db.similarity_search("video summary", k=3)
        return self.llm.invoke(f"You are returned these Docs: {docs}\n when asked about the summary of the youtube video. Generate a summary of the youtube video using these docs and also generate 3 questions you recommend user to ask about the video knowing the summary. Not have the summary too long. Do NOT forget the questions.")

    def send(self, user_query):
        """Run ``user_query`` through the agent and return the formatted reply text."""
        output = self.agent(user_query)['output']
        return self.outputFormatter(output).content

    def start(self):
        """Run an interactive console chat until the user types ``exit()``.

        Each reply is printed; previously the value returned by ``send``
        was dropped, so the formatted answer was never shown.
        """
        print(self.send("start protocol"))
        while True:
            query = input("Type your question below! Press exit() to stop asking questions.\nMessage: ")
            if query == "exit()":
                break
            print(self.send(query))
100
+
101
+
102
if __name__ == '__main__':
    # Pull variables from a .env file (if one is found) into the process
    # environment, then take the key from there. The previous hard-coded
    # empty string overwrote any exported OPENAI_API_KEY with '', so the
    # script could never authenticate.
    load_dotenv(find_dotenv())
    yt = YoutubeChatBot(
        'https://youtu.be/8u-3Pn8tEXQ?si=vRA-yKCk7JDuFGlI',
        OPENAI_API_KEY=os.environ.get("OPENAI_API_KEY", ""),
    )
    yt.start()
YoutubeChat/__init__.py ADDED
File without changes
YoutubeChat/__pycache__/YoutubeChat.cpython-310.pyc ADDED
Binary file (6.13 kB). View file
 
YoutubeChat/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (172 Bytes). View file
 
main.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from YoutubeChat.YoutubeChat import YoutubeChatBot
3
+ from openai import AuthenticationError
4
+
5
def get_user_input():
    """Render the setup page and, on request, create the chat session.

    Shows inputs for the video link and the OpenAI key. When "Start Chat"
    is pressed with both fields filled in, a YoutubeChatBot is built, its
    opening message is stored in the Streamlit session state, and the app
    is rerun. Authentication and ValueError failures are reported on the
    page.
    """
    st.title("Setup")

    # The two values needed to build the chatbot.
    youtube_link = st.text_input("Enter YouTube Video Link")
    openai_api_key = st.text_input(
        "Enter OpenAI API Key",
        type="password",
        help="Please delete the API key after use for your own security.",
    )

    # Nothing to do until the user asks to start.
    if not st.button("Start Chat"):
        return

    if not youtube_link.strip():
        st.error("Please enter a valid YouTube link.")
        return
    if not openai_api_key.strip():
        st.error("Please enter your OpenAI API key.")
        return

    try:
        bot = YoutubeChatBot(youtube_link, OPENAI_API_KEY=openai_api_key)
        greeting = bot.send('start protocol')
        st.session_state.start_message = greeting
        st.session_state.chat_object = bot
        st.session_state.messages = [{'role': 'assistant', 'content': greeting}]
        # Rerun the app so the main chat page is shown instead of setup.
        st.rerun()
    except AuthenticationError:
        st.error("Incorrect API key provided. Please check your API key.")
        # Point the user at the place where keys are managed.
        st.write("You can find your API key at https://platform.openai.com/account/api-keys.")
    except ValueError:
        st.error("Incorrect video link provided! ")
35
+
36
+
37
def main_chat_page():
    """Render the chat history and handle one new user prompt, if any.

    Every stored message is replayed first. When the chat input holds a
    prompt, it is recorded and shown, sent to the chatbot kept in the
    session state, and the reply is recorded and shown as well.
    """
    st.title("Main Chat Page")

    # Replay the conversation so far.
    for entry in st.session_state.messages:
        with st.chat_message(entry.get("role")):
            st.write(entry.get("content"))

    prompt = st.chat_input("Ask something")
    if not prompt:
        # No new input on this run.
        return

    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    reply = st.session_state.chat_object.send(prompt)
    st.session_state.messages.append({'role': 'assistant', "content": reply})
    with st.chat_message("assistant"):
        st.write(reply)
59
+
60
if __name__ == "__main__":
    # A stored start message means setup has already completed, so show
    # the chat page; otherwise show the setup form.
    if "start_message" in st.session_state:
        main_chat_page()
    else:
        get_user_input()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ youtube-transcript-api==0.6.2
2
+ faiss-cpu==1.8.0
3
+ pytube==15.0.0
4
+ langchain==0.1.13
5
+ python-dotenv==1.0.1
6
+ openai==1.14.2
7
+ tiktoken==0.6.0
8
+ pip==22.2.1