Spaces:
Runtime error
Runtime error
Commit
·
95d1bce
1
Parent(s):
88afba6
Adding files
Browse files- YoutubeChat/YoutubeChat.py +104 -0
- YoutubeChat/__init__.py +0 -0
- YoutubeChat/__pycache__/YoutubeChat.cpython-310.pyc +0 -0
- YoutubeChat/__pycache__/__init__.cpython-310.pyc +0 -0
- main.py +64 -0
- requirements.txt +8 -0
YoutubeChat/YoutubeChat.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.document_loaders import YoutubeLoader
|
2 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
4 |
+
from langchain.vectorstores import FAISS
|
5 |
+
from langchain.chat_models import ChatOpenAI
|
6 |
+
from langchain.chains import LLMChain
|
7 |
+
from langchain.memory import ConversationBufferWindowMemory
|
8 |
+
from dotenv import find_dotenv, load_dotenv
|
9 |
+
from langchain.agents import initialize_agent
|
10 |
+
from langchain.prompts.chat import (
|
11 |
+
ChatPromptTemplate,
|
12 |
+
SystemMessagePromptTemplate,
|
13 |
+
HumanMessagePromptTemplate,
|
14 |
+
)
|
15 |
+
import textwrap
|
16 |
+
from langchain.agents import Tool
|
17 |
+
from langchain.schema import SystemMessage
|
18 |
+
import os
|
19 |
+
|
20 |
+
class YoutubeChatBot:
    """Conversational agent that answers questions about one YouTube video.

    On construction the video transcript is loaded, split into chunks and
    indexed in a FAISS vector store. A LangChain conversational agent is then
    created with three tools: a knowledge-base lookup over that index, a
    "start protocol" introduction, and a query-improvement step.
    """

    def __init__(self, video_url, OPENAI_API_KEY=None):
        """Build the transcript index, the chat model and the agent.

        Args:
            video_url: URL of the YouTube video, passed to
                ``YoutubeLoader.from_youtube_url``.
            OPENAI_API_KEY: OpenAI API key. When given it is exported as the
                ``OPENAI_API_KEY`` environment variable. When ``None`` the
                environment is left untouched (presumably the OpenAI client
                then uses a key that is already configured -- verify).
        """
        # os.environ only accepts strings; assigning None raises TypeError,
        # so the default must not be written into the environment.
        if OPENAI_API_KEY is not None:
            os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

        embeddings = OpenAIEmbeddings()
        loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=True)
        transcript = loader.load()

        # Split the transcript into overlapping chunks before embedding.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        docs = text_splitter.split_documents(transcript)

        self.db = FAISS.from_documents(docs, embeddings)

        self.llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.7)

        self.conversational_memory = ConversationBufferWindowMemory(
            memory_key="chat_history",
            k=5,
            return_messages=True,
        )

        self.tools = [
            Tool(
                name='Knowledge Base',
                func=self.getKnowledge,
                description="""Use this tool to get informartion from the youtube video about the questions asked. Use the query which is outputed by the improve Query tool always. Do not use this on the query given by the user directly. The information will be from a Youtube Video.""",
            ),
            Tool(
                name='Starting protocol',
                func=self.getIntroduction,
                description="""Use this tool when you get a query saying 'start protocol' only.
                This tool will return a summary of the video transcript and 3 questions that we suggest user to ask about the video to get more information.
                There are no inputs to this tool.


                """,
            ),
            Tool(
                name="Improve Query",
                func=self.improveQuery,
                description="""Use this first after every user prompt. """,
            ),
        ]

        system_message = """
        You are an AI chatbot which converses about a youtube video.
        You have a tool to access that will give you information about the quetion asked from the video transcript.

        If you feel the question is not relevant to the youtube video return 'I dont Know'. Do not use your own knowledge.
        """
        self.agent = initialize_agent(
            agent='chat-conversational-react-description',
            tools=self.tools,
            llm=self.llm,
            verbose=True,
            max_iterations=5,
            early_stopping_method='generate',
            memory=self.conversational_memory,
            handle_parsing_errors=True,
            agent_kwargs={"system_message": system_message},
        )

    def improveQuery(self, query):
        """Ask the chat model to rewrite ``query`` using the chat history.

        Returns whatever ``self.llm.invoke`` returns for the rewrite prompt.
        """
        return self.llm.invoke(f"You are an assistant whose task is make the user query better. Given this {self.conversational_memory} chat history and {query} this user query. Make the query better and more clear to improve search results. Do not return anything except the updated query.")

    def getKnowledge(self, query, k=3):
        """Return the text of the ``k`` transcript chunks most similar to ``query``.

        Args:
            query: Search text passed to the vector store.
            k: Number of chunks to retrieve.

        Returns:
            The ``page_content`` of the retrieved chunks joined by single
            spaces (an empty string when nothing is retrieved).
        """
        docs = self.db.similarity_search(query, k=k)
        # Return the joined text rather than the raw document objects: the
        # original computed this string and then discarded it.
        return " ".join(d.page_content for d in docs)

    def outputFormatter(self, output):
        """Ask the chat model to reformat ``output`` for display.

        Returns whatever ``self.llm.invoke`` returns; ``send`` reads its
        ``content`` attribute.
        """
        return self.llm.invoke(f""" You are an agent helping a user with a youtube video. You are returned this as the output {output} for a query. Your task is to format the output in the best possible way. This is the previous conversation {self.conversational_memory}. Instructions to format are,
        1. If there are long sentences break them using new line character.
        2. If there are numbered or bullet points, break them using new line characters.
        3. If there is no chat history, start with greeting the user followed by the formatted output.
        4. If there are questions in the output, return them as questions. Your only task is formatting.

        Do not return anything except the output or anything not about the task. Do not say this was the format asked or mention formatted output. Return the respone once. This a system prompt and not a user promopt.
        """)

    def getIntroduction(self, query):
        """Generate a video summary plus three suggested questions.

        ``query`` is accepted because the method is registered as a tool
        callback, but it is not used: the lookup always searches for
        "video summary".
        """
        docs = self.db.similarity_search("video summary", k=3)
        return self.llm.invoke(f"You are returned these Docs: {docs}\n when asked about the summary of the youtube video. Generate a summary of the youtube video using these docs and also generate 3 questions you recommend user to ask about the video knowing the summary. Not have the summary too long. Do NOT forget the questions.")

    def send(self, user_query):
        """Run ``user_query`` through the agent and return the formatted reply text."""
        output = self.agent(user_query)['output']
        return self.outputFormatter(output).content

    def start(self):
        """Run an interactive console session until the user types ``exit()``."""
        # Print every reply; previously the results of send() were discarded,
        # so the console user never saw an answer.
        print(self.send("start protocol"))
        while True:
            query = input("Type your question below! Press exit() to stop asking questions.\nMessage: ")
            if query == "exit()":
                break
            print(self.send(query))
|
100 |
+
|
101 |
+
|
102 |
+
if __name__ == '__main__':
    # Load variables from a .env file (if one is found) and take the API key
    # from the environment instead of hard-coding an empty string, which
    # overwrote any configured key with a value that can never authenticate.
    load_dotenv(find_dotenv())
    yt = YoutubeChatBot(
        'https://youtu.be/8u-3Pn8tEXQ?si=vRA-yKCk7JDuFGlI',
        OPENAI_API_KEY=os.environ.get("OPENAI_API_KEY", ""),
    )
    yt.start()
|
YoutubeChat/__init__.py
ADDED
File without changes
|
YoutubeChat/__pycache__/YoutubeChat.cpython-310.pyc
ADDED
Binary file (6.13 kB). View file
|
|
YoutubeChat/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (172 Bytes). View file
|
|
main.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from YoutubeChat.YoutubeChat import YoutubeChatBot
|
3 |
+
from openai import AuthenticationError
|
4 |
+
|
5 |
+
def get_user_input():
    """Render the setup page and, on a valid submission, create the chat bot.

    Collects the YouTube link and OpenAI API key. When "Start Chat" is
    pressed with both fields filled in, a ``YoutubeChatBot`` is built, its
    'start protocol' reply is stored in the session state together with the
    bot, and the app is rerun so the chat page is shown.
    """
    st.title("Setup")

    # The two setup fields: video link and API key.
    youtube_link = st.text_input("Enter YouTube Video Link")
    openai_api_key = st.text_input(
        "Enter OpenAI API Key",
        type="password",
        help="Please delete the API key after use for your own security.",
    )

    # Nothing more to do until the button is pressed.
    if not st.button("Start Chat"):
        return

    # Reject blank (or whitespace-only) fields before doing any work.
    if not youtube_link.strip():
        st.error("Please enter a valid YouTube link.")
        return
    if not openai_api_key.strip():
        st.error("Please enter your OpenAI API key.")
        return

    try:
        bot = YoutubeChatBot(youtube_link, OPENAI_API_KEY=openai_api_key)
        greeting = bot.send('start protocol')
        st.session_state.start_message = greeting
        st.session_state.chat_object = bot
        st.session_state.messages = [{'role': 'assistant', 'content': greeting}]
        # Rerun the app so the main chat page is rendered.
        st.rerun()
    except AuthenticationError:
        st.error("Incorrect API key provided. Please check your API key.")
        # Point the user at the place to look the key up.
        st.write("You can find your API key at https://platform.openai.com/account/api-keys.")
    except ValueError:
        st.error("Incorrect video link provided! ")
|
35 |
+
|
36 |
+
|
37 |
+
def main_chat_page():
    """Render the chat page: replay the stored history, then handle one new prompt.

    Reads the message list and the chat bot from ``st.session_state``. A new
    prompt and the bot's reply are both appended to that list and displayed.
    """
    st.title("Main Chat Page")

    # Same list object as stored in the session state, so appends persist.
    history = st.session_state.messages
    for entry in history:
        with st.chat_message(entry.get("role")):
            st.write(entry.get("content"))

    # Chat input box; it yields a falsy value when nothing was submitted.
    prompt = st.chat_input("Ask something")
    if not prompt:
        return

    history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    reply = st.session_state.chat_object.send(prompt)
    history.append({'role': 'assistant', "content": reply})
    with st.chat_message("assistant"):
        st.write(reply)
|
59 |
+
|
60 |
+
if __name__ == "__main__":
    # Show the setup page until a start message has been stored in the
    # session state; after that, show the chat page.
    page = main_chat_page if "start_message" in st.session_state else get_user_input
    page()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
youtube-transcript-api==0.6.2
|
2 |
+
faiss-cpu==1.8.0
|
3 |
+
pytube==15.0.0
|
4 |
+
langchain==0.1.13
|
5 |
+
python-dotenv==1.0.1
|
6 |
+
openai==1.14.2
|
7 |
+
tiktoken==0.6.0
|
8 |
+
pip==22.2.1
|