Wikipedia-Twitter-ChatGPT-Memory-Chat

Running

App Files Files Community

Wikipedia-Twitter-ChatGPT-Memory-Chat / app.py

awacke1

Update app.py

a5f6f81 about 2 years ago

raw

history blame contribute delete

10.7 kB

	import gradio as gr
	import os
	import json
	import requests
	import pandas as pd
	import wikipediaapi
	import wikipedia
	from wikipedia.exceptions import DisambiguationError

	# OpenAI chat completions endpoint; predict() posts to it with "stream": True.
	API_URL = "https://api.openai.com/v1/chat/completions" #os.getenv("API_URL") + "/generate_stream"
	# The OpenAI key is read from the HF_TOKEN environment variable (set it as a
	# repository secret in the Space's settings panel). Indexing os.environ
	# raises KeyError at import time when the variable is not set.
	OPENAI_API_KEY= os.environ["HF_TOKEN"]
	# Keys for OpenAI ChatGPT API usage are created here: https://platform.openai.com/account/api-keys


	#Wikipedia API:

	def get_pagetext(page):
	    """Return *page* as a string with every tab character removed.

	    Args:
	        page: Any object; it is converted with ``str()`` before cleaning.

	    Returns:
	        The string form of *page* without tab characters.
	    """
	    # The earlier version replaced the two-character text "/t" (a typo for
	    # the tab escape) and then dropped the result, so it always returned None.
	    return str(page).replace("\t", "")

	#def get_wiki_summary(inputs, search, history=[]):

	def get_wiki_summary(search):
	    """Look up *search* on English Wikipedia and tabulate the page details.

	    Args:
	        search: Title of the Wikipedia page to fetch.

	    Returns:
	        A ``pandas.DataFrame`` with columns ``Entity`` and ``Value`` and one
	        row each for URL, Title, Summary, Text, Backlinks, Links and
	        Categories. When the page does not exist, every ``Value`` is
	        ``"Not found"`` so the result always has the same table shape.
	    """
	    entities = ["URL", "Title", "Summary", "Text", "Backlinks", "Links", "Categories"]

	    # NOTE(review): newer wikipedia-api releases expect a user agent argument
	    # here — confirm against the version pinned for this app.
	    wiki_wiki = wikipediaapi.Wikipedia('en')
	    page = wiki_wiki.page(search)

	    if not page.exists():
	        # The result is bound to a single Dataframe component, so the miss
	        # path must return a table too rather than a tuple of strings.
	        return pd.DataFrame({
	            'Entity': entities,
	            'Value': ["Not found"] * len(entities),
	        })

	    def joined(names):
	        # Each name is followed by " , " (including the last one), which is
	        # the exact text the loop-based concatenation used to produce.
	        return "".join(name + " , " for name in names)

	    pageurl = page.fullurl
	    pagetitle = page.title
	    pagesummary = page.summary[0:60]  # only the first 60 characters are shown
	    pagetext = get_pagetext(page.text)

	    # backlinks / categories / links are mappings keyed by page title.
	    backlink_list = joined(page.backlinks)
	    category_list = joined(page.categories)
	    link_list = joined(page.links)

	    return pd.DataFrame({
	        'Entity': entities,
	        'Value': [
	            pageurl,
	            pagetitle,
	            pagesummary,
	            pagetext,
	            backlink_list,
	            link_list,
	            category_list,
	        ],
	    })

	# ChatGPT

	def _build_messages(inputs, chat_counter, chatbot):
	    """Assemble the ``messages`` list sent to the chat completions endpoint.

	    Earlier (user, assistant) turns from *chatbot* are replayed only when
	    *chat_counter* is non-zero; the new user *inputs* always comes last.
	    """
	    messages = []
	    if chat_counter != 0:
	        for turn in chatbot:
	            messages.append({"role": "user", "content": turn[0]})
	            messages.append({"role": "assistant", "content": turn[1]})
	    messages.append({"role": "user", "content": inputs})
	    return messages


	def predict(inputs, top_p, temperature, chat_counter, chatbot=None, history=None):
	    """Stream a gpt-3.5-turbo reply for *inputs* and yield the growing chat.

	    Args:
	        inputs: The new user message.
	        top_p: Nucleus-sampling value forwarded in the request payload.
	        temperature: Sampling temperature forwarded in the request payload.
	        chat_counter: Number of turns so far; 0 means no earlier turns are
	            replayed to the model.
	        chatbot: Earlier turns as ``(user_text, assistant_text)`` pairs.
	        history: Flat list alternating user and assistant texts. It is
	            extended in place with the new message and the streamed reply.

	    Yields:
	        ``(chat, history, chat_counter)`` after every received content
	        fragment, where ``chat`` pairs up consecutive ``history`` entries and
	        ``chat_counter`` is the incoming value plus one.

	    Raises:
	        requests.HTTPError: If the API answers with an error status.
	    """
	    # None defaults avoid sharing one list between unrelated calls.
	    chatbot = [] if chatbot is None else chatbot
	    history = [] if history is None else history

	    # 1. Set up a payload. The slider values are used on every turn; the
	    #    first turn used to hard-code temperature and top_p to 1.0.
	    payload = {
	        "model": "gpt-3.5-turbo",
	        "messages": _build_messages(inputs, chat_counter, chatbot),
	        "temperature": temperature,
	        "top_p": top_p,
	        "n": 1,
	        "stream": True,
	        "presence_penalty": 0,
	        "frequency_penalty": 0,
	    }

	    # 2. Define your headers and add a key from https://platform.openai.com/account/api-keys
	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {OPENAI_API_KEY}"
	    }

	    # 3. Count this turn.
	    print(f"chat_counter - {chat_counter}")
	    chat_counter += 1

	    # 4. POST it to the OpenAI API
	    history.append(inputs)
	    print(f"payload is - {payload}")
	    partial_words = ""
	    token_counter = 0

	    # The context manager releases the connection even when the consumer
	    # stops iterating before the stream is exhausted.
	    with requests.post(API_URL, headers=headers, json=payload, stream=True) as response:
	        # Surface API failures as an HTTP error instead of a JSON decode
	        # error on the first line of the error body.
	        response.raise_for_status()

	        # 5. Iterate through response lines and structure readable response
	        for line_number, raw_line in enumerate(response.iter_lines()):
	            # The first line of the stream is always skipped (unchanged
	            # behaviour); presumably it carries no reply text — TODO confirm.
	            if line_number == 0:
	                continue
	            line = raw_line.decode()
	            # Blank keep-alive lines and the 12-character "data: [DONE]"
	            # terminator carry no JSON payload.
	            if len(line) <= 12:
	                continue
	            # Drop the 6-character "data: " prefix and parse the event once.
	            delta = json.loads(line[6:])['choices'][0]['delta']
	            if "content" not in delta:
	                continue
	            partial_words = partial_words + delta["content"]
	            if token_counter == 0:
	                history.append(" " + partial_words)
	            else:
	                history[-1] = partial_words
	            # Pair consecutive history entries into (user, assistant) tuples.
	            chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
	            token_counter += 1
	            yield chat, history, chat_counter


	def reset_textbox():
	    """Build the Gradio update that sets a component's value to an empty string."""
	    cleared = gr.update(value='')
	    return cleared




	# Episodic and Semantic IO
	def list_files(file_path):
	    """List the .csv and .txt files found in the current working directory.

	    Args:
	        file_path: Accepted so the function can be wired to the filename
	            textbox, but not read; the working directory is always listed.

	    Returns:
	        One line per matching file, each prefixed with an icon for its type,
	        or a fixed message when nothing matches.
	    """
	    icon_by_suffix = ((".csv", "📄 "), (".txt", "📑 "))
	    entries = []
	    for name in os.listdir(os.getcwd()):
	        for suffix, icon in icon_by_suffix:
	            if name.endswith(suffix):
	                entries.append(icon + name)
	                break
	    if not entries:
	        return "No .csv or .txt files found in the current directory."
	    return "\n".join(entries)

	# Function to read a file
	def read_file(file_path):
	    """Return the full text of the file at *file_path*.

	    Returns the message "File not found." instead of raising when the path
	    does not exist.
	    """
	    try:
	        with open(file_path, "r") as handle:
	            text = handle.read()
	    except FileNotFoundError:
	        return "File not found."
	    else:
	        return f"{text}"

	# Function to delete a file
	def delete_file(file_path):
	    """Remove the file at *file_path* and report the outcome as a message.

	    Returns "File not found." instead of raising when the path does not
	    exist; otherwise confirms the deletion by name.
	    """
	    try:
	        os.remove(file_path)
	    except FileNotFoundError:
	        return "File not found."
	    return f"{file_path} has been deleted."

	# Function to write to a file
	def write_file(file_path, content):
	    """Overwrite the file at *file_path* with *content*.

	    Args:
	        file_path: Path of the file to create or truncate.
	        content: Text to write.

	    Returns:
	        A success message naming the file, or a fixed error message when the
	        file cannot be opened or written.
	    """
	    try:
	        with open(file_path, "w") as file:
	            file.write(content)
	    except Exception:
	        # Exception instead of a bare except: failures still become the
	        # error message, but KeyboardInterrupt/SystemExit are not swallowed.
	        return "Error occurred while writing to file."
	    return f"Successfully written to {file_path}."

	# Function to append to a file
	def append_file(file_path, content):
	    """Append *content* to the end of the file at *file_path*.

	    Args:
	        file_path: Path of the file to extend (created when missing).
	        content: Text to append.

	    Returns:
	        A success message naming the file, or a fixed error message when the
	        file cannot be opened or written.
	    """
	    try:
	        with open(file_path, "a") as file:
	            file.write(content)
	    except Exception:
	        # Exception instead of a bare except: failures still become the
	        # error message, but KeyboardInterrupt/SystemExit are not swallowed.
	        return "Error occurred while appending to file."
	    return f"Successfully appended to {file_path}."


	# HTML heading passed to gr.HTML when the UI is built.
	title = """<h1 align="center">Wikipedia Twitter ChatGPT Memory Chat</h1>"""
	# Markdown passed to gr.Markdown when the UI is built. This text is shown to
	# users at runtime, so edits here change what the page displays.
	description = """
	## ChatGPT Datasets 📚
	- WebText
	- Common Crawl
	- BooksCorpus
	- English Wikipedia
	- Toronto Books Corpus
	- OpenWebText
	## ChatGPT Datasets - Details 📚
	- WebText: A dataset of web pages crawled from domains on the Alexa top 5,000 list. This dataset was used to pretrain GPT-2.
	- [WebText: A Large-Scale Unsupervised Text Corpus by Radford et al.](https://paperswithcode.com/dataset/webtext)
	- Common Crawl: A dataset of web pages from a variety of domains, which is updated regularly. This dataset was used to pretrain GPT-3.
	- [Language Models are Few-Shot Learners](https://paperswithcode.com/dataset/common-crawl) by Brown et al.
	- BooksCorpus: A dataset of over 11,000 books from a variety of genres.
	- [Scalable Methods for 8 Billion Token Language Modeling](https://paperswithcode.com/dataset/bookcorpus) by Zhu et al.
	- English Wikipedia: A dump of the English-language Wikipedia as of 2018, with articles from 2001-2017.
	- [Improving Language Understanding by Generative Pre-Training](https://huggingface.co/spaces/awacke1/WikipediaUltimateAISearch?logs=build) Space for Wikipedia Search
	- Toronto Books Corpus: A dataset of over 7,000 books from a variety of genres, collected by the University of Toronto.
	- [Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond](https://paperswithcode.com/dataset/bookcorpus) by Schwenk and Douze.
	- OpenWebText: A dataset of web pages that were filtered to remove content that was likely to be low-quality or spammy. This dataset was used to pretrain GPT-3.
	- [Language Models are Few-Shot Learners](https://paperswithcode.com/dataset/openwebtext) by Brown et al.
	"""

	# 6. Use Gradio to pull it all together
	# NOTE(review): this copy of the file has lost its indentation, so which
	# statements belong to which `with` block cannot be read from the lines
	# below — restore the nesting from the repository before changing layout.
	with gr.Blocks(css = """#col_container {width: 1280px; margin-left: auto; margin-right: auto;} #chatbot {height: 600px; overflow: auto;}""") as demo:
	gr.HTML(title)

	# Wikipedia context preloader: a query box, a search button and the
	# two-column (Entity / Value) table that get_wiki_summary fills.
	with gr.Row(): # inputs and buttons
	# NOTE(review): `default=` is presumably the older spelling of `value=` for
	# the initial text — confirm it is honored by the installed Gradio version.
	inp = gr.Textbox(lines=1, default="ChatGPT", label="Question")
	with gr.Row(): # inputs and buttons
	b4 = gr.Button("Search Web Live")
	with gr.Row(): # output DF2
	out_DF = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate", datatype = ["markdown", "markdown"], headers=['Entity', 'Value'])


	# Accordion chat bot: message box, chat display, per-session history state,
	# a send button and the sampling parameters passed to predict().
	with gr.Column(elem_id = "col_container"):
	inputs = gr.Textbox(placeholder= "Hi there!", label= "Type an input and press Enter")
	chatbot = gr.Chatbot(elem_id='chatbot')
	state = gr.State([])
	b1 = gr.Button()
	with gr.Accordion("Parameters", open=False):
	top_p = gr.Slider( minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
	temperature = gr.Slider( minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
	chat_counter = gr.Number(value=0, visible=True, precision=0)

	# Episodic/Semantic Memory IO: filename and content boxes, a status box and
	# one button per file operation.
	fileName = gr.Textbox(label="Filename")
	fileContent = gr.TextArea(label="File Content")
	completedMessage = gr.Textbox(label="Completed")
	label = gr.Label()
	with gr.Row():
	listFiles = gr.Button("📄 List File(s)")
	readFile = gr.Button("📖 Read File")
	saveFile = gr.Button("💾 Save File")
	deleteFile = gr.Button("🗑️ Delete File")
	appendFile = gr.Button("➕ Append File")


	# File events: list/read results go to the content box; save, delete and
	# append report their status in the "Completed" box.
	listFiles.click(list_files, inputs=fileName, outputs=fileContent)
	readFile.click(read_file, inputs=fileName, outputs=fileContent)
	saveFile.click(write_file, inputs=[fileName, fileContent], outputs=completedMessage)
	deleteFile.click(delete_file, inputs=fileName, outputs=completedMessage)
	appendFile.click(append_file, inputs=[fileName, fileContent], outputs=completedMessage )

	# Wikipedia events
	b4.click(get_wiki_summary, inp, out_DF )
	# Submitting the chat box also runs the Wikipedia lookup, using the text
	# in `inp` (not the chat message) as the search term.
	inputs.submit(get_wiki_summary, inp, out_DF)


	# Chatbot: Enter in the chat box and the button both call predict() with
	# the same inputs and outputs, then clear the chat box.
	inputs.submit(predict, [inputs, top_p, temperature,chat_counter, chatbot, state], [chatbot, state, chat_counter])
	b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter])
	b1.click(reset_textbox, [], [inputs])

	inputs.submit(reset_textbox, [], [inputs])
	gr.Markdown(description)

	# Queue and go!
	demo.queue().launch(debug=True)