academic

Paused

App Files Files Community

academic / crazy_functions /联网的ChatGPT.py

qingxu99

试试联网检索

90e1eef about 3 years ago

raw

history blame

4.99 kB

	from toolbox import CatchException, update_ui
	from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
	import requests
	from bs4 import BeautifulSoup
	from request_llm.bridge_all import model_info

	def google(query, proxies):
	    """Search Google for ``query`` and collect the result links on the page.

	    Args:
	        query (str): Search keywords; sent as the ``q`` query parameter.
	        proxies: Proxy configuration forwarded unchanged to ``requests.get``.

	    Returns:
	        list[dict]: One ``{'title': ..., 'link': ...}`` item for every
	        ``div.g`` result block that has both an absolute ``http`` link and
	        an ``h3`` title. Blocks missing either one are skipped.

	    Raises:
	        Whatever ``requests.get`` raises (connection errors, timeouts);
	        nothing is caught here.
	    """
	    # Local import so this block does not rely on the file's import header.
	    from urllib.parse import parse_qs, urlparse

	    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
	    # `params` lets requests percent-encode the query; interpolating it into
	    # the URL breaks on characters such as `&`, `#` or `+`.
	    # The timeout keeps an unresponsive server or proxy from blocking forever.
	    response = requests.get(
	        "https://www.google.com/search",
	        params={'q': query},
	        headers=headers,
	        proxies=proxies,
	        timeout=8,
	    )
	    soup = BeautifulSoup(response.content, 'html.parser')

	    results = []
	    for g in soup.find_all('div', class_='g'):
	        # Ask only for anchors that carry an href so indexing cannot fail.
	        anchor = g.find('a', href=True)
	        heading = g.find('h3')
	        if anchor is None or heading is None:
	            continue

	        link = anchor['href']
	        if link.startswith('/url?q='):
	            # Redirect form "/url?q=<target>&sa=...": keep only the decoded
	            # target and drop the tracking parameters that follow it.
	            targets = parse_qs(urlparse(link).query).get('q')
	            if not targets:
	                continue
	            link = targets[0]
	        if not link.startswith('http'):
	            continue

	        results.append({'title': heading.text, 'link': link})

	    # Kept from the original: echo the collected links to stdout.
	    for r in results:
	        print(r['link'])
	    return results

	def scrape_text(url, proxies) -> str:
	    """Scrape the visible text from a webpage.

	    Args:
	        url (str): The URL to scrape text from.
	        proxies: Proxy configuration forwarded unchanged to ``requests.get``.

	    Returns:
	        str: The page text with ``<script>``/``<style>`` content removed,
	        surrounding whitespace stripped and empty fragments dropped, joined
	        with newlines. If the page cannot be fetched, a fixed
	        Chinese-language "cannot connect to this page" message is returned
	        instead of raising.
	    """
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
	        'Content-Type': 'text/plain',
	    }
	    try:
	        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
	        # ISO-8859-1 is what requests reports for text responses that declare
	        # no charset; prefer the encoding detected from the body in that case.
	        if response.encoding == "ISO-8859-1":
	            response.encoding = response.apparent_encoding
	    except Exception:
	        # Best-effort: any fetch failure yields a placeholder so one bad page
	        # does not abort the caller. `Exception` rather than a bare `except`
	        # so KeyboardInterrupt / SystemExit still propagate.
	        return "无法连接到该网页"

	    soup = BeautifulSoup(response.text, "html.parser")
	    # Remove non-visible content before extracting text.
	    for tag in soup(["script", "style"]):
	        tag.extract()

	    text = soup.get_text()
	    lines = (line.strip() for line in text.splitlines())
	    # Split on runs of two spaces (layout gaps between headline fragments).
	    # Splitting on a single space would put every word on its own line.
	    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
	    return "\n".join(chunk for chunk in chunks if chunk)

	@CatchException
	def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
	    """Answer the user's question using text scraped from web search results.

	    Searches Google for ``txt``, scrapes up to five result pages, then asks
	    the model to answer from that material, refreshing the UI at each step.

	    txt             Text typed into the input box, e.g. a passage to translate or a path containing files to process
	    llm_kwargs      GPT model parameters such as temperature and top_p; normally passed through unchanged
	    plugin_kwargs   Plugin parameters; not used by this function
	    chatbot         Handle of the chat display box, used to show content to the user
	    history         Chat history; discarded on entry
	    system_prompt   Silent system prompt for GPT; not used by this function
	    web_port        Port the application is running on; not used by this function
	    """
	    history = []  # drop the previous history so the input does not overflow
	    opening_message = (
	        f"请结合互联网信息回答以下问题：{txt}",
	        "[Local Message] 请注意，您正在调用一个[函数插件]的模板，该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者，它可以作为创建新功能函数的模板。您若希望分享新的功能模组，请不吝PR！",
	    )
	    chatbot.append(opening_message)
	    # Refresh the UI right away: the GPT request below takes a while.
	    yield from update_ui(chatbot=chatbot, history=history)

	    # ------------- < Step 1: fetch the search-engine results > -------------
	    from toolbox import get_conf
	    proxies, = get_conf('proxies')
	    search_hits = google(txt, proxies)
	    history = []

	    # ------------- < Step 2: visit the result pages one by one > -------------
	    max_search_result = 5  # maximum number of pages whose text is collected
	    for index, hit in enumerate(search_hits[:max_search_result]):
	        page_text = scrape_text(hit['link'], proxies)
	        label = f"第{index}份搜索结果："
	        history.append(label)
	        history.append(page_text)
	        # Only a 500-character preview is shown in the chat window.
	        chatbot.append([label, page_text[:500] + "......"])
	        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI after every page

	    # ------------- < Step 3: let ChatGPT combine the results > -------------
	    i_say = f"从以上搜索结果中抽取信息，然后回答问题：{txt}"
	    # Budget three quarters of the selected model's max_token value.
	    token_budget = model_info[llm_kwargs['llm_model']]['max_token'] * 3 // 4
	    # Clip the input to stay under the token limit (original note: clipping
	    # starts from the longest entries).
	    i_say, history = input_clipping(
	        inputs=i_say,
	        history=history,
	        max_token_limit=token_budget,
	    )
	    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
	        inputs=i_say,
	        inputs_show_user=i_say,
	        llm_kwargs=llm_kwargs,
	        chatbot=chatbot,
	        history=history,
	        sys_prompt="请从给定的若干条搜索结果中抽取信息，对最相关的两个搜索结果进行总结，然后回答问题。",
	    )
	    chatbot[-1] = (i_say, gpt_say)
	    history.append(i_say)
	    history.append(gpt_say)
	    yield from update_ui(chatbot=chatbot, history=history)  # final UI refresh