Spaces:

auto-academic
/

auto-draft

Runtime error

auto-draft / auto_backgrounds.py

sc_ma

Add functions to support cloud storage cache.

1b82d4c over 2 years ago

5.35 kB

	import datetime
	import hashlib
	import logging
	import os
	import shutil
	import time

	from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
	from utils.prompts import generate_bg_keywords_prompts, generate_bg_summary_prompts
	from utils.references import References
	from utils.tex_processing import replace_title

	# Running token counters for the whole process; updated by log_usage().
	TOTAL_TOKENS = TOTAL_PROMPTS_TOKENS = TOTAL_COMPLETION_TOKENS = 0


	def hash_name(title, description):
		"""
		Return a deterministic MD5 hex digest for a (title, description) pair.

		The two strings are concatenated and lower-cased before hashing, so the
		same title and description always map to the same value regardless of
		letter case.

		:param title: paper title (str).
		:param description: paper description (str, may be empty).
		:return: 32-character hexadecimal digest string.
		"""
		name = (title + description).lower()
		# The digest is only used to derive a stable file name
		# (see fake_generate_backgrounds), not for anything security-sensitive.
		return hashlib.md5(name.encode('utf-8')).hexdigest()

	def log_usage(usage, generating_target, print_out=True):
		"""
		Add one request's token usage to the module-wide counters and report it.

		:param usage: mapping with the keys 'prompt_tokens', 'completion_tokens'
			and 'total_tokens'.
		:param generating_target: label of what was generated; only used in the message.
		:param print_out: when true, the message is printed in addition to being logged.
		"""
		global TOTAL_TOKENS, TOTAL_PROMPTS_TOKENS, TOTAL_COMPLETION_TOKENS

		prompts_tokens = usage['prompt_tokens']
		completion_tokens = usage['completion_tokens']
		total_tokens = usage['total_tokens']

		# Update the running totals before building the message, which reports TOTAL_TOKENS.
		TOTAL_TOKENS = TOTAL_TOKENS + total_tokens
		TOTAL_PROMPTS_TOKENS = TOTAL_PROMPTS_TOKENS + prompts_tokens
		TOTAL_COMPLETION_TOKENS = TOTAL_COMPLETION_TOKENS + completion_tokens

		message = (
			f"For generating {generating_target}, {total_tokens} tokens have been used ({prompts_tokens} for prompts; {completion_tokens} for completion). "
			f"{TOTAL_TOKENS} tokens have been used in total."
		)
		if print_out:
			print(message)
		# The message is always written to the log, whether or not it was printed.
		logging.info(message)

	def make_archive(source, destination):
		"""
		Archive `source` (a file or directory) into the archive file `destination`.

		The archive format is taken from the extension of `destination`
		(e.g. "output.zip" -> "zip"). Only the last extension is used, so
		destination names containing extra dots are handled correctly. The
		archive contains `source` under its own base name.

		:param source: path of the file or directory to archive; a trailing path
			separator is ignored.
		:param destination: path of the archive to create, including extension.
		:return: `destination`.
		:raises ValueError: if `destination` has no extension, or (from
			shutil.make_archive) if the extension is not a known archive format.
		"""
		dest_root, dest_ext = os.path.splitext(destination)
		archive_format = dest_ext.lstrip('.')
		if not archive_format:
			raise ValueError(f"Cannot infer archive format from destination: {destination!r}")

		# Strip trailing separators *before* splitting; otherwise dirname() would
		# return the directory itself instead of its parent.
		source = source.rstrip(os.sep + (os.altsep or ''))
		# dirname() is '' for a bare relative name; use the current directory then.
		archive_from = os.path.dirname(source) or os.curdir
		archive_to = os.path.basename(source)

		created = shutil.make_archive(dest_root, archive_format, archive_from, archive_to)
		# For formats whose file suffix differs from the format name the created
		# file is not at `destination` yet, so move it there.
		if os.path.abspath(created) != os.path.abspath(destination):
			shutil.move(created, destination)
		return destination

	def pipeline(paper, section, save_to_path, model, openai_key=None):
		"""
		The main pipeline of generating a section.
			1. Generate prompts.
			2. Get responses from AI assistant.
			3. Extract the section text.
			4. Save the text to .tex file.

		:param paper: dict describing the paper; the generated text is stored in
			paper["body"][section].
		:param section: section name. "abstract" is wrapped in an abstract
			environment; any other name gets an upper-cased \\section heading.
		:param save_to_path: path prefix of the output folder. It is concatenated
			directly with "<section>.tex", so it should end with a path separator.
		:param model: model name forwarded to get_responses.
		:param openai_key: not used by this function; kept so existing callers
			that pass it keep working.
		:return: the usage value returned by get_responses.
		"""
		print(f"Generating {section}...")
		prompts = generate_bg_summary_prompts(paper, section)
		gpt_response, usage = get_responses(prompts, model)
		output = extract_responses(gpt_response)
		paper["body"][section] = output
		tex_file = save_to_path + f"{section}.tex"

		if section == "abstract":
			content = r"\begin{abstract}" + output + r"\end{abstract}"
		else:
			# The backslash is escaped explicitly: "\s" is not a valid escape sequence.
			content = f"\\section{{{section.upper()}}}\n" + output

		# Write the whole file once, with an explicit encoding so non-ASCII
		# characters in the generated text do not depend on the platform default.
		with open(tex_file, "w", encoding="utf-8") as f:
			f.write(content)

		# NOTE(review): fixed pause after each section, presumably to stay under
		# the API rate limit - confirm before changing.
		time.sleep(5)
		print(f"{section} has been generated. Saved to {tex_file}.")
		return usage



	def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4", openai_key=None):
		"""
		Generate the background sections of a survey paper and pack them into a zip.

		Steps:
			1. Copy latex_templates/<template> to a new timestamped folder under outputs/.
			2. Ask the model for keywords, collect papers for them from arXiv and
			   write the references to ref.bib in that folder.
			3. Generate the "introduction", "related works" and "backgrounds"
			   sections via pipeline(); a section that fails is reported and skipped.
			4. Archive the output folder as output.zip.

		:param title: paper topic; the document title becomes "A Survey on <title>".
		:param description: optional extra description passed to the keyword prompt.
		:param template: name of the sub-folder of latex_templates/ to copy.
		:param model: model name forwarded to get_responses / pipeline.
		:param openai_key: not used by this function; kept for interface compatibility.
		:return: path of the created archive ("output.zip").
		"""
		paper = {}
		paper_body = {}

		# Create a copy of the template in the outputs folder.
		now = datetime.datetime.now()
		target_name = now.strftime("outputs_%Y%m%d_%H%M%S")
		source_folder = f"latex_templates/{template}"
		destination_folder = f"outputs/{target_name}"
		shutil.copytree(source_folder, destination_folder)

		bibtex_path = destination_folder + "/ref.bib"
		save_to_path = destination_folder + "/"
		replace_title(save_to_path, "A Survey on " + title)
		# NOTE(review): logging.basicConfig has no effect once the root logger is
		# configured, so repeated calls in one process keep logging to the first
		# run's generation.log - confirm whether that is intended.
		logging.basicConfig(level=logging.INFO, filename=save_to_path + "generation.log")

		# Generate keywords and references
		print("Initialize the paper information ...")
		prompts = generate_bg_keywords_prompts(title, description)
		gpt_response, usage = get_responses(prompts, model)
		keywords = extract_keywords(gpt_response)
		log_usage(usage, "keywords")

		ref = References(load_papers="")
		ref.collect_papers(keywords, method="arxiv")
		# todo: the value returned by to_bibtex (all paper ids) will be used to
		# check if all citations are in this list
		ref.to_bibtex(bibtex_path)

		print(f"The paper information has been initialized. References are saved to {bibtex_path}.")

		paper["title"] = title
		paper["description"] = description
		paper["references"] = ref.to_prompts()  # to_prompts(top_papers)
		paper["body"] = paper_body
		paper["bibtex"] = bibtex_path

		for section in ["introduction", "related works", "backgrounds"]:
			try:
				usage = pipeline(paper, section, save_to_path, model=model)
				log_usage(usage, section)
			except Exception as e:
				# Best effort: one failed section must not abort the whole paper,
				# but record the traceback in the log instead of only printing.
				print(f"Failed to generate {section} due to the error: {e}")
				logging.exception("Failed to generate %s", section)
		print(f"The paper {title} has been generated. Saved to {save_to_path}.")
		return make_archive(destination_folder, "output.zip")


	def fake_generate_backgrounds(title, description, openai_key = None):
		"""
		Stand-in for generate_backgrounds that never calls the OpenAI API.

		Archives the bundled "sample-output.pdf" into "<hash>.zip", where <hash>
		is hash_name(title, description), and returns that archive path. This lets
		the surrounding pipeline be tested end to end. `openai_key` is ignored.
		"""
		archive_name = f"{hash_name(title, description)}.zip"
		return make_archive("sample-output.pdf", archive_name)


	if __name__ == "__main__":
		# Manual run: generate a survey on one fixed topic with the "Summary" template.
		model = "gpt-4"
		# model = "gpt-3.5-turbo"
		template = "Summary"
		description = ""
		title = "Reinforcement Learning"

		generate_backgrounds(title=title, description=description, template=template, model=model)