Spaces:

yunuseduran
/

chatpdf

Running

App Files Files Community

chatpdf / app.py

yunuseduran

Update app.py

d0adb44 verified about 1 year ago

raw

history blame

4.5 kB

	import os
	import shutil
	import subprocess
	import tempfile

	import google.generativeai as genai
	import gradio as gr
	import markdown
	from bs4 import BeautifulSoup
	from docx import Document

	# Set up your API key
	def setup_api_key():
	    """Configure the ``genai`` client with the value of ``GOOGLE_API_KEY``.

	    The key is read from the environment; when the variable is unset,
	    ``None`` is passed through to ``genai.configure`` unchanged.
	    """
	    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

	def upload_file(file_path):
	    """Upload ``file_path`` with ``genai.upload_file`` and return the result.

	    A progress line is printed before the upload, and the uploaded
	    object's ``uri`` is printed once the call returns.
	    """
	    print("Uploading file...")
	    uploaded = genai.upload_file(path=file_path)
	    print(f"Completed upload: {uploaded.uri}")
	    return uploaded

	def to_markdown(text):
	    """Replace every '•' in ``text`` with ' *' and render the result as HTML."""
	    with_list_markers = text.replace('•', ' *')
	    return markdown.markdown(with_list_markers)

	def build_model(text_file, model_name="gemini-1.5-flash"):
	    """Create a chat session whose first message carries the uploaded file.

	    Args:
	        text_file: The object returned by ``upload_file``; it is sent to the
	            model together with the opening summary request.
	        model_name: Name of the generative model to use. Defaults to the
	            model this app has always used.

	    Returns:
	        The chat session, after the opening message has been sent.
	    """
	    generation_config = {
	        "temperature": 0.2,
	        "top_p": 0.95,
	        "top_k": 64,
	        "max_output_tokens": 8192,
	        "response_mime_type": "text/plain",
	    }

	    model = genai.GenerativeModel(
	        model_name=model_name,
	        generation_config=generation_config,
	        system_instruction="""Answer the questions based on the uploaded file.
	If there is no related info in the file just reply 'I don't know.' """,
	    )

	    chat_session = model.start_chat(history=[])

	    # The reply to this opening message is intentionally discarded; the call
	    # is made only to send the file through the session (presumably so that
	    # later questions can refer to it -- confirm against the genai chat docs).
	    chat_session.send_message(["Summarize the doc in one sentence", text_file])
	    return chat_session

	def chat(chat_session, prompt):
	    """Send ``prompt`` through ``chat_session`` and return the reply's ``text``."""
	    return chat_session.send_message(prompt).text

	def generate_report(chat_session, questions):
	    """Ask each question through ``chat`` and build a Markdown Q&A report.

	    Args:
	        chat_session: Session object forwarded to ``chat`` for every question.
	        questions: Iterable of question strings. Surrounding whitespace is
	            stripped, and entries that are empty after stripping are skipped
	            so no empty prompt is sent and no empty heading is emitted.

	    Returns:
	        The report text: a "QUESTIONS & ANSWERS" heading followed by one
	        ``##`` heading and one answer paragraph per question.
	    """
	    sections = ["\n## QUESTIONS & ANSWERS\n"]
	    for raw_question in questions:
	        question = raw_question.strip()
	        if not question:
	            continue
	        answer = chat(chat_session, question)
	        sections.append(f"\n## {question}\n")
	        sections.append(f"\n{answer}\n")
	    # Join once instead of growing a string with += inside the loop.
	    return "".join(sections)

	def convert_markdown_to_html(report_text):
	    """Return the HTML that ``markdown.markdown`` produces for ``report_text``."""
	    return markdown.markdown(report_text)

	def add_html_to_word(html_text, doc):
	    """Append the top-level elements of ``html_text`` to the Word document ``doc``.

	    ``h1``..``h6`` become headings of the matching level, each ``li`` inside
	    a ``ul``/``ol`` becomes a bulleted/numbered paragraph, and every other
	    named tag (including ``p``) becomes a plain paragraph. Nodes without a
	    tag name are ignored.
	    """
	    heading_level_by_tag = {f"h{level}": level for level in range(1, 7)}
	    list_style_by_tag = {'ul': 'List Bullet', 'ol': 'List Number'}

	    for node in BeautifulSoup(html_text, 'html.parser'):
	        tag = node.name
	        if not tag:
	            # Nodes with no tag name produce no output.
	            continue

	        if tag in heading_level_by_tag:
	            doc.add_heading(node.get_text(), level=heading_level_by_tag[tag])
	        elif tag in list_style_by_tag:
	            for item in node.find_all('li'):
	                doc.add_paragraph(item.get_text(), style=list_style_by_tag[tag])
	        else:
	            # 'p' and any other tag are both written as plain paragraphs.
	            doc.add_paragraph(node.get_text())

	def _ensure_pdftotext():
	    """Install poppler-utils with apt-get only if ``pdftotext`` is not on PATH."""
	    if shutil.which("pdftotext") is not None:
	        return
	    # Best effort, as before: a failed install surfaces when pdftotext is run.
	    subprocess.run(["apt-get", "update"])
	    subprocess.run(["apt-get", "install", "-y", "poppler-utils"])


	def process_pdf(pdf_file, user_questions):
	    """Extract text from a PDF, ask the questions, and build the report files.

	    Args:
	        pdf_file: Filesystem path of the uploaded PDF.
	        user_questions: Questions as one string, one question per line.
	            Blank lines are ignored.

	    Returns:
	        A ``(html_text, docx_path)`` tuple: the report rendered as HTML and
	        the path of the saved ``Report_<name>.docx`` file.

	    Raises:
	        gr.Error: If no file was provided.
	        subprocess.CalledProcessError: If ``pdftotext`` exits with an error.
	    """
	    if not pdf_file:
	        raise gr.Error("Please upload a PDF file first.")

	    file_name = os.path.basename(pdf_file)

	    # Each request gets its own directory so concurrent requests cannot
	    # overwrite each other's extracted text or report. The directory is left
	    # in place because the returned .docx path must stay readable.
	    work_dir = tempfile.mkdtemp(prefix="chatpdf_")
	    saved_file_path = os.path.join(work_dir, file_name)
	    shutil.copyfile(pdf_file, saved_file_path)

	    _ensure_pdftotext()
	    text_path = os.path.join(work_dir, "text_file.txt")
	    # check=True: fail here rather than uploading a missing or stale text file.
	    subprocess.run(["pdftotext", saved_file_path, text_path], check=True)

	    text_file = upload_file(text_path)
	    chat_session = build_model(text_file)

	    questions = [
	        line.strip()
	        for line in (user_questions or "").splitlines()
	        if line.strip()
	    ]
	    report_text = generate_report(chat_session, questions)

	    doc = Document()
	    html_text = convert_markdown_to_html(report_text)
	    add_html_to_word(html_text, doc)

	    # Swap only the real extension, whatever its case (".pdf", ".PDF", ...).
	    stem = os.path.splitext(file_name)[0]
	    doc_path = os.path.join(work_dir, f"Report_{stem}.docx")
	    doc.save(doc_path)

	    return html_text, doc_path

	# Default questions pre-filled in the "Enter Questions" box.
	questions = [
	    "Makalenin yazarları kimlerdir?",
	    "Hangi modeller kullanılmıştır?",
	    "Kaç referans vardır?",
	    "Hangi yılda yayınlanmıştır?",
	]
	questions_str = "\n".join(questions)

	# Web UI: a PDF upload plus a questions box in, an HTML report plus a
	# downloadable DOCX file out.
	iface = gr.Interface(
	    title="Pdflerinizden kısa rapor oluşturma aracı @YED",
	    description="Sorularınızı sormak ve cevap almak için PDF'inizi yükleyin.",
	    fn=process_pdf,
	    inputs=[
	        gr.File(label="Upload PDF", type="filepath"),
	        gr.TextArea(
	            label="Enter Questions",
	            placeholder="Type your questions here, one per line.",
	            value=questions_str,
	        ),
	    ],
	    outputs=[
	        gr.HTML(label="HTML Formatted Report"),
	        gr.File(label="DOCX File Output", type="binary"),
	    ],
	)

	# Configure the API key before the app starts serving requests.
	setup_api_key()
	iface.launch()