# Page header captured together with the source: "Spaces: Running".
import os
import shutil
import subprocess
import tempfile

import google.generativeai as genai
import gradio as gr
import markdown
from bs4 import BeautifulSoup
from docx import Document
def setup_api_key():
    """Configure the ``genai`` client with the key from ``GOOGLE_API_KEY``.

    The value is read from the environment with ``os.getenv`` and handed to
    ``genai.configure`` unchanged, so ``None`` is passed when the variable
    is not set.
    """
    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
def upload_file(file_path: str):
    """Upload a local file with ``genai.upload_file`` and return its handle.

    Progress is reported on stdout: one line before the upload starts and
    one line with the uploaded file's ``uri`` once it has finished.

    Args:
        file_path: Path of the local file to upload.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    print("Uploading file...")
    text_file = genai.upload_file(path=file_path)
    print(f"Completed upload: {text_file.uri}")
    return text_file
def to_markdown(text: str) -> str:
    """Render *text* to HTML after rewriting ``•`` bullets as `` *``.

    Args:
        text: Text that may use the ``•`` character as a bullet marker.

    Returns:
        The HTML produced by ``markdown.markdown`` for the rewritten text.
    """
    return markdown.markdown(text.replace('•', ' *'))
def build_model(text_file):
    """Start a chat session that has already been shown the uploaded file.

    A ``gemini-1.5-flash`` model is created with a low temperature and a
    system instruction telling it to answer only from the uploaded file.
    The file is then sent in an opening message so that later prompts can
    be plain text.

    Args:
        text_file: File handle as returned by ``upload_file``.

    Returns:
        The chat session created by ``model.start_chat``, after the opening
        message has been sent.
    """
    generation_config = {
        "temperature": 0.2,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }
    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=generation_config,
        system_instruction=(
            "Answer the questions based on the uploaded file.\n"
            "If there is no related info in the file just reply 'I don't know.' "
        ),
    )
    chat_session = model.start_chat(history=[])
    # This opening turn is the only place the file is attached to the
    # session; the summary it returns is not used.
    chat_session.send_message(["Summarize the doc in one sentence", text_file])
    return chat_session
def chat(chat_session, prompt):
    """Send *prompt* to the chat session and return the reply text.

    Args:
        chat_session: Object exposing ``send_message(prompt)``.
        prompt: Message to send.

    Returns:
        The ``text`` attribute of the response returned by ``send_message``.
    """
    return chat_session.send_message(prompt).text
def generate_report(chat_session, questions):
    """Ask every question in the chat session and build a Markdown report.

    The report starts with a ``## QUESTIONS & ANSWERS`` heading, followed by
    one ``## <question>`` heading and the model's answer per question.

    Args:
        chat_session: Session passed through to ``chat``.
        questions: Iterable of question strings. Surrounding whitespace is
            stripped and blank entries are skipped, so an empty line in the
            input is never sent to the model as an empty prompt.

    Returns:
        The report as a single Markdown string.
    """
    sections = ["\n## QUESTIONS & ANSWERS\n"]
    for question in questions:
        question = question.strip()
        if not question:
            continue
        answer = chat(chat_session, question)
        sections.append(f"\n## {question}\n")
        sections.append(f"\n{answer}\n")
    return "".join(sections)
def convert_markdown_to_html(report_text: str) -> str:
    """Render the Markdown report as HTML using ``markdown.markdown``.

    Args:
        report_text: Report in Markdown form.

    Returns:
        The rendered HTML string.
    """
    return markdown.markdown(report_text)
def add_html_to_word(html_text, doc):
    """Append the top-level elements of an HTML fragment to a Word document.

    Mapping of tags to document content:
      * ``h1``..``h6`` -> heading of the matching level
      * ``ul`` / ``ol`` -> one ``List Bullet`` / ``List Number`` paragraph
        per ``li`` found inside the list
      * ``p`` and any other tag -> plain paragraph with the element's text

    Nodes without a tag name (for example the text between elements) are
    skipped.

    Args:
        html_text: HTML fragment to convert.
        doc: Document object exposing ``add_heading`` and ``add_paragraph``;
            it is modified in place.
    """
    heading_levels = {f"h{level}": level for level in range(1, 7)}
    list_styles = {"ul": "List Bullet", "ol": "List Number"}

    for node in BeautifulSoup(html_text, 'html.parser'):
        tag = node.name
        if not tag:
            continue
        if tag in heading_levels:
            doc.add_heading(node.get_text(), level=heading_levels[tag])
        elif tag in list_styles:
            # NOTE(review): find_all('li') is called without restricting the
            # search depth, so items of nested lists are presumably emitted
            # too -- confirm this is the intended layout for nested lists.
            for item in node.find_all('li'):
                doc.add_paragraph(item.get_text(), style=list_styles[tag])
        else:
            doc.add_paragraph(node.get_text())
def _ensure_pdftotext():
    """Install poppler-utils with apt-get only if ``pdftotext`` is not on PATH."""
    if shutil.which("pdftotext") is not None:
        return
    # Best-effort install: exit codes are deliberately not checked here, a
    # failed install surfaces when pdftotext itself is invoked.
    subprocess.run(["apt-get", "update"])
    subprocess.run(["apt-get", "install", "-y", "poppler-utils"])


def process_pdf(pdf_file, user_questions):
    """Answer the user's questions about a PDF and produce an HTML/DOCX report.

    Steps: extract the PDF text with ``pdftotext``, upload the text, open a
    chat session on it, ask every non-blank question, render the Markdown
    report to HTML and save the same content as a Word document.

    Args:
        pdf_file: Path of the uploaded PDF on local disk.
        user_questions: Questions separated by newlines; blank lines are
            ignored.

    Returns:
        A tuple ``(html_text, docx_path)`` where ``docx_path`` is
        ``/tmp/Report_<pdf name without extension>.docx``.

    Raises:
        gr.Error: If no PDF was provided.
        subprocess.CalledProcessError: If ``pdftotext`` exits with an error.
    """
    if not pdf_file:
        raise gr.Error("Please upload a PDF file.")

    file_name = os.path.basename(pdf_file)
    questions = [
        line.strip()
        for line in (user_questions or "").splitlines()
        if line.strip()
    ]

    _ensure_pdftotext()

    # A private working directory keeps concurrent requests from overwriting
    # each other's extracted text, and check=True prevents uploading a stale
    # or missing file when the extraction fails.
    with tempfile.TemporaryDirectory() as work_dir:
        text_path = os.path.join(work_dir, "text_file.txt")
        subprocess.run(["pdftotext", pdf_file, text_path], check=True)
        text_file = upload_file(text_path)

    chat_session = build_model(text_file)
    report_text = generate_report(chat_session, questions)
    html_text = convert_markdown_to_html(report_text)

    doc = Document()
    add_html_to_word(html_text, doc)

    # splitext swaps only the real extension, whatever its letter case.
    stem, _ = os.path.splitext(file_name)
    doc_path = os.path.join("/tmp", f"Report_{stem}.docx")
    doc.save(doc_path)
    return html_text, doc_path
# Default questions (in Turkish) about the uploaded paper: its authors, the
# models used, the number of references and the publication year.
questions = [
    "Makalenin yazarları kimlerdir?",
    "Hangi modeller kullanılmıştır?",
    "Kaç referans vardır?",
    "Hangi yılda yayınlanmıştır?",
]
# One question per line, the same format the questions text area asks for.
questions_str = "\n".join(questions)
# Web UI: a PDF upload plus a questions box in, the rendered HTML report and
# a downloadable DOCX file out. Both outputs come from process_pdf.
iface = gr.Interface(
    fn=process_pdf,
    inputs=[
        # type="filepath" hands process_pdf the uploaded file as a path.
        gr.File(label="Upload PDF", type="filepath"),
        gr.TextArea(
            label="Enter Questions",
            placeholder="Type your questions here, one per line.",
            value=questions_str,
        ),
    ],
    outputs=[
        gr.HTML(label="HTML Formatted Report"),
        # NOTE(review): process_pdf returns a file path for this component;
        # confirm whether type="binary" has any effect on an output File.
        gr.File(label="DOCX File Output", type="binary"),
    ],
    title="Pdflerinizden kısa rapor oluşturma aracı @YED",
    description="Sorularınızı sormak ve cevap almak için PDF'inizi yükleyin.",
)
# Configure the API key first, then start serving the interface.
setup_api_key()
iface.launch()