Spaces:
Running
Running
File size: 4,496 Bytes
78fae79 d45ca00 78fae79 9ff0896 78fae79 9ff0896 78fae79 9ff0896 78fae79 9ff0896 78fae79 9ff0896 78fae79 9ff0896 78fae79 d0adb44 78fae79 9ff0896 78fae79 9ff0896 78fae79 e928691 78fae79 d0adb44 78fae79 9ff0896 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import gradio as gr
import google.generativeai as genai
import markdown
from docx import Document
from bs4 import BeautifulSoup
import shutil
import subprocess
import os
def setup_api_key():
    """Configure the ``genai`` client with the key taken from the environment.

    The value of the ``GOOGLE_API_KEY`` environment variable (``None`` when
    the variable is not set) is passed straight to ``genai.configure``.
    """
    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
def upload_file(file_path):
    """Upload a local file with ``genai.upload_file`` and return the result.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The object returned by ``genai.upload_file``. Its ``uri`` attribute
        is printed once the upload has finished.
    """
    print("Uploading file...")
    uploaded = genai.upload_file(path=file_path)
    print(f"Completed upload: {uploaded.uri}")
    return uploaded
def to_markdown(text):
    """Render *text* to HTML after rewriting bullet characters.

    Every ``•`` is replaced with ``' *'`` (space + asterisk) before the
    text is handed to ``markdown.markdown``.
    """
    with_ascii_bullets = text.replace('•', ' *')
    return markdown.markdown(with_ascii_bullets)
def build_model(text_file, model_name="gemini-1.5-flash"):
    """Create a chat session whose first turn contains the uploaded file.

    Args:
        text_file: The uploaded-file object (as returned by ``upload_file``)
            that the model should answer questions about.
        model_name: Name of the generative model to use. Defaults to the
            model the application has always used.

    Returns:
        The chat session returned by ``model.start_chat``, after one
        message containing ``text_file`` has been sent through it.
    """
    generation_config = {
        "temperature": 0.2,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }
    model = genai.GenerativeModel(
        model_name=model_name,
        generation_config=generation_config,
        system_instruction=(
            "Answer the questions based on the uploaded file.\n"
            "If there is no related info in the file just reply 'I don't know.' "
        ),
    )
    chat_session = model.start_chat(history=[])
    # The reply to this first turn is not used. The message is sent so that
    # the file is part of the conversation before any question is asked
    # (presumably the session keeps it in its history -- confirm against
    # the google-generativeai ChatSession documentation).
    chat_session.send_message(["Summarize the doc in one sentence", text_file])
    return chat_session
def chat(chat_session, prompt):
    """Send *prompt* through *chat_session* and return the reply's ``text``."""
    return chat_session.send_message(prompt).text
def generate_report(chat_session, questions):
    """Ask every question in order and assemble a Markdown Q&A report.

    Args:
        chat_session: Session object forwarded to ``chat`` for each question.
        questions: Iterable of question strings.

    Returns:
        A Markdown string: a ``QUESTIONS & ANSWERS`` heading followed, for
        each question, by a level-2 heading with the question and a
        paragraph with the answer returned by ``chat``.
    """
    sections = ["\n## QUESTIONS & ANSWERS\n"]
    for question in questions:
        sections.append(f"\n## {question}\n")
        sections.append(f"\n{chat(chat_session, question)}\n")
    return "".join(sections)
def convert_markdown_to_html(report_text):
    """Return the HTML produced by ``markdown.markdown`` for *report_text*."""
    return markdown.markdown(report_text)
def _list_item_text(li):
    """Return the text that belongs to *li* itself, without nested items.

    A string is kept only when its nearest enclosing ``<li>`` is *li*, so the
    text of items in a nested ``<ul>``/``<ol>`` is left out. Surrounding
    whitespace (e.g. the newlines around a nested list) is stripped.
    """
    return "".join(s for s in li.strings if s.find_parent('li') is li).strip()


def add_html_to_word(html_text, doc):
    """Append the top-level elements of an HTML fragment to a Word document.

    Mapping of top-level tags:
        * ``h1``..``h6`` -> heading of the matching level
        * ``ul`` / ``ol`` -> one ``List Bullet`` / ``List Number`` paragraph
          per ``<li>`` (nested items are flattened to the same level)
        * ``p`` and any other tag -> plain paragraph with the tag's text

    Bare strings between top-level tags are ignored.

    Args:
        html_text: HTML source to convert.
        doc: Document object providing ``add_heading`` and ``add_paragraph``;
            it is modified in place.
    """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}

    soup = BeautifulSoup(html_text, 'html.parser')
    for element in soup:
        tag = element.name
        if not tag:
            # Text nodes (typically the newlines between tags) have no name.
            continue
        if tag in heading_tags:
            doc.add_heading(element.get_text(), level=int(tag[1]))
        elif tag in list_styles:
            # find_all() also returns items of nested lists. Using only each
            # item's own text keeps a nested item from appearing twice (once
            # inside its parent's text and once as its own paragraph).
            for li in element.find_all('li'):
                doc.add_paragraph(_list_item_text(li), style=list_styles[tag])
        else:
            doc.add_paragraph(element.get_text())
def _ensure_pdftotext():
    """Make sure the ``pdftotext`` executable is available, installing it if not."""
    if shutil.which("pdftotext"):
        return
    # Best-effort install, attempted only when the tool is really missing
    # instead of on every request. Failures are detected by the check below.
    subprocess.run(["apt-get", "update"])
    subprocess.run(["apt-get", "install", "-y", "poppler-utils"])
    if not shutil.which("pdftotext"):
        raise RuntimeError(
            "pdftotext is not available and poppler-utils could not be installed"
        )


def _parse_questions(user_questions):
    """Split the text-area content into stripped, non-empty question lines."""
    lines = (user_questions or "").splitlines()
    return [line.strip() for line in lines if line.strip()]


def process_pdf(pdf_file, user_questions):
    """Answer the user's questions about a PDF and build HTML and DOCX reports.

    The PDF is converted to text with ``pdftotext``, the text file is
    uploaded, every question is asked in one chat session, and the answers
    are rendered to HTML and written to a Word document.

    Args:
        pdf_file: Filesystem path of the uploaded PDF.
        user_questions: Questions separated by newlines; blank lines are
            ignored.

    Returns:
        A tuple ``(html_text, docx_path)``: the report as HTML and the path
        of the saved ``Report_<pdf name>.docx`` file.

    Raises:
        gr.Error: If no PDF was provided or no question was entered.
        RuntimeError: If ``pdftotext`` is missing and cannot be installed.
        subprocess.CalledProcessError: If ``pdftotext`` fails on the file.
    """
    import tempfile  # local import: not part of the file's import block

    if not pdf_file:
        raise gr.Error("Please upload a PDF file.")
    questions = _parse_questions(user_questions)
    if not questions:
        raise gr.Error("Please enter at least one question.")

    _ensure_pdftotext()

    # A private directory per request, so concurrent requests cannot
    # overwrite each other's intermediate text file or report.
    work_dir = tempfile.mkdtemp(prefix="pdf_report_")
    text_path = os.path.join(work_dir, "text_file.txt")
    # pdftotext reads the upload directly, so no copy of the PDF is made.
    subprocess.run(["pdftotext", pdf_file, text_path], check=True)

    text_file = upload_file(text_path)
    chat_session = build_model(text_file)
    report_text = generate_report(chat_session, questions)

    html_text = convert_markdown_to_html(report_text)
    doc = Document()
    add_html_to_word(html_text, doc)

    # splitext handles any extension spelling (".PDF") and names without one.
    stem = os.path.splitext(os.path.basename(pdf_file))[0]
    doc_path = os.path.join(work_dir, f"Report_{stem}.docx")
    doc.save(doc_path)
    return html_text, doc_path
# Default questions pre-filled in the text area (Turkish): who the authors
# are, which models were used, how many references there are, and the year
# of publication.
questions = [
    "Makalenin yazarları kimlerdir?",
    "Hangi modeller kullanılmıştır?",
    "Kaç referans vardır?",
    "Hangi yılda yayınlanmıştır?"
]
questions_str = "\n".join(questions)

# Gradio UI: a PDF upload plus a question box in, an HTML preview plus a
# downloadable DOCX out.
iface = gr.Interface(
    fn=process_pdf,
    inputs=[
        gr.File(label="Upload PDF", type="filepath"),
        gr.TextArea(
            label="Enter Questions",
            placeholder="Type your questions here, one per line.",
            value=questions_str,
        ),
    ],
    outputs=[
        gr.HTML(label="HTML Formatted Report"),
        gr.File(label="DOCX File Output", type="binary"),
    ],
    title="Pdflerinizden kısa rapor oluşturma aracı @YED",
    description="Sorularınızı sormak ve cevap almak için PDF'inizi yükleyin.",
)

# Configure the API client before the app starts serving requests.
setup_api_key()
iface.launch()
|