File size: 4,496 Bytes
78fae79
 
 
 
 
 
 
 
 
d45ca00
78fae79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ff0896
 
 
 
78fae79
9ff0896
 
 
78fae79
9ff0896
 
78fae79
9ff0896
 
78fae79
 
9ff0896
78fae79
 
9ff0896
 
78fae79
 
 
 
 
d0adb44
 
 
 
78fae79
 
 
 
 
9ff0896
78fae79
9ff0896
78fae79
 
 
 
e928691
78fae79
d0adb44
 
78fae79
 
 
9ff0896
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import shutil
import subprocess
import tempfile

import gradio as gr
import google.generativeai as genai
import markdown
from bs4 import BeautifulSoup
from docx import Document

# Set up your API key
def setup_api_key():
    """Configure the ``genai`` client with the key found in ``GOOGLE_API_KEY``.

    The value is read from the process environment; when the variable is
    not set, ``None`` is handed to ``genai.configure`` unchanged.
    """
    genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

def upload_file(file_path):
    """Upload the file at *file_path* via ``genai.upload_file``.

    A progress line is printed before the upload and the resulting URI is
    printed afterwards.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    print("Uploading file...")
    uploaded = genai.upload_file(path=file_path)
    print(f"Completed upload: {uploaded.uri}")
    return uploaded

def to_markdown(text):
    """Render *text* with ``markdown.markdown`` after rewriting bullets.

    Every '•' character is first replaced by two spaces and an asterisk.
    """
    bulleted = text.replace('•', '  *')
    return markdown.markdown(bulleted)

def build_model(text_file):
    """Start a Gemini chat session and send it *text_file* once.

    Args:
        text_file: The uploaded-file object to attach to the first message
            (as returned by ``upload_file``).

    Returns:
        The chat session, after one initial summarisation request that
        carries the file has been sent through it.
    """
    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=dict(
            temperature=0.2,
            top_p=0.95,
            top_k=64,
            max_output_tokens=8192,
            response_mime_type="text/plain",
        ),
        system_instruction="""Answer the questions based on the uploaded file.
        If there is no related info in the file just reply 'I don't know.' """,
    )

    session = model.start_chat(history=[])

    # The reply is discarded; presumably the point of this first message is
    # to place the file in the session history for the follow-up questions.
    session.send_message(["Summarize the doc in one sentence", text_file])
    return session

def chat(chat_session, prompt):
    """Send *prompt* through *chat_session* and return the reply's ``text``."""
    return chat_session.send_message(prompt).text

def generate_report(chat_session, questions):
    """Ask each question in turn and assemble a Markdown Q&A report.

    Args:
        chat_session: Session object passed on to ``chat`` for every question.
        questions: Iterable of question strings, asked in order.

    Returns:
        A Markdown string: a "QUESTIONS & ANSWERS" heading followed by one
        level-2 heading per question with its answer underneath.
    """
    sections = ["\n## QUESTIONS & ANSWERS\n"]
    for item in questions:
        sections.append(f"\n## {item}\n")
        sections.append(f"\n{chat(chat_session, item)}\n")
    return "".join(sections)

def convert_markdown_to_html(report_text):
    """Return *report_text* rendered to HTML by ``markdown.markdown``."""
    return markdown.markdown(report_text)

def add_html_to_word(html_text, doc):
    """Append the top-level elements of *html_text* to the Word document.

    Args:
        html_text: HTML string, parsed with BeautifulSoup's ``html.parser``.
        doc: Document object receiving ``add_heading`` / ``add_paragraph``
            calls.

    Only the direct children of the parsed document are visited:
    ``h1``-``h6`` become headings of the matching level, each ``li`` found
    under a ``ul`` / ``ol`` becomes a bulleted / numbered paragraph, and any
    other tag becomes a plain paragraph. Nodes without a tag name (such as
    the text between elements) are skipped.
    """
    heading_levels = {f"h{level}": level for level in range(1, 7)}
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}

    for node in BeautifulSoup(html_text, 'html.parser'):
        tag = node.name
        if not tag:
            continue

        if tag in heading_levels:
            doc.add_heading(node.get_text(), level=heading_levels[tag])
        elif tag in list_styles:
            for item in node.find_all('li'):
                doc.add_paragraph(item.get_text(), style=list_styles[tag])
        else:
            # 'p' and every other tag are written as a plain paragraph.
            doc.add_paragraph(node.get_text())

def _ensure_pdftotext():
    """Install poppler-utils via apt-get only when ``pdftotext`` is missing."""
    if shutil.which("pdftotext") is not None:
        return
    # Installing on every request is slow and needs root; do it only as a
    # fallback, and fail loudly instead of continuing without the tool.
    subprocess.run(["apt-get", "update"], check=True)
    subprocess.run(["apt-get", "install", "-y", "poppler-utils"], check=True)


def _parse_questions(user_questions):
    """Split the text-area content into stripped, non-empty questions."""
    lines = (user_questions or "").splitlines()
    return [line.strip() for line in lines if line.strip()]


def process_pdf(pdf_file, user_questions):
    """Answer the user's questions about a PDF and build an HTML/DOCX report.

    The PDF is converted to text with ``pdftotext``, the text is uploaded,
    each question is asked in a chat session, and the Markdown answers are
    rendered to HTML and written to a Word document.

    Args:
        pdf_file: Path of the uploaded PDF.
        user_questions: Questions, one per line. Blank lines and
            surrounding whitespace (including ``\\r``) are ignored.

    Returns:
        A ``(html_text, docx_path)`` tuple: the report as HTML and the path
        of the saved ``Report_<pdf name>.docx`` file.

    Raises:
        ValueError: If no PDF path or no non-empty question was given.
        subprocess.CalledProcessError: If installing poppler-utils or
            running ``pdftotext`` fails.
    """
    if not pdf_file:
        raise ValueError("No PDF file was provided.")

    # Validate before any conversion or upload so bad input costs nothing.
    questions = _parse_questions(user_questions)
    if not questions:
        raise ValueError("Please enter at least one question.")

    _ensure_pdftotext()

    # A per-request directory keeps concurrent requests from overwriting
    # each other's extracted text or report. It is not removed here because
    # the returned DOCX path must stay valid after this function returns.
    work_dir = tempfile.mkdtemp(prefix="pdf_report_")
    text_path = os.path.join(work_dir, "text_file.txt")

    # check=True: a failed conversion must not fall through to an upload.
    subprocess.run(["pdftotext", pdf_file, text_path], check=True)

    text_file = upload_file(text_path)
    chat_session = build_model(text_file)
    report_text = generate_report(chat_session, questions)

    doc = Document()
    html_text = convert_markdown_to_html(report_text)
    add_html_to_word(html_text, doc)

    # splitext swaps only the real extension, whatever its case, and still
    # yields a .docx name when the upload has no ".pdf" suffix.
    stem = os.path.splitext(os.path.basename(pdf_file))[0]
    doc_path = os.path.join(work_dir, f"Report_{stem}.docx")
    doc.save(doc_path)

    return html_text, doc_path

# Default questions pre-filled in the UI (Turkish): who the paper's authors
# are, which models were used, how many references there are, and in which
# year it was published.
questions = [
    "Makalenin yazarları kimlerdir?",
    "Hangi modeller kullanılmıştır?",
    "Kaç referans vardır?",
    "Hangi yılda yayınlanmıştır?",
]

# The text area holds one question per line.
questions_str = "\n".join(questions)

# Input components, in the order process_pdf receives them.
pdf_input = gr.File(label="Upload PDF", type="filepath")
questions_input = gr.TextArea(
    label="Enter Questions",
    placeholder="Type your questions here, one per line.",
    value=questions_str,
)

# Output components, in the order process_pdf returns them.
report_output = gr.HTML(label="HTML Formatted Report")
docx_output = gr.File(label="DOCX File Output", type="binary")

iface = gr.Interface(
    fn=process_pdf,
    inputs=[pdf_input, questions_input],
    outputs=[report_output, docx_output],
    title="Pdflerinizden kısa rapor oluşturma aracı @YED",
    description="Sorularınızı sormak ve cevap almak için PDF'inizi yükleyin.",
)

setup_api_key()
iface.launch()