File size: 4,488 Bytes
78fae79
 
 
 
 
 
 
 
 
d45ca00
78fae79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d45ca00
78fae79
d45ca00
78fae79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d45ca00
78fae79
 
 
 
 
 
d45ca00
78fae79
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import gradio as gr
import google.generativeai as genai
import markdown
from docx import Document
from bs4 import BeautifulSoup
import shutil
import subprocess
import os

# Set up your API key
def setup_api_key():
    """Configure the Generative AI client from the GOOGLE_API_KEY env variable."""
    genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

def upload_file(file_path):
    """Upload the file at ``file_path`` via ``genai.upload_file`` and return its handle.

    Progress is reported on stdout before and after the upload.
    """
    print("Uploading file...")
    uploaded = genai.upload_file(path=file_path)
    print(f"Completed upload: {uploaded.uri}")
    return uploaded

def to_markdown(text):
    """Render ``text`` to HTML after turning bullet characters into list markers."""
    # Replace the '•' glyph with an indented Markdown '*' marker before rendering.
    with_markers = text.replace('•', '  *')
    rendered = markdown.markdown(with_markers)
    return rendered

def build_model(text_file):
    """Create a Gemini chat session whose first message carries the uploaded file.

    Args:
        text_file: File handle returned by ``genai.upload_file``.

    Returns:
        The chat session started on the configured model, after an initial
        summary request that includes ``text_file`` has been sent through it.
    """
    generation_config = {
        "temperature": 0.2,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }

    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=generation_config,
        system_instruction="""Answer the questions based on the uploaded file.
        If there is no related info in the file just reply 'I don't know.' """,
    )

    chat_session = model.start_chat(history=[])

    # The reply is deliberately discarded: this first message exists to send
    # the file through the session so later questions can be asked against it.
    chat_session.send_message(["Summarize the doc in one sentence", text_file])
    return chat_session

def chat(chat_session, prompt):
    """Send ``prompt`` through ``chat_session`` and return the reply's text."""
    return chat_session.send_message(prompt).text

def generate_report(chat_session, questions):
    """Ask every question in order and return a Markdown Q&A report.

    The report starts with a "QUESTIONS & ANSWERS" heading; each question is
    then written as a level-2 heading followed by the answer from ``chat``.
    """
    sections = ["\n## QUESTIONS & ANSWERS\n"]
    for item in questions:
        sections.append(f"\n## {item}\n")
        reply = chat(chat_session, item)
        sections.append(f"\n{reply}\n")
    return "".join(sections)

def convert_markdown_to_html(report_text):
    """Return ``report_text`` (Markdown) rendered as an HTML string."""
    return markdown.markdown(report_text)

def add_html_to_word(html_text, doc):
    """Append the top-level elements of ``html_text`` to the Word document ``doc``.

    Headings ``h1``-``h6`` become headings of the matching level, ``ul``/``ol``
    items become 'List Bullet'/'List Number' paragraphs, and every other tag
    becomes a plain paragraph of its text. Nodes without a tag name are skipped.
    """
    heading_levels = {f"h{level}": level for level in range(1, 7)}
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}

    soup = BeautifulSoup(html_text, 'html.parser')
    for node in soup:
        tag = node.name
        if not tag:
            # Bare text between tags has no name and is not written out.
            continue

        if tag in heading_levels:
            doc.add_heading(node.get_text(), level=heading_levels[tag])
        elif tag in list_styles:
            for item in node.find_all('li'):
                doc.add_paragraph(item.get_text(), style=list_styles[tag])
        else:
            # Paragraphs and any other tags are written as plain paragraphs.
            doc.add_paragraph(node.get_text())

def process_pdf(pdf_file, user_questions):
    """Answer the user's questions about a PDF and build HTML and DOCX reports.

    The PDF is converted to text with ``pdftotext``, the text is uploaded and
    used to start a chat session, each question is asked in turn, and the
    resulting Markdown report is rendered to HTML and to a Word document.

    Args:
        pdf_file: Path of the uploaded PDF, as a string.
        user_questions: Questions separated by newlines; blank lines are ignored.

    Returns:
        A ``(html_text, docx_path)`` tuple: the report as HTML and the path of
        the saved ``Report_<name>.docx`` file.

    Raises:
        gr.Error: If no PDF or no question was supplied, or if ``pdftotext``
            is not available and could not be installed.
        subprocess.CalledProcessError: If ``pdftotext`` fails on the PDF.
    """
    import tempfile  # local import: only this function needs it

    if not pdf_file:
        raise gr.Error("Please upload a PDF file.")

    # Drop blank lines so empty prompts are never sent to the model.
    question_list = [
        line.strip() for line in (user_questions or "").splitlines() if line.strip()
    ]
    if not question_list:
        raise gr.Error("Please enter at least one question.")

    # Install poppler-utils only when pdftotext is missing, instead of running
    # apt-get on every request. The install itself stays best-effort.
    if shutil.which("pdftotext") is None:
        subprocess.run(["apt-get", "update"])
        subprocess.run(["apt-get", "install", "-y", "poppler-utils"])
        if shutil.which("pdftotext") is None:
            raise gr.Error(
                "pdftotext (poppler-utils) is not installed and could not be "
                "installed automatically."
            )

    # A per-request scratch directory keeps concurrent requests from
    # overwriting each other's extracted text; check=True stops a failed
    # conversion from uploading a missing or stale file.
    with tempfile.TemporaryDirectory() as work_dir:
        text_path = os.path.join(work_dir, "text_file.txt")
        subprocess.run(["pdftotext", pdf_file, text_path], check=True)
        text_file = upload_file(text_path)

    chat_session = build_model(text_file)
    report_text = generate_report(chat_session, question_list)

    doc = Document()
    html_text = convert_markdown_to_html(report_text)
    add_html_to_word(html_text, doc)

    # Derive the report name from the PDF's base name, whatever its extension
    # or letter case, and save it in its own directory so equally named
    # uploads do not collide.
    base_name = os.path.splitext(os.path.basename(pdf_file))[0]
    doc_path = os.path.join(tempfile.mkdtemp(), f"Report_{base_name}.docx")
    doc.save(doc_path)

    return html_text, doc_path

# Default questions shown in the UI text area, one per line.
questions_str = (
    "Who are the authors of the article?\n"
    "What models were used?\n"
    "How many references are there?\n"
    "In what year was it published?"
)

# The same defaults as a list, one entry per question.
questions = questions_str.split("\n")

# Gradio UI: takes a PDF and newline-separated questions, and shows the HTML
# report alongside the generated DOCX file.
iface = gr.Interface(
    fn=process_pdf,
    inputs=[
        # type="filepath" passes process_pdf the uploaded file's path as a string.
        gr.File(label="Upload PDF", type="filepath"),
        gr.TextArea(label="Enter Questions", placeholder="Type your questions here, one per line.", value=questions_str)
    ],
    outputs=[
        gr.HTML(label="HTML Formatted Report"),
        # No `type` on the output component: process_pdf returns a file path,
        # and the legacy "file" value is rejected by Gradio releases that
        # accept "filepath" (see the Gradio File component docs).
        gr.File(label="DOCX File Output")
    ],
    title="REPORT GENERATOR: ASK YOUR QUESTIONS TO A PDF FILE BY @YED",
    description="Upload a PDF to ask questions and get the answers."
)

# Configure the API key before serving requests, then start the app.
setup_api_key()
iface.launch()