yunuseduran committed on
Commit
78fae79
·
verified ·
1 Parent(s): cfa664f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import google.generativeai as genai
3
+ import markdown
4
+ from docx import Document
5
+ from bs4 import BeautifulSoup
6
+ import shutil
7
+ import subprocess
8
+ import os
9
+
10
+
11
def setup_api_key():
    """Configure the Gemini client from the ``GOOGLE_API_KEY`` environment variable.

    The value is handed to ``genai.configure`` as-is; ``os.getenv`` yields
    ``None`` when the variable is not set.
    """
    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
14
+
15
def upload_file(file_path):
    """Upload the file at *file_path* with ``genai.upload_file`` and return the handle.

    A progress line is printed before the upload and the resulting URI after it.
    """
    print("Uploading file...")
    uploaded = genai.upload_file(path=file_path)
    print(f"Completed upload: {uploaded.uri}")
    return uploaded
20
+
21
def to_markdown(text):
    """Render *text* through ``markdown.markdown`` and return the resulting HTML.

    Bullet characters (``•``) are first rewritten to `` *`` so they are
    picked up as Markdown list markers.
    """
    return markdown.markdown(text.replace('•', ' *'))
24
+
25
def build_model(text_file, model_name="gemini-1.5-flash"):
    """Create a Gemini chat session whose first turn carries the uploaded file.

    Args:
        text_file: Handle returned by ``genai.upload_file`` for the document.
        model_name: Gemini model to use. Defaults to the model this app was
            written against, so existing callers are unaffected.

    Returns:
        The chat session, after one priming message that includes the file.
    """
    generation_config = {
        "temperature": 0.2,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }

    model = genai.GenerativeModel(
        model_name=model_name,
        generation_config=generation_config,
        system_instruction=(
            "Answer the questions based on the uploaded file.\n"
            "If there is no related info in the file just reply 'I don't know.' "
        ),
    )

    chat_session = model.start_chat(history=[])

    # The reply is intentionally discarded: this first turn only exists to
    # send the file along with a prompt so later questions in the same
    # session can refer to it.
    chat_session.send_message(["Summarize the doc in one sentence", text_file])
    return chat_session
45
+
46
def chat(chat_session, prompt):
    """Send *prompt* on *chat_session* and return the ``text`` of the reply."""
    return chat_session.send_message(prompt).text
49
+
50
def generate_report(chat_session, questions):
    """Ask every question on *chat_session* and return a Markdown Q&A report.

    Args:
        chat_session: Session object accepted by ``chat``.
        questions: Iterable of question strings. Entries that are empty or
            whitespace-only are skipped so no empty prompt is sent.

    Returns:
        Markdown text: a ``QUESTIONS & ANSWERS`` heading followed by one
        ``##`` heading per question with its answer underneath.
    """
    sections = ["\n## QUESTIONS & ANSWERS\n"]
    for question in questions:
        question = question.strip()
        if not question:
            # Blank lines in user input are separators, not questions.
            continue
        answer = chat(chat_session, question)
        sections.append(f"\n## {question}\n")
        sections.append(f"\n{answer}\n")
    # Join once instead of growing a string with += on every iteration.
    return "".join(sections)
58
+
59
def convert_markdown_to_html(report_text):
    """Return *report_text* rendered by ``markdown.markdown``."""
    return markdown.markdown(report_text)
62
+
63
def add_html_to_word(html_text, doc):
    """Append the top-level elements of *html_text* to the Word document *doc*.

    ``h1``-``h6`` become headings of the matching level, ``ul``/``ol`` become
    bulleted/numbered paragraphs, and every other tag becomes a plain
    paragraph holding its text. Bare text between top-level tags is ignored.

    Args:
        html_text: HTML markup to convert.
        doc: Document object providing ``add_heading`` and ``add_paragraph``.
    """
    soup = BeautifulSoup(html_text, 'html.parser')
    heading_levels = {f'h{level}': level for level in range(1, 7)}
    for element in soup:
        tag = element.name
        if not tag:
            # Text nodes (e.g. newlines between blocks) have no tag name.
            continue
        if tag in heading_levels:
            doc.add_heading(element.get_text(), level=heading_levels[tag])
        elif tag in ('ul', 'ol'):
            _add_list(element, doc)
        else:
            # <p> and any other tag: keep the text as an ordinary paragraph.
            doc.add_paragraph(element.get_text())


def _add_list(list_tag, doc):
    """Add each direct ``<li>`` of *list_tag* to *doc*, then its nested lists."""
    style = 'List Number' if list_tag.name == 'ol' else 'List Bullet'
    # recursive=False: a plain find_all('li') also returns items of nested
    # lists, whose text is already part of the parent item's text, so they
    # would be written twice.
    for item in list_tag.find_all('li', recursive=False):
        doc.add_paragraph(_own_text(item), style=style)
        for nested in item.find_all(['ul', 'ol'], recursive=False):
            _add_list(nested, doc)


def _own_text(item):
    """Return the text of *item* without the text of lists nested directly in it."""
    pieces = []
    for child in item.children:
        if child.name in ('ul', 'ol'):
            continue
        pieces.append(child.get_text() if child.name else str(child))
    # Strip the newlines the HTML places around block children so the list
    # paragraph does not start or end with an empty line.
    return "".join(pieces).strip()
88
+
89
def process_pdf(pdf_file, user_questions):
    """Answer *user_questions* about an uploaded PDF and build a DOCX report.

    The PDF is converted to text with ``pdftotext``, the text is uploaded,
    each non-blank line of *user_questions* is asked in one chat session, and
    the answers are rendered both as HTML and as a Word document.

    Args:
        pdf_file: The upload, either a path string or an object whose
            ``name`` attribute is the path of the uploaded file.
        user_questions: Questions, one per line. Blank lines are ignored.

    Returns:
        A ``(html_text, docx_path)`` tuple.

    Raises:
        ValueError: If no file was supplied.
        RuntimeError: If ``pdftotext`` is unavailable and cannot be installed.
        subprocess.CalledProcessError: If ``pdftotext`` fails on the file.
    """
    import tempfile  # only this function needs it

    if pdf_file is None:
        raise ValueError("Please upload a PDF file.")

    # Accept both a plain path and a temp-file wrapper exposing ``.name``.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    pdf_path = os.path.abspath(pdf_path)
    file_name = os.path.basename(pdf_path)

    _ensure_pdftotext()

    # A private directory per request: a fixed /tmp/text_file.txt would be
    # shared by concurrent requests and could survive a failed conversion.
    work_dir = tempfile.mkdtemp(prefix="pdf_report_")
    text_path = os.path.join(work_dir, "text_file.txt")
    try:
        # check=True so a failed conversion stops here instead of uploading
        # a missing or stale text file.
        subprocess.run(["pdftotext", pdf_path, text_path], check=True)
        text_file = upload_file(text_path)
    finally:
        if os.path.exists(text_path):
            os.remove(text_path)

    chat_session = build_model(text_file)

    questions = [
        line.strip()
        for line in (user_questions or "").splitlines()
        if line.strip()
    ]
    report_text = generate_report(chat_session, questions)

    doc = Document()
    html_text = convert_markdown_to_html(report_text)
    add_html_to_word(html_text, doc)

    # splitext swaps only the real extension, whatever its letter case.
    stem = os.path.splitext(file_name)[0]
    doc_path = os.path.join(work_dir, f"Report_{stem}.docx")
    doc.save(doc_path)

    return html_text, doc_path


def _ensure_pdftotext():
    """Install ``poppler-utils`` through apt-get only if ``pdftotext`` is missing."""
    if shutil.which("pdftotext"):
        return
    subprocess.run(["apt-get", "update"])
    subprocess.run(["apt-get", "install", "-y", "poppler-utils"])
    if shutil.which("pdftotext") is None:
        raise RuntimeError(
            "pdftotext is not available and poppler-utils could not be installed"
        )
113
+
114
# Default questions shown in the text area; the user can edit them.
questions = [
    "Who are the authors of the article?",
    "What models were used?",
    "How many references are there?",
    "In what year was it published?",
]

# The text area holds one question per line.
questions_str = "\n".join(questions)

# Two inputs (PDF + questions) map to process_pdf's two parameters; its two
# return values fill the HTML preview and the downloadable DOCX.
iface = gr.Interface(
    fn=process_pdf,
    inputs=[
        gr.File(label="Upload PDF", type="file"),
        gr.TextArea(
            label="Enter Questions",
            placeholder="Type your questions here, one per line.",
            value=questions_str,
        ),
    ],
    outputs=[
        gr.HTML(label="HTML Formatted Report"),
        gr.File(label="DOCX File Output", type="file"),
    ],
    title="REPORT GENERATOR: ASK YOUR QUESTIONS TO A PDF FILE @YED",
    description="Upload a PDF to ask questions and get the answers.",
)

# Configure the API client before the UI starts accepting requests.
setup_api_key()
iface.launch()