Update app.py
Browse files
app.py
CHANGED
@@ -3,225 +3,719 @@ import openai
|
|
3 |
import fitz # PyMuPDF for PDF processing
|
4 |
import os
|
5 |
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Variable to store API key
|
8 |
api_key = ""
|
9 |
|
10 |
# Function to update API key
|
11 |
-
def set_api_key(key):
|
|
|
12 |
global api_key
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
15 |
|
16 |
# Function to extract text from PDF
|
17 |
-
def extract_text_from_pdf(pdf_path):
|
|
|
18 |
try:
|
19 |
doc = fitz.open(pdf_path)
|
20 |
-
text = "
|
|
|
|
|
|
|
21 |
return text
|
22 |
except Exception as e:
|
|
|
23 |
return f"Error extracting text from PDF: {str(e)}"
|
24 |
|
25 |
-
# Function to
|
26 |
-
def
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
if
|
31 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
try:
|
37 |
openai.api_key = api_key
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
-
# Create the system message with systematic review guidelines
|
40 |
-
system_prompt = """
|
41 |
Step 1: Identify a Research Field
|
42 |
-
|
43 |
|
44 |
Step 2: Generate a Research Question
|
45 |
-
|
46 |
|
47 |
Step 3: Create a Protocol
|
48 |
-
|
49 |
|
50 |
Step 4: Evaluate Relevant Literature
|
51 |
-
|
52 |
|
53 |
Step 5: Investigate Sources for Answers
|
54 |
-
|
55 |
|
56 |
Step 6: Collect Data as per Protocol
|
57 |
-
|
58 |
|
59 |
Step 7: Data Extraction
|
60 |
-
|
61 |
|
62 |
Step 8: Critical Analysis of Results
|
63 |
-
|
64 |
|
65 |
Step 9: Interpreting Derivations
|
66 |
-
|
67 |
|
68 |
Step 10: Concluding Statements
|
69 |
-
|
|
|
|
|
|
|
70 |
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
73 |
"""
|
|
|
|
|
|
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
# Prepare the user prompt with the review question and instructions
|
92 |
table_instruction = ""
|
93 |
if include_tables:
|
94 |
-
table_instruction = " Please include important
|
95 |
|
96 |
-
user_prompt = f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
# Create the messages for the API call
|
99 |
messages = [
|
100 |
{"role": "system", "content": system_prompt},
|
101 |
-
{"role": "user", "content": user_prompt +
|
102 |
]
|
103 |
|
104 |
-
|
|
|
|
|
105 |
response = openai.ChatCompletion.create(
|
106 |
-
model=
|
107 |
messages=messages,
|
108 |
-
temperature=
|
109 |
-
|
110 |
-
max_tokens=2048
|
111 |
)
|
112 |
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
except Exception as e:
|
|
|
116 |
return f"Error generating systematic review: {str(e)}"
|
117 |
|
118 |
# Function to save uploaded files
|
119 |
-
def save_uploaded_files(files):
|
|
|
120 |
if not files:
|
121 |
return []
|
122 |
|
123 |
saved_paths = []
|
124 |
for file in files:
|
125 |
if file is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
# Create a temporary file
|
127 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
saved_paths.append(tmp_file.name)
|
130 |
|
131 |
return saved_paths
|
132 |
|
133 |
-
#
|
134 |
-
with gr.Blocks() as demo:
|
135 |
-
gr.Markdown("# Systematic Review Generator for Research Papers")
|
136 |
-
|
137 |
-
with gr.Accordion("How to Use This App", open=True):
|
138 |
-
gr.Markdown("""
|
139 |
-
### Getting Started:
|
140 |
-
1. Enter your OpenAI API key in the field below and click "Set API Key"
|
141 |
-
2. Upload multiple PDF research papers (2 or more recommended)
|
142 |
-
3. Enter your review question or topic
|
143 |
-
4. Check the "Include Tables" option if you want the review to include comparison tables
|
144 |
-
5. Click "Generate Systematic Review" to start the process
|
145 |
-
|
146 |
-
### Tips:
|
147 |
-
- For best results, upload papers that are related to the same research topic or field
|
148 |
-
- Be specific in your review question to get more focused results
|
149 |
-
- The generated review will follow a systematic structure including research field identification, data extraction, analysis, and conclusions
|
150 |
-
- The more papers you upload, the more comprehensive the review will be
|
151 |
-
""")
|
152 |
-
|
153 |
-
# API Key Input
|
154 |
-
with gr.Row():
|
155 |
-
api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
|
156 |
-
api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
|
157 |
-
api_key_output = gr.Textbox(label="API Key Status", interactive=False)
|
158 |
-
|
159 |
-
# PDF Upload and Review Settings
|
160 |
-
with gr.Row():
|
161 |
-
with gr.Column():
|
162 |
-
pdf_files = gr.File(label="Upload PDF Research Papers", file_count="multiple", type="binary")
|
163 |
-
review_question = gr.Textbox(label="Review Question or Topic", placeholder="What are the current advances in GAN applications for speech processing?")
|
164 |
-
include_tables = gr.Checkbox(label="Include Comparison Tables", value=True)
|
165 |
-
generate_button = gr.Button("Generate Systematic Review", elem_id="generate_button")
|
166 |
-
|
167 |
-
# Output
|
168 |
-
review_output = gr.Textbox(label="Systematic Review", interactive=False, lines=20)
|
169 |
-
|
170 |
-
# Button actions
|
171 |
-
api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
|
172 |
-
|
173 |
-
# Generate systematic review
|
174 |
-
def process_files_and_generate_review(files, question, include_tables):
|
175 |
-
if not files:
|
176 |
-
return "Please upload at least one PDF file."
|
177 |
-
|
178 |
-
# Save uploaded files
|
179 |
-
saved_paths = save_uploaded_files(files)
|
180 |
-
|
181 |
-
# Generate review
|
182 |
-
review = generate_systematic_review(saved_paths, question, include_tables)
|
183 |
-
|
184 |
-
# Clean up temporary files
|
185 |
-
for path in saved_paths:
|
186 |
-
try:
|
187 |
-
os.remove(path)
|
188 |
-
except:
|
189 |
-
pass
|
190 |
-
|
191 |
-
return review
|
192 |
-
|
193 |
-
generate_button.click(
|
194 |
-
process_files_and_generate_review,
|
195 |
-
inputs=[pdf_files, review_question, include_tables],
|
196 |
-
outputs=[review_output]
|
197 |
-
)
|
198 |
-
|
199 |
-
# Add CSS styling
|
200 |
css = """
|
201 |
<style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
#generate_button {
|
203 |
background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
|
204 |
color: white;
|
205 |
font-weight: bold;
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
}
|
|
|
207 |
#generate_button:hover {
|
208 |
background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
|
|
|
|
|
209 |
}
|
|
|
210 |
#api_key_button {
|
211 |
background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
|
212 |
color: white;
|
213 |
font-weight: bold;
|
214 |
margin-top: 27px;
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
}
|
|
|
216 |
#api_key_button:hover {
|
217 |
background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
.gradio-container {
|
220 |
max-width: 1200px !important;
|
221 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
</style>
|
223 |
"""
|
224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
# Launch the app
|
226 |
if __name__ == "__main__":
|
|
|
227 |
demo.launch(share=True)
|
|
|
3 |
import fitz # PyMuPDF for PDF processing
|
4 |
import os
|
5 |
import tempfile
|
6 |
+
import time
|
7 |
+
import logging
|
8 |
+
import re
|
9 |
+
from typing import List, Optional, Dict, Any, Union
|
10 |
+
import markdown
|
11 |
+
import concurrent.futures
|
12 |
+
|
13 |
+
# Set up logging
|
14 |
+
logging.basicConfig(
|
15 |
+
level=logging.INFO,
|
16 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
17 |
+
)
|
18 |
+
logger = logging.getLogger(__name__)
|
19 |
|
20 |
# Variable to store API key
|
21 |
api_key = ""
|
22 |
|
23 |
# Function to update API key
|
24 |
+
def set_api_key(key: str) -> str:
    """Store the OpenAI API key in the module-level ``api_key`` variable.

    Args:
        key: The key as entered by the user. Surrounding whitespace is
            ignored.

    Returns:
        A status message: a request to enter a key when ``key`` is missing
        or blank, otherwise a success confirmation.
    """
    global api_key
    # Treat None the same as an empty string instead of failing on .strip().
    cleaned = (key or "").strip()
    if not cleaned:
        return "Please enter a valid API key"

    api_key = cleaned
    return "✅ API Key Set Successfully!"
|
32 |
|
33 |
# Function to extract text from PDF
|
34 |
+
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the plain text of every page of a PDF file.

    Each page's text is preceded by a ``--- Page N ---`` marker line, with
    ``N`` counted from 1.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The concatenated page text. On any failure the error is logged and a
        string starting with ``"Error extracting text from PDF:"`` is
        returned instead of raising.
    """
    try:
        # Open as a context manager so the document handle is released even
        # if reading one of the pages fails.
        with fitz.open(pdf_path) as doc:
            parts = []
            for page_num, page in enumerate(doc, start=1):
                parts.append(f"\n--- Page {page_num} ---\n")
                parts.append(page.get_text("text"))
        return "".join(parts)
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {str(e)}")
        return f"Error extracting text from PDF: {str(e)}"
|
46 |
|
47 |
+
# Function to truncate text to fit token limits
|
48 |
+
def truncate_text_for_tokens(text: str, max_tokens: int = 8000) -> str:
    """Truncate text so it approximately fits within a token limit.

    Args:
        text: The text to shorten.
        max_tokens: Approximate token budget. Values below zero are treated
            as zero.

    Returns:
        ``text`` unchanged when it fits; otherwise its leading characters
        followed by a truncation notice (the notice itself is not counted
        against the budget).
    """
    # Rough approximation: 1 token ≈ 4 characters in English.
    # Clamp at zero: a negative limit would make the slice below count from
    # the END of the string and keep almost all of the text.
    char_limit = max(max_tokens, 0) * 4
    if len(text) > char_limit:
        return text[:char_limit] + "\n[Content truncated due to length...]"
    return text
|
55 |
+
|
56 |
+
# Function to extract title from PDF content
|
57 |
+
def extract_title(pdf_text: str) -> str:
    """Guess a document title from the start of extracted PDF text.

    Only the first ten lines are inspected. The first one whose stripped
    length is strictly between 5 and 200 characters, and which does not start
    with ``---``, is returned.

    Args:
        pdf_text: Text extracted from a PDF.

    Returns:
        The chosen line with surrounding whitespace removed, or
        ``"Untitled Document"`` when no line qualifies.
    """
    # Limit the split so a long document is not broken into every one of its
    # lines just to look at the first ten.
    first_lines = pdf_text.split('\n', 10)[:10]
    for line in first_lines:
        line = line.strip()
        # Title candidates: longer than 5 chars, shorter than 200, and not a
        # "---" marker line.
        if 5 < len(line) < 200 and not line.startswith('---'):
            return line

    return "Untitled Document"
|
68 |
+
|
69 |
+
# Model selection options
|
70 |
+
# Maps an OpenAI model identifier to a human-readable description.
# NOTE(review): the "gpt-4.1" entry is described as "GPT-4" — confirm the
# label matches the intended model.
MODEL_OPTIONS = {
    "gpt-4.1": "GPT-4 (Most powerful, slower)",
    "gpt-3.5-turbo": "GPT-3.5 Turbo (Faster, less powerful)"
}
|
74 |
+
|
75 |
+
# Function to get available OpenAI models
|
76 |
+
def get_available_models() -> List[str]:
    """Return the model identifiers currently offered for selection.

    When an API key has been set, the OpenAI model list is queried and any
    returned id containing ``gpt-4.1`` or ``gpt-3.5-turbo`` is added to
    ``MODEL_OPTIONS`` (keyed and labelled by its id). Lookup failures are
    logged and the options known so far are returned.

    Returns:
        The keys of ``MODEL_OPTIONS`` as a list.
    """
    # Without a key the API cannot be queried; fall straight through to the
    # options already known.
    if api_key:
        try:
            openai.api_key = api_key
            listing = openai.Model.list()
            gpt_ids = [entry.id for entry in listing['data'] if 'gpt' in entry.id.lower()]
            # Add to our options if found
            for model_id in gpt_ids:
                in_supported_family = 'gpt-4.1' in model_id or 'gpt-3.5-turbo' in model_id
                if model_id not in MODEL_OPTIONS and in_supported_family:
                    MODEL_OPTIONS[model_id] = model_id
        except Exception as e:
            logger.error(f"Error fetching models: {str(e)}")

    return list(MODEL_OPTIONS.keys())
|
93 |
+
|
94 |
+
# Function for parallel PDF processing
|
95 |
+
def process_pdf_in_parallel(pdf_files: List[str]) -> List[tuple]:
    """Extract text from several PDFs concurrently.

    Args:
        pdf_files: Paths of the PDF files to process.

    Returns:
        One ``(file_name, text)`` tuple per input path, in the same order as
        ``pdf_files``. ``file_name`` is the path's base name and ``text`` is
        the extracted text after ``truncate_text_for_tokens``. If processing
        a file raises, the error is logged and ``text`` is an
        ``"Error processing file: ..."`` message instead.
    """
    results = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(extract_text_from_pdf, pdf_path) for pdf_path in pdf_files]
        # Collect in submission order (not completion order) so the position
        # of each result always matches the position of its input file.
        for pdf_path, future in zip(pdf_files, futures):
            pdf_name = os.path.basename(pdf_path)
            try:
                pdf_text = future.result()
                # Truncate if needed
                pdf_text = truncate_text_for_tokens(pdf_text)
                results.append((pdf_name, pdf_text))
            except Exception as e:
                logger.error(f"Error processing {pdf_name}: {str(e)}")
                results.append((pdf_name, f"Error processing file: {str(e)}"))

    return results
|
114 |
+
|
115 |
+
# Function to create system prompt
|
116 |
+
def create_system_prompt(review_type: str = "systematic") -> str:
    """Create system prompt based on review type.

    Args:
        review_type: ``"systematic"`` selects the step-by-step systematic
            review instructions and ``"literature"`` the literature review
            instructions. Any other value selects the meta-analysis
            instructions.

    Returns:
        The instruction text to use as the system message.
    """
    if review_type == "systematic":
        return """
You are an expert academic researcher tasked with creating comprehensive systematic reviews. Follow these steps:

Step 1: Identify a Research Field
Identify the specific area of study represented in the provided papers.

Step 2: Generate a Research Question
Create a specific, measurable, achievable, relevant, and time-bound (SMART) research question that unifies the papers.

Step 3: Create a Protocol
Outline a detailed methodology for your review, including analysis methods appropriate for the papers.

Step 4: Evaluate Relevant Literature
Critically evaluate the quality, methodology, and findings of the provided papers, identifying gaps or limitations.

Step 5: Investigate Sources for Answers
Examine how the papers contribute to answering the research question.

Step 6: Collect Data as per Protocol
Implement rigorous data collection methods, extracting key findings and statistics.

Step 7: Data Extraction
Organize the extracted data in a structured format, including tables where appropriate.

Step 8: Critical Analysis of Results
Interpret patterns, trends, and conclusions from the data, comparing findings across papers.

Step 9: Interpreting Derivations
Contextualize the findings in relation to the research question and broader field.

Step 10: Concluding Statements
Summarize findings, draw conclusions, and provide recommendations for future research.

Step 11: References
Include proper citations for all papers reviewed and any additional references.

Your review should be:
- Comprehensive yet concise
- Well-structured with clear headings and subheadings
- Using academic language appropriate for a scholarly audience
- Including data visualizations or tables where helpful
- Balanced and objective in evaluating the evidence
"""
    elif review_type == "literature":
        return """
You are an expert academic researcher tasked with creating a thorough literature review. Your review should:

1. Provide an overview of the current state of knowledge in the specific field
2. Identify common themes, methodologies, and findings across the papers
3. Highlight contradictions or inconsistencies in the literature
4. Evaluate the strength of evidence for key claims
5. Identify research gaps and future directions
6. Organize findings in a logical, thematic structure
7. Include visual elements (tables, concept maps) to synthesize information
8. Maintain academic rigor and proper attribution

Your review should be scholarly in tone, well-organized, and provide a balanced assessment of the literature.
"""
    else:  # meta-analysis (also reached for any unrecognised review_type)
        return """
You are an expert researcher conducting a meta-analysis of the provided papers. Your analysis should:

1. Identify a precise research question that can be answered quantitatively
2. Extract comparable quantitative data, effect sizes, or statistics from the papers
3. Assess the methodological quality and risk of bias in each study
4. Synthesize findings using appropriate statistical methods
5. Present results using forest plots, funnel plots, or other visualizations
6. Discuss heterogeneity and its potential sources
7. Evaluate publication bias and its impact on the findings
8. Draw conclusions based on the pooled data
9. Discuss implications for practice and future research

Your meta-analysis should follow PRISMA guidelines where applicable, maintain statistical rigor, and provide clear visual representations of the quantitative synthesis.
"""
|
193 |
+
|
194 |
+
# Function to interact with OpenAI API for systematic review
|
195 |
+
def _combine_pdf_texts(pdf_results: List[tuple], max_chars: int = 20000) -> str:
    """Join paper texts under one character budget, giving each paper a fair share."""
    sections = []
    budget = max_chars
    for index, (pdf_name, pdf_text) in enumerate(pdf_results):
        header = f"\n\n--- PAPER {index + 1}: {pdf_name} ---\n\n"
        # Split what is left evenly among the papers still to be added, so
        # space a short paper does not use rolls over to the later ones and
        # one long paper can no longer crowd out every paper after it.
        share = budget // (len(pdf_results) - index)
        room = max(share - len(header), 0)
        if len(pdf_text) > room:
            body = pdf_text[:room] + "\n[... Content truncated due to length limitations ...]"
        else:
            body = pdf_text
        sections.append(header + body)
        budget = max(budget - len(header) - min(len(pdf_text), room), 0)
    return "".join(sections)


def generate_systematic_review(
    pdf_files: List[str],
    review_question: str,
    model: str = "gpt-4.1",
    review_type: str = "systematic",
    include_tables: bool = True,
    temperature: float = 0.7,
    max_tokens: int = 4000
) -> str:
    """Generate a review of the given PDF files with the OpenAI chat API.

    Args:
        pdf_files: Paths of the PDF files to review.
        review_question: The question or topic the review should address.
        model: Model identifier passed to the chat completion call.
        review_type: Passed to ``create_system_prompt`` to choose the
            instructions, and named in the user prompt.
        include_tables: When true, the prompt asks for tables, charts or
            figures.
        temperature: Sampling temperature passed to the API.
        max_tokens: Maximum completion length passed to the API.

    Returns:
        The generated review text. If the API key, the files or the question
        are missing, a message asking for them is returned; if anything
        raises, the error is logged and an
        ``"Error generating systematic review: ..."`` message is returned.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    if not pdf_files:
        return "Please upload at least one PDF file."

    if not review_question:
        return "Please enter a review question."

    try:
        # Start timer
        start_time = time.time()

        openai.api_key = api_key

        # Create the system message with review guidelines
        system_prompt = create_system_prompt(review_type)

        # Process PDFs in parallel
        logger.info(f"Processing {len(pdf_files)} PDFs...")
        pdf_results = process_pdf_in_parallel(pdf_files)

        # Numbered list of the papers with a best-guess title for each
        paper_list = ', '.join(
            f"{number}. {pdf_name} (Title: {extract_title(pdf_text)})"
            for number, (pdf_name, pdf_text) in enumerate(pdf_results, start=1)
        )

        # Prepare the user prompt with the review question and instructions
        table_instruction = ""
        if include_tables:
            table_instruction = " Please include important tables, charts or figures in your review to help summarize the findings."

        user_prompt = (
            "\n"
            f"Please generate a {review_type} review of the following {len(pdf_files)} papers:\n"
            f"{paper_list}\n"
            "\n"
            f"Review Question: {review_question}\n"
            "\n"
            f"{table_instruction}\n"
            "\n"
            "Format your response with clear headings, subheadings, and properly formatted tables using markdown syntax.\n"
        )

        # Combine PDF texts so that every listed paper contributes content
        combined_pdf_text = _combine_pdf_texts(pdf_results)

        # Create the messages for the API call
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt + combined_pdf_text}
        ]

        logger.info(f"Sending request to OpenAI API (model: {model})...")

        # Call the API
        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens
        )

        result = response["choices"][0]["message"]["content"]

        # Calculate time taken
        time_taken = time.time() - start_time
        logger.info(f"Review generated in {time_taken:.2f} seconds")

        return result

    except Exception as e:
        logger.error(f"Error generating review: {str(e)}")
        return f"Error generating systematic review: {str(e)}"
|
296 |
|
297 |
# Function to save uploaded files
|
298 |
+
def save_uploaded_files(files) -> List[str]:
    """Copy uploaded PDFs into temporary files and return the new paths.

    Each upload may be raw ``bytes`` (the form Gradio's ``File`` component
    passes when configured with ``type="binary"``), a file-like object with
    ``read()`` and ``name``, or a filesystem path. ``None`` entries and
    non-PDF uploads are skipped.

    Args:
        files: Iterable of uploads, or a falsy value for "nothing uploaded".

    Returns:
        Paths of the temporary ``.pdf`` files written, in upload order. The
        files are created with ``delete=False``; the caller must remove them.
    """
    if not files:
        return []

    saved_paths = []
    for file in files:
        if file is None:
            continue

        if isinstance(file, (bytes, bytearray)):
            # Raw bytes carry no file name, so recognise PDFs by the "%PDF"
            # signature near the start of the data instead of an extension.
            if b"%PDF" not in file[:1024]:
                continue
            file_extension = ".pdf"
            data = bytes(file)
        else:
            # File-like objects expose the name as an attribute; a plain
            # path IS the name.
            source_name = getattr(file, "name", file)
            file_extension = os.path.splitext(str(source_name))[1].lower()

            # Only process PDF files
            if file_extension != ".pdf":
                continue

            if hasattr(file, "read"):
                data = file.read()
            else:
                with open(file, "rb") as f:
                    data = f.read()

        # Create a temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
            tmp_file.write(data)

        saved_paths.append(tmp_file.name)

    return saved_paths
|
326 |
|
327 |
+
# Custom HTML and CSS for better UI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
css = """
|
329 |
<style>
|
330 |
+
/* Base styling */
|
331 |
+
body {
|
332 |
+
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
|
333 |
+
}
|
334 |
+
|
335 |
+
.container {
|
336 |
+
max-width: 1200px !important;
|
337 |
+
margin: 0 auto;
|
338 |
+
}
|
339 |
+
|
340 |
+
/* Header styling */
|
341 |
+
.header {
|
342 |
+
background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
|
343 |
+
color: white;
|
344 |
+
padding: 20px;
|
345 |
+
border-radius: 10px;
|
346 |
+
margin-bottom: 20px;
|
347 |
+
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
348 |
+
}
|
349 |
+
|
350 |
+
/* Button styling */
|
351 |
#generate_button {
|
352 |
background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
|
353 |
color: white;
|
354 |
font-weight: bold;
|
355 |
+
padding: 10px 20px;
|
356 |
+
border-radius: 8px;
|
357 |
+
border: none;
|
358 |
+
cursor: pointer;
|
359 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
|
360 |
+
transition: all 0.3s ease;
|
361 |
}
|
362 |
+
|
363 |
#generate_button:hover {
|
364 |
background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
|
365 |
+
transform: translateY(-2px);
|
366 |
+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
|
367 |
}
|
368 |
+
|
369 |
#api_key_button {
|
370 |
background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
|
371 |
color: white;
|
372 |
font-weight: bold;
|
373 |
margin-top: 27px;
|
374 |
+
padding: 10px 20px;
|
375 |
+
border-radius: 8px;
|
376 |
+
border: none;
|
377 |
+
cursor: pointer;
|
378 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
|
379 |
+
transition: all 0.3s ease;
|
380 |
}
|
381 |
+
|
382 |
#api_key_button:hover {
|
383 |
background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green */
|
384 |
+
transform: translateY(-2px);
|
385 |
+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
|
386 |
+
}
|
387 |
+
|
388 |
+
/* Card styling */
|
389 |
+
.card {
|
390 |
+
background-color: white;
|
391 |
+
border-radius: 10px;
|
392 |
+
padding: 20px;
|
393 |
+
margin-bottom: 20px;
|
394 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
395 |
+
}
|
396 |
+
|
397 |
+
/* Form styling */
|
398 |
+
.form-group {
|
399 |
+
margin-bottom: 15px;
|
400 |
}
|
401 |
+
|
402 |
+
/* Tabs styling */
|
403 |
+
.tab-content {
|
404 |
+
padding: 20px;
|
405 |
+
background-color: white;
|
406 |
+
border-radius: 0 0 10px 10px;
|
407 |
+
}
|
408 |
+
|
409 |
+
/* Table styling in output */
|
410 |
+
.output-container table {
|
411 |
+
border-collapse: collapse;
|
412 |
+
width: 100%;
|
413 |
+
margin: 20px 0;
|
414 |
+
}
|
415 |
+
|
416 |
+
.output-container th, .output-container td {
|
417 |
+
border: 1px solid #ddd;
|
418 |
+
padding: 8px;
|
419 |
+
text-align: left;
|
420 |
+
}
|
421 |
+
|
422 |
+
.output-container th {
|
423 |
+
background-color: #f2f2f2;
|
424 |
+
font-weight: bold;
|
425 |
+
}
|
426 |
+
|
427 |
+
.output-container tr:nth-child(even) {
|
428 |
+
background-color: #f9f9f9;
|
429 |
+
}
|
430 |
+
|
431 |
+
/* Spinner styling */
|
432 |
+
.loading-spinner {
|
433 |
+
display: inline-block;
|
434 |
+
width: 20px;
|
435 |
+
height: 20px;
|
436 |
+
border: 3px solid rgba(0, 0, 0, 0.1);
|
437 |
+
border-radius: 50%;
|
438 |
+
border-top-color: #4a00e0;
|
439 |
+
animation: spin 1s ease-in-out infinite;
|
440 |
+
}
|
441 |
+
|
442 |
+
@keyframes spin {
|
443 |
+
to {
|
444 |
+
transform: rotate(360deg);
|
445 |
+
}
|
446 |
+
}
|
447 |
+
|
448 |
+
/* Customizations for Gradio */
|
449 |
.gradio-container {
|
450 |
max-width: 1200px !important;
|
451 |
}
|
452 |
+
|
453 |
+
.gr-form, .gr-box {
|
454 |
+
border-radius: 10px !important;
|
455 |
+
}
|
456 |
+
|
457 |
+
.gr-input, .gr-textarea {
|
458 |
+
border-radius: 6px !important;
|
459 |
+
}
|
460 |
+
|
461 |
+
/* Responsive adjustments */
|
462 |
+
@media (max-width: 768px) {
|
463 |
+
.header {
|
464 |
+
padding: 15px;
|
465 |
+
}
|
466 |
+
|
467 |
+
#generate_button, #api_key_button {
|
468 |
+
padding: 8px 16px;
|
469 |
+
}
|
470 |
+
}
|
471 |
</style>
|
472 |
"""
|
473 |
|
474 |
+
# Add custom HTML header
|
475 |
+
header_html = """
|
476 |
+
<div class="header">
|
477 |
+
<h1>Systematic Review Generator for Research Papers</h1>
|
478 |
+
<p>Upload multiple PDF papers to generate comprehensive reviews, literature analyses, and meta-analyses</p>
|
479 |
+
</div>
|
480 |
+
"""
|
481 |
+
|
482 |
+
# Custom progress component
|
483 |
+
def progress_component(text, progress):
    """Build the HTML for a captioned progress bar.

    Args:
        text: Caption shown on the left above the bar.
        progress: Percentage shown on the right and used as the width of the
            filled part of the bar.

    Returns:
        An HTML snippet as a string.
    """
    markup_lines = [
        "",
        '<div style="margin: 10px 0; width: 100%;">',
        '<div style="display: flex; align-items: center; margin-bottom: 5px;">',
        f"<div>{text}</div>",
        f'<div style="margin-left: auto;">{progress}%</div>',
        "</div>",
        '<div style="background-color: #e0e0e0; height: 8px; border-radius: 4px; width: 100%;">',
        f'<div style="background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); height: 100%; width: {progress}%; border-radius: 4px;"></div>',
        "</div>",
        "</div>",
        "",
    ]
    return "\n".join(markup_lines)
|
495 |
+
|
496 |
+
# Function to create a review
|
497 |
+
def create_review(files, question, model, review_type, include_tables, temperature, max_tokens, progress=gr.Progress()):
    """Save the uploads, generate the review and clean up the temporary files.

    Args:
        files: Uploaded files, passed to ``save_uploaded_files``.
        question: Review question or topic.
        model: Model identifier for the review request.
        review_type: Kind of review to generate.
        include_tables: Whether to ask for tables and figures.
        temperature: Sampling temperature for the review request.
        max_tokens: Maximum length of the generated review.
        progress: Callable used to report progress as ``(fraction, desc=...)``.

    Returns:
        The review text, or a message describing why no review was produced.
        Exceptions are logged and returned as an ``"An error occurred: ..."``
        message rather than raised.
    """
    try:
        if not files:
            return "Please upload at least one PDF file."

        progress(0.1, desc="Saving uploaded files...")
        saved_paths = save_uploaded_files(files)

        if not saved_paths:
            return "No valid PDF files were uploaded. Please upload PDF files only."

        try:
            progress(0.3, desc="Processing PDFs...")
            review = generate_systematic_review(
                saved_paths,
                question,
                model=model,
                review_type=review_type,
                include_tables=include_tables,
                temperature=temperature,
                max_tokens=max_tokens
            )

            progress(0.9, desc="Finalizing review...")
        finally:
            # Clean up temporary files, even when generation fails, so failed
            # runs do not leave copies of the uploads behind.
            for path in saved_paths:
                try:
                    os.remove(path)
                except OSError as e:
                    logger.error(f"Error removing temporary file {path}: {str(e)}")

        progress(1.0, desc="Complete!")
        return review

    except Exception as e:
        logger.error(f"Error in create_review: {str(e)}")
        return f"An error occurred: {str(e)}"
|
534 |
+
|
535 |
+
# Gradio UI Layout
|
536 |
+
def create_ui():
|
537 |
+
with gr.Blocks(css=css) as demo:
|
538 |
+
gr.HTML(header_html)
|
539 |
+
|
540 |
+
with gr.Tabs() as tabs:
|
541 |
+
with gr.TabItem("Generate Review"):
|
542 |
+
with gr.Row():
|
543 |
+
with gr.Column(scale=1):
|
544 |
+
with gr.Box():
|
545 |
+
gr.Markdown("### 1. Setup API Key")
|
546 |
+
api_key_input = gr.Textbox(
|
547 |
+
label="Enter OpenAI API Key",
|
548 |
+
type="password",
|
549 |
+
placeholder="sk-..."
|
550 |
+
)
|
551 |
+
api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
|
552 |
+
api_key_output = gr.Textbox(
|
553 |
+
label="API Key Status",
|
554 |
+
interactive=False,
|
555 |
+
value="Not set"
|
556 |
+
)
|
557 |
+
|
558 |
+
with gr.Box():
|
559 |
+
gr.Markdown("### 2. Upload Papers")
|
560 |
+
pdf_files = gr.File(
|
561 |
+
label="Upload PDF Research Papers (PDF files only)",
|
562 |
+
file_count="multiple",
|
563 |
+
type="binary",
|
564 |
+
file_types=[".pdf"]
|
565 |
+
)
|
566 |
+
|
567 |
+
with gr.Column(scale=1):
|
568 |
+
with gr.Box():
|
569 |
+
gr.Markdown("### 3. Review Configuration")
|
570 |
+
review_question = gr.Textbox(
|
571 |
+
label="Review Question or Topic",
|
572 |
+
placeholder="What are the current advances in GAN applications for speech processing?",
|
573 |
+
lines=2
|
574 |
+
)
|
575 |
+
|
576 |
+
review_type = gr.Radio(
|
577 |
+
label="Review Type",
|
578 |
+
choices=["systematic", "literature", "meta-analysis"],
|
579 |
+
value="systematic"
|
580 |
+
)
|
581 |
+
|
582 |
+
model = gr.Dropdown(
|
583 |
+
label="Model",
|
584 |
+
choices=list(MODEL_OPTIONS.keys()),
|
585 |
+
value="gpt-4.1"
|
586 |
+
)
|
587 |
+
|
588 |
+
with gr.Row():
|
589 |
+
include_tables = gr.Checkbox(
|
590 |
+
label="Include Tables and Figures",
|
591 |
+
value=True
|
592 |
+
)
|
593 |
+
|
594 |
+
with gr.Column():
|
595 |
+
temperature = gr.Slider(
|
596 |
+
label="Temperature (Creativity)",
|
597 |
+
minimum=0.0,
|
598 |
+
maximum=1.0,
|
599 |
+
value=0.7,
|
600 |
+
step=0.1
|
601 |
+
)
|
602 |
+
|
603 |
+
max_tokens = gr.Slider(
|
604 |
+
label="Maximum Output Length",
|
605 |
+
minimum=1000,
|
606 |
+
maximum=8000,
|
607 |
+
value=4000,
|
608 |
+
step=500
|
609 |
+
)
|
610 |
+
|
611 |
+
generate_button = gr.Button(
|
612 |
+
"Generate Review",
|
613 |
+
elem_id="generate_button",
|
614 |
+
variant="primary"
|
615 |
+
)
|
616 |
+
|
617 |
+
# Output
|
618 |
+
with gr.Box():
|
619 |
+
gr.Markdown("### Review Output")
|
620 |
+
review_output = gr.Markdown(
|
621 |
+
label="Generated Review",
|
622 |
+
value="Review will appear here after generation..."
|
623 |
+
)
|
624 |
+
|
625 |
+
with gr.Row():
|
626 |
+
copy_button = gr.Button("📋 Copy to Clipboard")
|
627 |
+
export_button = gr.Button("📥 Export as Markdown")
|
628 |
+
|
629 |
+
with gr.TabItem("How to Use"):
|
630 |
+
gr.Markdown("""
|
631 |
+
### Getting Started with the Systematic Review Generator
|
632 |
+
|
633 |
+
#### 1. Setting Up
|
634 |
+
- Enter your OpenAI API key in the field provided and click "Set API Key"
|
635 |
+
- You'll need an API key with access to GPT-4 or GPT-3.5 for best results
|
636 |
+
- Your API key is never stored and is only used for this session
|
637 |
+
|
638 |
+
#### 2. Uploading Papers
|
639 |
+
- Upload 2 or more PDF research papers (the more related they are, the better)
|
640 |
+
- Only PDF files are supported
|
641 |
+
- Papers should ideally be related to the same research field
|
642 |
+
|
643 |
+
#### 3. Configuring Your Review
|
644 |
+
- Enter a specific review question or topic
|
645 |
+
- Choose the review type:
|
646 |
+
- **Systematic Review**: Follows a rigorous methodology to answer a specific research question
|
647 |
+
- **Literature Review**: Provides an overview of existing research on a topic
|
648 |
+
- **Meta-Analysis**: Combines and analyzes quantitative data from multiple studies
|
649 |
+
- Select the AI model (GPT-4 recommended for complex papers)
|
650 |
+
- Adjust temperature (higher = more creative, lower = more focused)
|
651 |
+
- Set maximum output length (longer reviews will be more comprehensive)
|
652 |
+
|
653 |
+
#### 4. Generating Your Review
|
654 |
+
- Click "Generate Review" to start the process
|
655 |
+
- Processing time depends on the number and size of papers, and the selected model
|
656 |
+
- You can copy or export the final review when complete
|
657 |
+
|
658 |
+
#### Tips for Best Results
|
659 |
+
- Use papers from the same field or on related topics
|
660 |
+
- Be specific in your review question
|
661 |
+
- For technical papers, choose GPT-4 for better comprehension
|
662 |
+
- The system works best with 2-5 related papers
|
663 |
+
- Consider using a lower temperature (0.3-0.5) for more factual reviews
|
664 |
+
""")
|
665 |
+
|
666 |
+
with gr.TabItem("About"):
|
667 |
+
gr.Markdown("""
|
668 |
+
### About the Systematic Review Generator
|
669 |
+
|
670 |
+
This application helps researchers, students, and academics generate comprehensive reviews of scientific papers. It leverages advanced AI to analyze PDF research papers and synthesize findings into structured, coherent reviews.
|
671 |
+
|
672 |
+
#### Features
|
673 |
+
- Support for multiple review types: systematic reviews, literature reviews, and meta-analyses
|
674 |
+
- Automatic extraction of text from PDF files
|
675 |
+
- Parallel processing of multiple papers
|
676 |
+
- Integration with OpenAI's GPT models
|
677 |
+
- Customizable output parameters
|
678 |
+
- Table and figure generation capabilities
|
679 |
+
|
680 |
+
#### How It Works
|
681 |
+
1. The system extracts text from your uploaded PDFs
|
682 |
+
2. It identifies the main topics, methodologies, and findings
|
683 |
+
3. Based on your review question, it synthesizes information across papers
|
684 |
+
4. It structures the information following academic review standards
|
685 |
+
5. It provides a comprehensive review with proper sections and references
|
686 |
+
|
687 |
+
#### Limitations
|
688 |
+
- The quality of the review depends on the clarity of the PDFs and their text extraction
|
689 |
+
- Complex scientific notation, tables, or images in PDFs may not be perfectly interpreted
|
690 |
+
- The system provides a starting point, not a final paper - always review and verify the output
|
691 |
+
- Token limits may prevent full analysis of very long or numerous papers
|
692 |
+
|
693 |
+
#### Privacy & Security
|
694 |
+
- Your API key is never stored and is only used for the current session
|
695 |
+
- Uploaded PDFs are processed temporarily and deleted after review generation
|
696 |
+
- No data is retained after you close the application
|
697 |
+
""")
|
698 |
+
|
699 |
+
# Button actions
|
700 |
+
api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
|
701 |
+
|
702 |
+
generate_button.click(
|
703 |
+
create_review,
|
704 |
+
inputs=[pdf_files, review_question, model, review_type, include_tables, temperature, max_tokens],
|
705 |
+
outputs=[review_output]
|
706 |
+
)
|
707 |
+
|
708 |
+
# Function to refresh model list
|
709 |
+
def refresh_models():
    """Return an update that replaces the model dropdown's choices.

    Registered as a click handler on ``api_key_button`` with the ``model``
    dropdown as its only output. The new choices come from
    ``get_available_models()``; the currently selected value is not touched.

    ``gr.update()`` is used instead of the ``gr.Dropdown.update()``
    classmethod: the per-component ``update()`` helpers were deprecated in
    late Gradio 3.x and removed in 4.0, while ``gr.update()`` is accepted as
    an event output by both.

    NOTE(review): this handler is registered separately from ``set_api_key``
    on the same button, so it is presumably not guaranteed to run after the
    key has been stored -- consider chaining it with ``.then()`` if
    ``get_available_models()`` depends on the key.
    """
    return gr.update(choices=get_available_models())
|
711 |
+
|
712 |
+
api_key_button.click(refresh_models, outputs=[model])
|
713 |
+
|
714 |
+
# Copy function is handled client-side via JavaScript
|
715 |
+
|
716 |
+
return demo
|
717 |
+
|
718 |
# Script entry point: build the interface and serve it when run directly.
if __name__ == "__main__":
    demo = create_ui()
    # share=True asks Gradio for a temporary public URL in addition to the
    # local server.
    # NOTE(review): set_api_key() stores the key in a module-level global, so
    # every visitor of the shared link would use the same key -- confirm that
    # this is intended before exposing the app publicly.
    demo.launch(share=True)
|