shukdevdatta123 commited on
Commit
99fac0a
·
verified ·
1 Parent(s): 7ab84cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +615 -121
app.py CHANGED
@@ -3,225 +3,719 @@ import openai
3
  import fitz # PyMuPDF for PDF processing
4
  import os
5
  import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Variable to store API key
8
  api_key = ""
9
 
10
  # Function to update API key
11
- def set_api_key(key):
 
12
  global api_key
13
- api_key = key
14
- return "API Key Set Successfully!"
 
 
 
15
 
16
  # Function to extract text from PDF
17
- def extract_text_from_pdf(pdf_path):
 
18
  try:
19
  doc = fitz.open(pdf_path)
20
- text = "\n".join([page.get_text("text") for page in doc])
 
 
 
21
  return text
22
  except Exception as e:
 
23
  return f"Error extracting text from PDF: {str(e)}"
24
 
25
- # Function to interact with OpenAI API for systematic review
26
- def generate_systematic_review(pdf_files, review_question, include_tables=True):
27
- if not api_key:
28
- return "Please enter your OpenAI API key first."
29
-
30
- if not pdf_files:
31
- return "Please upload at least one PDF file."
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- if not review_question:
34
- return "Please enter a review question."
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  try:
37
  openai.api_key = api_key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- # Create the system message with systematic review guidelines
40
- system_prompt = """
41
  Step 1: Identify a Research Field
42
- The first step in writing a systematic review paper is to identify a research field. This involves selecting a specific area of study that you are interested in and want to explore further.
43
 
44
  Step 2: Generate a Research Question
45
- Once you have identified your research field, the next step is to generate a research question. This question should be specific, measurable, achievable, relevant, and time-bound (SMART).
46
 
47
  Step 3: Create a Protocol
48
- After generating your research question, the next step is to create a protocol. A detailed plan of how you will conduct your research, including the methods you will use, the data you will collect, and the analysis you will perform.
49
 
50
  Step 4: Evaluate Relevant Literature
51
- The fourth step is to evaluate relevant literature. This involves searching for and reviewing existing studies related to your research question. You should critically evaluate the quality of these studies and identify any gaps or limitations in the current literature.
52
 
53
  Step 5: Investigate Sources for Answers
54
- The fifth step is to investigate sources for answers. This involves searching for and accessing relevant data and information that will help you answer your research question.
55
 
56
  Step 6: Collect Data as per Protocol
57
- The sixth step is to collect data as per protocol. This involves implementing the methods outlined in your protocol and collecting the data specified. You should ensure that your data collection methods are rigorous and reliable.
58
 
59
  Step 7: Data Extraction
60
- The seventh step is to extract the data. This involves organizing and analyzing the data you have collected, and extracting the relevant information that will help you answer your research question.
61
 
62
  Step 8: Critical Analysis of Results
63
- The eighth step is to conduct a critical analysis of your results. This involves interpreting your findings, identifying patterns and trends, and drawing conclusions based on your data.
64
 
65
  Step 9: Interpreting Derivations
66
- The ninth step is to interpret the derivations. This involves taking the conclusions you have drawn from your data and interpreting them in the context of your research question.
67
 
68
  Step 10: Concluding Statements
69
- The final step is to make concluding statements. This involves summarizing your findings and drawing conclusions based on your research. You should also provide recommendations for future research and implications for practice.
 
 
 
70
 
71
- Step-11:
72
- Please include references in the form of citation and also link to the reference papers.
 
 
 
 
73
  """
 
 
 
74
 
75
- # Extract text from each PDF
76
- pdf_texts = []
77
- pdf_names = []
 
 
 
 
 
78
 
79
- for pdf_file in pdf_files:
80
- if isinstance(pdf_file, str): # If it's already a path
81
- pdf_path = pdf_file
82
- else: # If it's a file object
83
- pdf_path = pdf_file.name
84
-
85
- pdf_name = os.path.basename(pdf_path)
86
- pdf_text = extract_text_from_pdf(pdf_path)
87
-
88
- pdf_texts.append(pdf_text)
89
- pdf_names.append(pdf_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # Prepare the user prompt with the review question and instructions
92
  table_instruction = ""
93
  if include_tables:
94
- table_instruction = " Please include important new generated tables in your review."
95
 
96
- user_prompt = f"Please generate a systematic review of the following {len(pdf_files)} papers: {', '.join(pdf_names)}.{table_instruction}\n\nReview Question: {review_question}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  # Create the messages for the API call
99
  messages = [
100
  {"role": "system", "content": system_prompt},
101
- {"role": "user", "content": user_prompt + "\n\n" + "\n\n".join([f"Paper {i+1} - {pdf_names[i]}:\n{pdf_texts[i]}" for i in range(len(pdf_texts))])}
102
  ]
103
 
104
- # Call the API with temperature=1 and top_p=1 as specified
 
 
105
  response = openai.ChatCompletion.create(
106
- model="gpt-4.1",
107
  messages=messages,
108
- temperature=1,
109
- top_p=1,
110
- max_tokens=2048
111
  )
112
 
113
- return response["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
114
 
115
  except Exception as e:
 
116
  return f"Error generating systematic review: {str(e)}"
117
 
118
  # Function to save uploaded files
119
- def save_uploaded_files(files):
 
120
  if not files:
121
  return []
122
 
123
  saved_paths = []
124
  for file in files:
125
  if file is not None:
 
 
 
 
 
 
 
126
  # Create a temporary file
127
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
128
- tmp_file.write(file)
 
 
 
 
 
 
 
129
  saved_paths.append(tmp_file.name)
130
 
131
  return saved_paths
132
 
133
- # Gradio UI Layout
134
- with gr.Blocks() as demo:
135
- gr.Markdown("# Systematic Review Generator for Research Papers")
136
-
137
- with gr.Accordion("How to Use This App", open=True):
138
- gr.Markdown("""
139
- ### Getting Started:
140
- 1. Enter your OpenAI API key in the field below and click "Set API Key"
141
- 2. Upload multiple PDF research papers (2 or more recommended)
142
- 3. Enter your review question or topic
143
- 4. Check the "Include Tables" option if you want the review to include comparison tables
144
- 5. Click "Generate Systematic Review" to start the process
145
-
146
- ### Tips:
147
- - For best results, upload papers that are related to the same research topic or field
148
- - Be specific in your review question to get more focused results
149
- - The generated review will follow a systematic structure including research field identification, data extraction, analysis, and conclusions
150
- - The more papers you upload, the more comprehensive the review will be
151
- """)
152
-
153
- # API Key Input
154
- with gr.Row():
155
- api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
156
- api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
157
- api_key_output = gr.Textbox(label="API Key Status", interactive=False)
158
-
159
- # PDF Upload and Review Settings
160
- with gr.Row():
161
- with gr.Column():
162
- pdf_files = gr.File(label="Upload PDF Research Papers", file_count="multiple", type="binary")
163
- review_question = gr.Textbox(label="Review Question or Topic", placeholder="What are the current advances in GAN applications for speech processing?")
164
- include_tables = gr.Checkbox(label="Include Comparison Tables", value=True)
165
- generate_button = gr.Button("Generate Systematic Review", elem_id="generate_button")
166
-
167
- # Output
168
- review_output = gr.Textbox(label="Systematic Review", interactive=False, lines=20)
169
-
170
- # Button actions
171
- api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
172
-
173
- # Generate systematic review
174
- def process_files_and_generate_review(files, question, include_tables):
175
- if not files:
176
- return "Please upload at least one PDF file."
177
-
178
- # Save uploaded files
179
- saved_paths = save_uploaded_files(files)
180
-
181
- # Generate review
182
- review = generate_systematic_review(saved_paths, question, include_tables)
183
-
184
- # Clean up temporary files
185
- for path in saved_paths:
186
- try:
187
- os.remove(path)
188
- except:
189
- pass
190
-
191
- return review
192
-
193
- generate_button.click(
194
- process_files_and_generate_review,
195
- inputs=[pdf_files, review_question, include_tables],
196
- outputs=[review_output]
197
- )
198
-
199
- # Add CSS styling
200
  css = """
201
  <style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  #generate_button {
203
  background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
204
  color: white;
205
  font-weight: bold;
 
 
 
 
 
 
206
  }
 
207
  #generate_button:hover {
208
  background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
 
 
209
  }
 
210
  #api_key_button {
211
  background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
212
  color: white;
213
  font-weight: bold;
214
  margin-top: 27px;
 
 
 
 
 
 
215
  }
 
216
  #api_key_button:hover {
217
  background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  .gradio-container {
220
  max-width: 1200px !important;
221
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  </style>
223
  """
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  # Launch the app
226
  if __name__ == "__main__":
 
227
  demo.launch(share=True)
 
3
  import fitz # PyMuPDF for PDF processing
4
  import os
5
  import tempfile
6
+ import time
7
+ import logging
8
+ import re
9
+ from typing import List, Optional, Dict, Any, Union
10
+ import markdown
11
+ import concurrent.futures
12
+
13
+ # Set up logging
14
+ logging.basicConfig(
15
+ level=logging.INFO,
16
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
17
+ )
18
+ logger = logging.getLogger(__name__)
19
 
20
  # Variable to store API key
21
  api_key = ""
22
 
23
  # Function to update API key
24
def set_api_key(key: str) -> str:
    """Store the OpenAI API key in the module-level ``api_key`` variable.

    Args:
        key: The key as typed by the user. ``None`` and whitespace-only
            values are treated as "no key entered".

    Returns:
        A human-readable status message for the UI.
    """
    global api_key
    # A cleared textbox may deliver None instead of "", so normalise before
    # stripping rather than calling .strip() on the raw value.
    cleaned = (key or "").strip()
    if not cleaned:
        return "Please enter a valid API key"

    api_key = cleaned
    return "✅ API Key Set Successfully!"
32
 
33
  # Function to extract text from PDF
34
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the plain text of every page of a PDF file.

    Each page's text is preceded by a ``--- Page N ---`` marker line
    (1-based) so downstream consumers can tell pages apart.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The concatenated page texts. On any failure the error is logged and
        a string starting with ``"Error extracting text from PDF: "`` is
        returned instead of raising.
    """
    try:
        # The context manager closes the document (and its file handle)
        # even when a page fails to render.
        with fitz.open(pdf_path) as doc:
            pages = [
                f"\n--- Page {page_num} ---\n" + page.get_text("text")
                for page_num, page in enumerate(doc, start=1)
            ]
        return "".join(pages)
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {str(e)}")
        return f"Error extracting text from PDF: {str(e)}"
46
 
47
+ # Function to truncate text to fit token limits
48
def truncate_text_for_tokens(text: str, max_tokens: int = 8000) -> str:
    """Cut ``text`` down so it roughly fits in ``max_tokens`` tokens.

    Args:
        text: The text to limit.
        max_tokens: Approximate token budget.

    Returns:
        ``text`` unchanged when it is within budget; otherwise its leading
        portion followed by a truncation notice.
    """
    # Heuristic: one token is roughly four characters of English text.
    budget = 4 * max_tokens
    if len(text) <= budget:
        return text
    return f"{text[:budget]}\n[Content truncated due to length...]"
55
+
56
+ # Function to extract title from PDF content
57
def extract_title(pdf_text: str) -> str:
    """Guess a document title from the start of extracted PDF text.

    Only the first ten lines are considered. The first one that, after
    stripping, is longer than 5 and shorter than 200 characters and does not
    start with ``---`` (the page-marker prefix) is returned.

    Args:
        pdf_text: Text extracted from a PDF.

    Returns:
        The chosen line, or ``"Untitled Document"`` when none qualifies.
    """
    leading_lines = (raw.strip() for raw in pdf_text.split('\n')[:10])
    return next(
        (
            candidate
            for candidate in leading_lines
            if 5 < len(candidate) < 200 and not candidate.startswith('---')
        ),
        "Untitled Document",
    )
68
+
69
+ # Model selection options
70
+ MODEL_OPTIONS = {
71
+ "gpt-4.1": "GPT-4 (Most powerful, slower)",
72
+ "gpt-3.5-turbo": "GPT-3.5 Turbo (Faster, less powerful)"
73
+ }
74
+
75
+ # Function to get available OpenAI models
76
def get_available_models() -> List[str]:
    """Return the model ids that can be offered in the model selector.

    Without an API key the built-in ``MODEL_OPTIONS`` keys are returned.
    With a key, the account's model list is fetched and every id containing
    ``gpt-4.1`` or ``gpt-3.5-turbo`` that is not yet known is added to
    ``MODEL_OPTIONS`` (mutating that module-level dict) before its keys are
    returned. A failed lookup is logged and the current keys are returned.
    """
    if api_key:
        try:
            openai.api_key = api_key
            listing = openai.Model.list()
            gpt_ids = [entry.id for entry in listing['data'] if 'gpt' in entry.id.lower()]
            # Register newly discovered members of the supported families.
            for model_id in gpt_ids:
                in_supported_family = 'gpt-4.1' in model_id or 'gpt-3.5-turbo' in model_id
                if in_supported_family and model_id not in MODEL_OPTIONS:
                    MODEL_OPTIONS[model_id] = model_id
        except Exception as e:
            logger.error(f"Error fetching models: {str(e)}")

    return list(MODEL_OPTIONS.keys())
93
+
94
+ # Function for parallel PDF processing
95
def process_pdf_in_parallel(pdf_files: List[str]) -> List[tuple]:
    """Extract text from several PDFs concurrently.

    Args:
        pdf_files: Paths of the PDF files to read.

    Returns:
        One ``(file_name, text)`` tuple per input path, in the same order as
        ``pdf_files``. ``file_name`` is the path's basename and ``text`` is
        the extracted text after truncation, or an
        ``"Error processing file: ..."`` message if extraction raised.
    """
    results = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit everything up front so the extractions overlap, then collect
        # in submission order. Collecting with as_completed() would return
        # results in completion order, so the paper numbering used by callers
        # would not match the order the files were supplied in.
        submitted = [
            (pdf_path, executor.submit(extract_text_from_pdf, pdf_path))
            for pdf_path in pdf_files
        ]
        for pdf_path, future in submitted:
            pdf_name = os.path.basename(pdf_path)
            try:
                pdf_text = truncate_text_for_tokens(future.result())
                results.append((pdf_name, pdf_text))
            except Exception as e:
                logger.error(f"Error processing {pdf_name}: {str(e)}")
                results.append((pdf_name, f"Error processing file: {str(e)}"))

    return results
114
+
115
+ # Function to create system prompt
116
def create_system_prompt(review_type: str = "systematic") -> str:
    """Return the system prompt matching the requested kind of review.

    Args:
        review_type: ``"systematic"`` or ``"literature"``; any other value
            selects the meta-analysis prompt.

    Returns:
        The instruction text to send as the system message.
    """
    if review_type == "literature":
        return """
        You are an expert academic researcher tasked with creating a thorough literature review. Your review should:

        1. Provide an overview of the current state of knowledge in the specific field
        2. Identify common themes, methodologies, and findings across the papers
        3. Highlight contradictions or inconsistencies in the literature
        4. Evaluate the strength of evidence for key claims
        5. Identify research gaps and future directions
        6. Organize findings in a logical, thematic structure
        7. Include visual elements (tables, concept maps) to synthesize information
        8. Maintain academic rigor and proper attribution

        Your review should be scholarly in tone, well-organized, and provide a balanced assessment of the literature.
        """

    if review_type != "systematic":
        # Fallback for every other value, including "meta-analysis".
        return """
        You are an expert researcher conducting a meta-analysis of the provided papers. Your analysis should:

        1. Identify a precise research question that can be answered quantitatively
        2. Extract comparable quantitative data, effect sizes, or statistics from the papers
        3. Assess the methodological quality and risk of bias in each study
        4. Synthesize findings using appropriate statistical methods
        5. Present results using forest plots, funnel plots, or other visualizations
        6. Discuss heterogeneity and its potential sources
        7. Evaluate publication bias and its impact on the findings
        8. Draw conclusions based on the pooled data
        9. Discuss implications for practice and future research

        Your meta-analysis should follow PRISMA guidelines where applicable, maintain statistical rigor, and provide clear visual representations of the quantitative synthesis.
        """

    return """
        You are an expert academic researcher tasked with creating comprehensive systematic reviews. Follow these steps:

        Step 1: Identify a Research Field
        Identify the specific area of study represented in the provided papers.

        Step 2: Generate a Research Question
        Create a specific, measurable, achievable, relevant, and time-bound (SMART) research question that unifies the papers.

        Step 3: Create a Protocol
        Outline a detailed methodology for your review, including analysis methods appropriate for the papers.

        Step 4: Evaluate Relevant Literature
        Critically evaluate the quality, methodology, and findings of the provided papers, identifying gaps or limitations.

        Step 5: Investigate Sources for Answers
        Examine how the papers contribute to answering the research question.

        Step 6: Collect Data as per Protocol
        Implement rigorous data collection methods, extracting key findings and statistics.

        Step 7: Data Extraction
        Organize the extracted data in a structured format, including tables where appropriate.

        Step 8: Critical Analysis of Results
        Interpret patterns, trends, and conclusions from the data, comparing findings across papers.

        Step 9: Interpreting Derivations
        Contextualize the findings in relation to the research question and broader field.

        Step 10: Concluding Statements
        Summarize findings, draw conclusions, and provide recommendations for future research.

        Step 11: References
        Include proper citations for all papers reviewed and any additional references.

        Your review should be:
        - Comprehensive yet concise
        - Well-structured with clear headings and subheadings
        - Using academic language appropriate for a scholarly audience
        - Including data visualizations or tables where helpful
        - Balanced and objective in evaluating the evidence
        """
193
+
194
+ # Function to interact with OpenAI API for systematic review
195
def generate_systematic_review(
    pdf_files: List[str],
    review_question: str,
    model: str = "gpt-4.1",
    review_type: str = "systematic",
    include_tables: bool = True,
    temperature: float = 0.7,
    max_tokens: int = 4000
) -> str:
    """Generate a review of the given PDF files with the OpenAI chat API.

    Args:
        pdf_files: Paths of the PDFs to review.
        review_question: The question or topic the review should address.
        model: Chat model id passed to the API.
        review_type: Kind of review; selects the system prompt and is
            interpolated into the user prompt.
        include_tables: When true, ask the model to include tables/figures.
        temperature: Sampling temperature passed to the API.
        max_tokens: Upper bound on the length of the generated answer.

    Returns:
        The generated review (markdown text), or a human-readable message
        when a precondition is not met or an error occurs. This function
        does not raise; failures are logged and reported in the return value.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    if not pdf_files:
        return "Please upload at least one PDF file."

    # A question made only of whitespace is as useless as an empty one.
    if not review_question or not review_question.strip():
        return "Please enter a review question."

    try:
        start_time = time.time()

        openai.api_key = api_key

        system_prompt = create_system_prompt(review_type)

        logger.info(f"Processing {len(pdf_files)} PDFs...")
        pdf_results = process_pdf_in_parallel(pdf_files)

        # Numbered "file name (Title: ...)" listing shown to the model.
        paper_listing = ', '.join(
            f"{number}. {pdf_name} (Title: {extract_title(pdf_text)})"
            for number, (pdf_name, pdf_text) in enumerate(pdf_results, start=1)
        )

        table_instruction = ""
        if include_tables:
            table_instruction = " Please include important tables, charts or figures in your review to help summarize the findings."

        user_prompt = f"""
        Please generate a {review_type} review of the following {len(pdf_files)} papers:
        {paper_listing}

        Review Question: {review_question}

        {table_instruction}

        Format your response with clear headings, subheadings, and properly formatted tables using markdown syntax.
        """

        # Concatenate the paper texts under an overall character budget so the
        # request stays within the model's context window.
        combined_pdf_text = ""
        total_chars = 0
        max_chars = 20000  # Rough approximation to fit within token limits

        for number, (pdf_name, pdf_text) in enumerate(pdf_results, start=1):
            header = f"\n\n--- PAPER {number}: {pdf_name} ---\n\n"
            if total_chars + len(header) + len(pdf_text) > max_chars:
                # This paper does not fit completely: add what still fits
                # (if that is a meaningful amount) and stop adding papers.
                remaining = max_chars - total_chars - len(header)
                if remaining > 500:
                    truncated_text = pdf_text[:remaining] + "\n[... Content truncated due to length limitations ...]"
                    combined_pdf_text += header + truncated_text
                    total_chars += len(header) + len(truncated_text)
                break
            combined_pdf_text += header + pdf_text
            total_chars += len(header) + len(pdf_text)

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt + combined_pdf_text}
        ]

        logger.info(f"Sending request to OpenAI API (model: {model})...")

        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=temperature,
            # UI sliders may deliver a float; the API expects an integer.
            max_tokens=int(max_tokens)
        )

        result = response["choices"][0]["message"]["content"]

        time_taken = time.time() - start_time
        logger.info(f"Review generated in {time_taken:.2f} seconds")

        # The markdown is returned as-is; the output widget renders it.
        return result

    except Exception as e:
        logger.error(f"Error generating review: {str(e)}")
        return f"Error generating systematic review: {str(e)}"
296
 
297
  # Function to save uploaded files
298
def save_uploaded_files(files) -> List[str]:
    """Copy uploaded PDFs into temporary files and return the new paths.

    Each entry of ``files`` may be:

    * raw ``bytes``/``bytearray`` (what an upload widget configured for
      binary content delivers) - accepted when a ``%PDF-`` marker appears in
      the first 1024 bytes, since no file name is available to check;
    * a filesystem path (``str`` or ``os.PathLike``);
    * a file-like object exposing ``name`` and ``read()``.

    ``None`` entries and entries whose name does not end in ``.pdf``
    (case-insensitive) are skipped.

    Args:
        files: Iterable of uploads, or a falsy value for "nothing uploaded".

    Returns:
        Paths of the temporary ``.pdf`` files that were written. The caller
        is responsible for deleting them.
    """
    if not files:
        return []

    saved_paths = []
    for file in files:
        if file is None:
            continue

        if isinstance(file, (bytes, bytearray)):
            # Raw content has no name, so sniff the PDF header instead.
            data = bytes(file)
            if b"%PDF-" not in data[:1024]:
                continue
            file_extension = ".pdf"
        else:
            is_path = isinstance(file, (str, os.PathLike))
            source_name = os.fspath(file) if is_path else getattr(file, "name", "")
            file_extension = os.path.splitext(str(source_name))[1].lower()

            # Only process PDF files
            if file_extension != '.pdf':
                continue

            if not is_path and hasattr(file, 'read'):
                data = file.read()
            else:
                with open(source_name, 'rb') as f:
                    data = f.read()

        with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
            tmp_file.write(data)

        saved_paths.append(tmp_file.name)

    return saved_paths
326
 
327
+ # Custom HTML and CSS for better UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  css = """
329
  <style>
330
+ /* Base styling */
331
+ body {
332
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
333
+ }
334
+
335
+ .container {
336
+ max-width: 1200px !important;
337
+ margin: 0 auto;
338
+ }
339
+
340
+ /* Header styling */
341
+ .header {
342
+ background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
343
+ color: white;
344
+ padding: 20px;
345
+ border-radius: 10px;
346
+ margin-bottom: 20px;
347
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
348
+ }
349
+
350
+ /* Button styling */
351
  #generate_button {
352
  background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
353
  color: white;
354
  font-weight: bold;
355
+ padding: 10px 20px;
356
+ border-radius: 8px;
357
+ border: none;
358
+ cursor: pointer;
359
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
360
+ transition: all 0.3s ease;
361
  }
362
+
363
  #generate_button:hover {
364
  background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
365
+ transform: translateY(-2px);
366
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
367
  }
368
+
369
  #api_key_button {
370
  background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
371
  color: white;
372
  font-weight: bold;
373
  margin-top: 27px;
374
+ padding: 10px 20px;
375
+ border-radius: 8px;
376
+ border: none;
377
+ cursor: pointer;
378
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
379
+ transition: all 0.3s ease;
380
  }
381
+
382
  #api_key_button:hover {
383
  background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green */
384
+ transform: translateY(-2px);
385
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
386
+ }
387
+
388
+ /* Card styling */
389
+ .card {
390
+ background-color: white;
391
+ border-radius: 10px;
392
+ padding: 20px;
393
+ margin-bottom: 20px;
394
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
395
+ }
396
+
397
+ /* Form styling */
398
+ .form-group {
399
+ margin-bottom: 15px;
400
  }
401
+
402
+ /* Tabs styling */
403
+ .tab-content {
404
+ padding: 20px;
405
+ background-color: white;
406
+ border-radius: 0 0 10px 10px;
407
+ }
408
+
409
+ /* Table styling in output */
410
+ .output-container table {
411
+ border-collapse: collapse;
412
+ width: 100%;
413
+ margin: 20px 0;
414
+ }
415
+
416
+ .output-container th, .output-container td {
417
+ border: 1px solid #ddd;
418
+ padding: 8px;
419
+ text-align: left;
420
+ }
421
+
422
+ .output-container th {
423
+ background-color: #f2f2f2;
424
+ font-weight: bold;
425
+ }
426
+
427
+ .output-container tr:nth-child(even) {
428
+ background-color: #f9f9f9;
429
+ }
430
+
431
+ /* Spinner styling */
432
+ .loading-spinner {
433
+ display: inline-block;
434
+ width: 20px;
435
+ height: 20px;
436
+ border: 3px solid rgba(0, 0, 0, 0.1);
437
+ border-radius: 50%;
438
+ border-top-color: #4a00e0;
439
+ animation: spin 1s ease-in-out infinite;
440
+ }
441
+
442
+ @keyframes spin {
443
+ to {
444
+ transform: rotate(360deg);
445
+ }
446
+ }
447
+
448
+ /* Customizations for Gradio */
449
  .gradio-container {
450
  max-width: 1200px !important;
451
  }
452
+
453
+ .gr-form, .gr-box {
454
+ border-radius: 10px !important;
455
+ }
456
+
457
+ .gr-input, .gr-textarea {
458
+ border-radius: 6px !important;
459
+ }
460
+
461
+ /* Responsive adjustments */
462
+ @media (max-width: 768px) {
463
+ .header {
464
+ padding: 15px;
465
+ }
466
+
467
+ #generate_button, #api_key_button {
468
+ padding: 8px 16px;
469
+ }
470
+ }
471
  </style>
472
  """
473
 
474
+ # Add custom HTML header
475
+ header_html = """
476
+ <div class="header">
477
+ <h1>Systematic Review Generator for Research Papers</h1>
478
+ <p>Upload multiple PDF papers to generate comprehensive reviews, literature analyses, and meta-analyses</p>
479
+ </div>
480
+ """
481
+
482
+ # Custom progress component
483
def progress_component(text, progress):
    """Build the HTML snippet for a labelled progress bar.

    Args:
        text: Label shown to the left of the bar; inserted as-is.
        progress: Percentage used both for the figure on the right and for
            the width of the filled part of the bar.

    Returns:
        The HTML markup as a string.
    """
    markup = f"""
    <div style="margin: 10px 0; width: 100%;">
    <div style="display: flex; align-items: center; margin-bottom: 5px;">
    <div>{text}</div>
    <div style="margin-left: auto;">{progress}%</div>
    </div>
    <div style="background-color: #e0e0e0; height: 8px; border-radius: 4px; width: 100%;">
    <div style="background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); height: 100%; width: {progress}%; border-radius: 4px;"></div>
    </div>
    </div>
    """
    return markup
495
+
496
+ # Function to create a review
497
def create_review(files, question, model, review_type, include_tables, temperature, max_tokens, progress=gr.Progress()):
    """Run the whole review pipeline for one click of the generate button.

    Saves the uploads to temporary files, generates the review and removes
    the temporary files again, reporting progress along the way.

    Args:
        files: Uploaded files as delivered by the upload widget.
        question: Review question or topic.
        model: Chat model id.
        review_type: Kind of review to generate.
        include_tables: Whether tables/figures should be requested.
        temperature: Sampling temperature.
        max_tokens: Maximum length of the generated answer.
        progress: Progress reporter called with a fraction and a description.

    Returns:
        The review text, or a human-readable message when there is nothing
        to process or an error occurred.
    """
    saved_paths = []
    try:
        if not files:
            return "Please upload at least one PDF file."

        progress(0.1, desc="Saving uploaded files...")
        saved_paths = save_uploaded_files(files)

        if not saved_paths:
            return "No valid PDF files were uploaded. Please upload PDF files only."

        progress(0.3, desc="Processing PDFs...")
        review = generate_systematic_review(
            saved_paths,
            question,
            model=model,
            review_type=review_type,
            include_tables=include_tables,
            temperature=temperature,
            max_tokens=max_tokens
        )

        progress(0.9, desc="Finalizing review...")
        progress(1.0, desc="Complete!")
        return review

    except Exception as e:
        logger.error(f"Error in create_review: {str(e)}")
        return f"An error occurred: {str(e)}"

    finally:
        # Remove the temporary copies on every exit path, including errors,
        # so failed runs do not leave uploaded papers behind on disk.
        for path in saved_paths:
            try:
                os.remove(path)
            except Exception as e:
                logger.error(f"Error removing temporary file {path}: {str(e)}")
534
+
535
+ # Gradio UI Layout
536
def create_ui():
    """Build the Gradio interface and wire up its event handlers.

    The layout has three tabs: the review generator itself, usage
    instructions, and an "About" page.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(css=css) as demo:
        gr.HTML(header_html)

        with gr.Tabs():
            with gr.TabItem("Generate Review"):
                with gr.Row():
                    with gr.Column(scale=1):
                        with gr.Box():
                            gr.Markdown("### 1. Setup API Key")
                            api_key_input = gr.Textbox(
                                label="Enter OpenAI API Key",
                                type="password",
                                placeholder="sk-..."
                            )
                            api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
                            api_key_output = gr.Textbox(
                                label="API Key Status",
                                interactive=False,
                                value="Not set"
                            )

                        with gr.Box():
                            gr.Markdown("### 2. Upload Papers")
                            pdf_files = gr.File(
                                label="Upload PDF Research Papers (PDF files only)",
                                file_count="multiple",
                                type="binary",
                                file_types=[".pdf"]
                            )

                    with gr.Column(scale=1):
                        with gr.Box():
                            gr.Markdown("### 3. Review Configuration")
                            review_question = gr.Textbox(
                                label="Review Question or Topic",
                                placeholder="What are the current advances in GAN applications for speech processing?",
                                lines=2
                            )

                            review_type = gr.Radio(
                                label="Review Type",
                                choices=["systematic", "literature", "meta-analysis"],
                                value="systematic"
                            )

                            model = gr.Dropdown(
                                label="Model",
                                choices=list(MODEL_OPTIONS.keys()),
                                value="gpt-4.1"
                            )

                            with gr.Row():
                                include_tables = gr.Checkbox(
                                    label="Include Tables and Figures",
                                    value=True
                                )

                            with gr.Column():
                                temperature = gr.Slider(
                                    label="Temperature (Creativity)",
                                    minimum=0.0,
                                    maximum=1.0,
                                    value=0.7,
                                    step=0.1
                                )

                                max_tokens = gr.Slider(
                                    label="Maximum Output Length",
                                    minimum=1000,
                                    maximum=8000,
                                    value=4000,
                                    step=500
                                )

                            generate_button = gr.Button(
                                "Generate Review",
                                elem_id="generate_button",
                                variant="primary"
                            )

                # Output
                with gr.Box():
                    gr.Markdown("### Review Output")
                    review_output = gr.Markdown(
                        label="Generated Review",
                        value="Review will appear here after generation..."
                    )

                    with gr.Row():
                        # NOTE(review): no handler is attached to these two
                        # buttons in this function, so they currently do
                        # nothing when clicked. TODO: wire them up.
                        copy_button = gr.Button("📋 Copy to Clipboard")
                        export_button = gr.Button("📥 Export as Markdown")

            with gr.TabItem("How to Use"):
                gr.Markdown("""
                ### Getting Started with the Systematic Review Generator

                #### 1. Setting Up
                - Enter your OpenAI API key in the field provided and click "Set API Key"
                - You'll need an API key with access to GPT-4 or GPT-3.5 for best results
                - Your API key is held in memory only while the app is running and is never written to disk

                #### 2. Uploading Papers
                - Upload 2 or more PDF research papers (the more related they are, the better)
                - Only PDF files are supported
                - Papers should ideally be related to the same research field

                #### 3. Configuring Your Review
                - Enter a specific review question or topic
                - Choose the review type:
                    - **Systematic Review**: Follows a rigorous methodology to answer a specific research question
                    - **Literature Review**: Provides an overview of existing research on a topic
                    - **Meta-Analysis**: Combines and analyzes quantitative data from multiple studies
                - Select the AI model (GPT-4 recommended for complex papers)
                - Adjust temperature (higher = more creative, lower = more focused)
                - Set maximum output length (longer reviews will be more comprehensive)

                #### 4. Generating Your Review
                - Click "Generate Review" to start the process
                - Processing time depends on the number and size of papers, and the selected model
                - You can copy or export the final review when complete

                #### Tips for Best Results
                - Use papers from the same field or on related topics
                - Be specific in your review question
                - For technical papers, choose GPT-4 for better comprehension
                - The system works best with 2-5 related papers
                - Consider using a lower temperature (0.3-0.5) for more factual reviews
                """)

            with gr.TabItem("About"):
                gr.Markdown("""
                ### About the Systematic Review Generator

                This application helps researchers, students, and academics generate comprehensive reviews of scientific papers. It leverages advanced AI to analyze PDF research papers and synthesize findings into structured, coherent reviews.

                #### Features
                - Support for multiple review types: systematic reviews, literature reviews, and meta-analyses
                - Automatic extraction of text from PDF files
                - Parallel processing of multiple papers
                - Integration with OpenAI's GPT models
                - Customizable output parameters
                - Table and figure generation capabilities

                #### How It Works
                1. The system extracts text from your uploaded PDFs
                2. It identifies the main topics, methodologies, and findings
                3. Based on your review question, it synthesizes information across papers
                4. It structures the information following academic review standards
                5. It provides a comprehensive review with proper sections and references

                #### Limitations
                - The quality of the review depends on the clarity of the PDFs and their text extraction
                - Complex scientific notation, tables, or images in PDFs may not be perfectly interpreted
                - The system provides a starting point, not a final paper - always review and verify the output
                - Token limits may prevent full analysis of very long or numerous papers

                #### Privacy & Security
                - Your API key is held in memory only while the app is running and is never written to disk; it is shared by everyone using this running instance
                - Uploaded PDFs are processed temporarily and deleted after review generation
                - No data is retained after you close the application
                """)

        # Button actions

        def set_key_and_refresh(key):
            """Store the key, then refresh the model list with that key."""
            status = set_api_key(key)
            return status, gr.Dropdown.update(choices=get_available_models())

        # One handler does both steps in sequence. Two separate click
        # handlers on the same button are not guaranteed to run in order, so
        # the model refresh could otherwise run before the key is stored.
        api_key_button.click(
            set_key_and_refresh,
            inputs=[api_key_input],
            outputs=[api_key_output, model]
        )

        generate_button.click(
            create_review,
            inputs=[pdf_files, review_question, model, review_type, include_tables, temperature, max_tokens],
            outputs=[review_output]
        )

    return demo
717
+
718
  # Launch the app
719
  if __name__ == "__main__":
720
+ demo = create_ui()
721
  demo.launch(share=True)