acecalisto3 committed on
Commit
0fb0a58
·
verified ·
1 Parent(s): 8e7dfc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -37
app.py CHANGED
@@ -165,51 +165,52 @@ class FileProcessor:
165
  except Exception:
166
  return False
167
 
168
def process_file(self, file) -> List[Dict]:
    """Process one uploaded file and return the records extracted from it.

    ZIP archives are unpacked into a temporary directory and handed to
    ``_process_zip_file``; any other upload goes to
    ``_process_single_file``. The temporary directory is only created for
    ZIP uploads, so plain files no longer pay for a directory they never use.

    Args:
        file: Uploaded file object whose ``name`` attribute is a path on
            disk. A falsy value yields an empty result.

    Returns:
        The list of record dicts produced by the helper that handled the
        upload; a single ``{"error": ...}`` dict when the upload is larger
        than ``self.max_file_size`` bytes; or an empty list when
        processing raised.
    """
    if not file:
        return []

    try:
        file_size = os.path.getsize(file.name)
        if file_size > self.max_file_size:
            logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
            return [{"error": f"File size ({file_size} bytes) exceeds maximum allowed size of {self.max_file_size} bytes."}]

        if not zipfile.is_zipfile(file.name):
            return list(self._process_single_file(file))

        # Only archives need scratch space; it is removed as soon as the
        # extracted contents have been read into memory.
        with tempfile.TemporaryDirectory() as temp_dir:
            return list(self._process_zip_file(file.name, temp_dir))
    except Exception as e:
        # Best-effort boundary: callers expect a list, never an exception.
        logger.error(f"Error processing file: {str(e)}")
        return []
191
 
192
def _process_zip_file(self, zip_path: str, temp_dir: str) -> List[Dict]:
    """Extract a ZIP archive into ``temp_dir`` and collect its text files.

    Members are extracted one at a time. When the instance defines
    ``max_file_size``, members whose declared uncompressed size exceeds it
    are skipped, so a small archive cannot expand into arbitrarily large
    files on disk and in memory.

    Args:
        zip_path: Filesystem path of the ZIP archive to read.
        temp_dir: Existing directory that receives the extracted members;
            the caller owns its cleanup.

    Returns:
        One dict per non-blank file accepted by ``self.is_text_file``,
        with the keys ``source`` (always ``"file"``), ``filename`` (base
        name only), ``content`` and ``timestamp`` (ISO-format time at
        which the record was built).
    """
    results: List[Dict] = []
    size_limit = getattr(self, "max_file_size", None)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for info in zip_ref.infolist():
            if info.is_dir():
                continue
            # Uploaded archives are untrusted: bound each member by the
            # same limit that applies to uploads.
            if size_limit is not None and info.file_size > size_limit:
                logger.warning(f"Skipping {info.filename}: uncompressed size ({info.file_size} bytes) exceeds maximum allowed size")
                continue
            zip_ref.extract(info, temp_dir)

    for root, _, files in os.walk(temp_dir):
        for filename in files:
            filepath = os.path.join(root, filename)
            if not self.is_text_file(filepath):
                continue
            try:
                with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except Exception as e:
                # One unreadable member must not abort the whole archive.
                logger.error(f"Error reading file {filename}: {str(e)}")
                continue
            if not content.strip():
                continue
            results.append({
                "source": "file",
                "filename": filename,
                "content": content,
                "timestamp": datetime.now().isoformat()
            })
    return results
214
 
215
  def _process_single_file(self, file) -> List[Dict]:
@@ -279,8 +280,9 @@ def create_interface():
279
 
280
  with gr.Tab("File Input"):
281
  file_input = gr.File(
282
- label="Upload text file or ZIP archive",
283
- file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
 
284
  )
285
 
286
  with gr.Tab("Text Input"):
@@ -311,7 +313,7 @@ def create_interface():
311
  # Initialize chatbot
312
  chatbot = Chatbot()
313
 
314
- def process_all_inputs(urls, file, text):
315
  """Process all input types with progress tracking"""
316
  try:
317
  processor = URLProcessor()
@@ -336,8 +338,9 @@ def create_interface():
336
  })
337
 
338
  # Process files
339
- if file:
340
- results.extend(file_processor.process_file(file))
 
341
 
342
  # Process text input
343
  if text:
@@ -395,7 +398,7 @@ def create_interface():
395
  gr.Markdown("""
396
  ### Usage Guidelines
397
  - **URL Processing**: Enter valid HTTP/HTTPS URLs
398
- - **File Input**: Upload text files or ZIP archives
399
  - **Text Input**: Direct text processing
400
  - **Chat**: Load your JSON data and ask questions about it
401
  - Advanced cleaning and validation included
 
165
  except Exception:
166
  return False
167
 
168
def process_files(self, files) -> List[Dict]:
    """Process one or more uploaded files into a single list of records.

    Each upload is handled independently: ZIP archives are delegated to
    ``_process_zip_file`` and everything else to ``_process_single_file``.
    Uploads larger than ``self.max_file_size`` are skipped with a warning.
    A failure on one upload is logged and does not discard the records
    already collected from the others.

    Args:
        files: A list or tuple of uploaded file objects, each exposing a
            ``name`` attribute that is a path on disk, or a single such
            object. ``None`` entries are ignored; a falsy ``files`` yields
            an empty result.

    Returns:
        The concatenated record dicts produced by the per-file helpers,
        in upload order. Empty when nothing could be processed.
    """
    if not files:
        return []

    # A single upload may arrive unwrapped; treat it as a one-item batch.
    if not isinstance(files, (list, tuple)):
        files = [files]

    combined_data: List[Dict] = []
    for file in files:
        if file is None:
            continue
        # Isolate failures per upload so one bad file cannot wipe out the
        # results gathered from the rest of the batch.
        try:
            file_size = os.path.getsize(file.name)
            if file_size > self.max_file_size:
                logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
                continue  # Skip this file

            if zipfile.is_zipfile(file.name):
                combined_data.extend(self._process_zip_file(file.name))
            else:
                combined_data.extend(self._process_single_file(file))
        except Exception as e:
            logger.error(f"Error processing files: {str(e)}")

    return combined_data
191
 
192
def _process_zip_file(self, zip_path: str) -> List[Dict]:
    """Extract a ZIP archive and collect the text files it contains.

    Members are extracted one at a time into a temporary directory that
    is removed before this method returns. When the instance defines
    ``max_file_size``, members whose declared uncompressed size exceeds it
    are skipped, so a small archive cannot expand into arbitrarily large
    files on disk and in memory.

    Args:
        zip_path: Filesystem path of the ZIP archive to read.

    Returns:
        One dict per non-blank file accepted by ``self.is_text_file``,
        with the keys ``source`` (always ``"file"``), ``filename`` (base
        name only), ``content`` and ``timestamp`` (ISO-format time at
        which the record was built).
    """
    results: List[Dict] = []
    size_limit = getattr(self, "max_file_size", None)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref, \
            tempfile.TemporaryDirectory() as temp_dir:
        for info in zip_ref.infolist():
            if info.is_dir():
                continue
            # Uploaded archives are untrusted: bound each member by the
            # same limit that applies to uploads.
            if size_limit is not None and info.file_size > size_limit:
                logger.warning(f"Skipping {info.filename}: uncompressed size ({info.file_size} bytes) exceeds maximum allowed size")
                continue
            zip_ref.extract(info, temp_dir)

        for root, _, files in os.walk(temp_dir):
            for filename in files:
                filepath = os.path.join(root, filename)
                if not self.is_text_file(filepath):
                    continue
                try:
                    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read()
                except Exception as e:
                    # One unreadable member must not abort the whole archive.
                    logger.error(f"Error reading file {filename}: {str(e)}")
                    continue
                if not content.strip():
                    continue
                results.append({
                    "source": "file",
                    "filename": filename,
                    "content": content,
                    "timestamp": datetime.now().isoformat()
                })
    return results
215
 
216
  def _process_single_file(self, file) -> List[Dict]:
 
280
 
281
  with gr.Tab("File Input"):
282
  file_input = gr.File(
283
+ label="Upload text files or ZIP archives",
284
+ file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"],
285
+ multiple=True # Allow multiple file uploads
286
  )
287
 
288
  with gr.Tab("Text Input"):
 
313
  # Initialize chatbot
314
  chatbot = Chatbot()
315
 
316
+ def process_all_inputs(urls, files, text):
317
  """Process all input types with progress tracking"""
318
  try:
319
  processor = URLProcessor()
 
338
  })
339
 
340
  # Process files
341
+ if files:
342
+ combined_data = file_processor.process_files(files)
343
+ results.extend(combined_data)
344
 
345
  # Process text input
346
  if text:
 
398
  gr.Markdown("""
399
  ### Usage Guidelines
400
  - **URL Processing**: Enter valid HTTP/HTTPS URLs
401
+ - **File Input**: Upload text files or ZIP archives (multiple files allowed)
402
  - **Text Input**: Direct text processing
403
  - **Chat**: Load your JSON data and ask questions about it
404
  - Advanced cleaning and validation included