acecalisto3 committed on
Commit
6fb3663
·
verified ·
1 Parent(s): 8b83f9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -16
app.py CHANGED
@@ -7,7 +7,7 @@ import mimetypes
7
  import zipfile
8
  import tempfile
9
  from datetime import datetime
10
- from typing import List, Dict, Optional
11
  from pathlib import Path
12
  import requests
13
  import validators
@@ -94,7 +94,7 @@ class URLProcessor:
94
  try:
95
  file_id = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
96
  if not file_id:
97
- logger.error(f"Invalid Google Drive URL: { url}")
98
  return None
99
 
100
  direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
@@ -165,7 +165,7 @@ class FileProcessor:
165
  except Exception:
166
  return False
167
 
168
- def process_files(self, files) -> List[Dict]:
169
  """Process multiple uploaded files and return a single JSON extraction"""
170
  if not files:
171
  return []
@@ -173,15 +173,17 @@ class FileProcessor:
173
  combined_data = []
174
  try:
175
  for file in files:
176
- file_size = os.path.getsize(file.name)
 
 
177
  if file_size > self.max_file_size:
178
  logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
179
  continue # Skip this file
180
 
181
- if zipfile.is_zipfile(file.name):
182
- combined_data.extend(self._process_zip_file(file.name))
183
  else:
184
- combined_data.extend(self._process_single_file(file))
185
 
186
  except Exception as e:
187
  logger.error(f"Error processing files: {str(e)}")
@@ -213,17 +215,17 @@ class FileProcessor:
213
  logger.error(f"Error reading file {filename}: {str(e)}")
214
  return results
215
 
216
- def _process_single_file(self, file) -> List[Dict]:
217
  try:
218
- file_stat = os.stat(file.name)
219
- with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
220
  content = f.read()
221
 
222
  return [{
223
  'source': 'file',
224
- 'filename': os.path.basename(file.name),
225
  'file_size': file_stat.st_size,
226
- 'mime_type': mimetypes.guess_type(file.name)[0 ],
227
  'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
228
  'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
229
  'content': content,
@@ -286,7 +288,8 @@ def create_interface():
286
  with gr.Tab("File Input"):
287
  file_input = gr.File(
288
  label="Upload text files or ZIP archives",
289
- file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
 
290
  )
291
 
292
  with gr.Tab("Text Input"):
@@ -326,7 +329,7 @@ def create_interface():
326
 
327
  # Process URLs
328
  if urls:
329
- url_list = re.split(r'[,\n]', urls)
330
  url_list = [url.strip() for url in url_list if url.strip()]
331
 
332
  for url in url_list:
@@ -344,7 +347,7 @@ def create_interface():
344
  # Process files
345
  if files:
346
  combined_data = file_processor.process_files(files)
347
- results.extend (combined_data)
348
 
349
  # Process text input
350
  if text:
@@ -402,7 +405,7 @@ def create_interface():
402
  gr.Markdown("""
403
  ### Usage Guidelines
404
  - **URL Processing**: Enter valid HTTP/HTTPS URLs
405
- - **File Input**: Upload text files or ZIP archives (single file allowed)
406
  - **Text Input**: Direct text processing
407
  - **Chat**: Load your JSON data and ask questions about it
408
  - Advanced cleaning and validation included
 
7
  import zipfile
8
  import tempfile
9
  from datetime import datetime
10
+ from typing import List, Dict, Optional, Union, Any
11
  from pathlib import Path
12
  import requests
13
  import validators
 
94
  try:
95
  file_id = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
96
  if not file_id:
97
+ logger.error(f"Invalid Google Drive URL: {url}")
98
  return None
99
 
100
  direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
 
165
  except Exception:
166
  return False
167
 
168
+ def process_files(self, files: Union[List[gr.File], List[str]]) -> List[Dict]:
169
  """Process multiple uploaded files and return a single JSON extraction"""
170
  if not files:
171
  return []
 
173
  combined_data = []
174
  try:
175
  for file in files:
176
+ # Check if the file is a Gradio File object or a string path
177
+ file_name = file.name if isinstance(file, gr.File) else file
178
+ file_size = os.path.getsize(file_name)
179
  if file_size > self.max_file_size:
180
  logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
181
  continue # Skip this file
182
 
183
+ if zipfile.is_zipfile(file_name):
184
+ combined_data.extend(self._process_zip_file(file_name))
185
  else:
186
+ combined_data.extend(self._process_single_file(file_name))
187
 
188
  except Exception as e:
189
  logger.error(f"Error processing files: {str(e)}")
 
215
  logger.error(f"Error reading file {filename}: {str(e)}")
216
  return results
217
 
218
+ def _process_single_file(self, file_path: str) -> List[Dict]:
219
  try:
220
+ file_stat = os.stat(file_path)
221
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
222
  content = f.read()
223
 
224
  return [{
225
  'source': 'file',
226
+ 'filename': os.path.basename(file_path),
227
  'file_size': file_stat.st_size,
228
+ 'mime_type': mimetypes.guess_type(file_path)[0],
229
  'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
230
  'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
231
  'content': content,
 
288
  with gr.Tab("File Input"):
289
  file_input = gr.File(
290
  label="Upload text files or ZIP archives",
291
+ file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"],
292
+ multiple=True # Allow multiple file uploads
293
  )
294
 
295
  with gr.Tab("Text Input"):
 
329
 
330
  # Process URLs
331
  if urls:
332
+ url_list = re.split(r '[,\n]', urls)
333
  url_list = [url.strip() for url in url_list if url.strip()]
334
 
335
  for url in url_list:
 
347
  # Process files
348
  if files:
349
  combined_data = file_processor.process_files(files)
350
+ results.extend(combined_data)
351
 
352
  # Process text input
353
  if text:
 
405
  gr.Markdown("""
406
  ### Usage Guidelines
407
  - **URL Processing**: Enter valid HTTP/HTTPS URLs
408
+ - **File Input**: Upload multiple text files or ZIP archives
409
  - **Text Input**: Direct text processing
410
  - **Chat**: Load your JSON data and ask questions about it
411
  - Advanced cleaning and validation included