acecalisto3 committed on
Commit
30f269c
·
verified ·
1 Parent(s): 933b2fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -55
app.py CHANGED
@@ -167,68 +167,56 @@ class FileProcessor:
167
  """Check if the file is a text file based on its extension."""
168
  return any(file_path.lower().endswith(ext) for ext in self.supported_text_extensions)
169
 
170
- def process_files(self, files: Union[List[gr.File], List[str]]) -> List[Dict]:
171
- """Process multiple uploaded files and return a single JSON extraction"""
172
- if not files:
173
- return []
174
-
 
 
 
 
 
 
175
  combined_data = []
176
- self.processed_zip_count = 0
177
 
178
- try:
179
- for file in files:
180
- file_path = file.name if isinstance(file, gr.File) else file
181
-
182
- logger.info(f"Processing file: {file_path}")
183
-
184
- if os.path.isdir(file_path):
185
- logger.warning(f"Skipping directory: {file_path}")
 
 
186
  continue
187
-
188
- if not os.path.exists(file_path):
189
- logger.warning(f"File does not exist: {file_path}")
 
190
  continue
 
 
191
 
192
- file_size = os.path.getsize(file_path)
193
- if file_size > self.max_file_size:
194
- logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
195
- continue
196
-
197
- if zipfile.is_zipfile(file_path):
198
- if self.processed_zip_count >= self.max_zip_files:
199
- logger.warning(f"Maximum number of ZIP files ({self.max_zip_files}) reached, skipping {file_path}")
200
- continue
201
- self.processed_zip_count += 1
202
- zip_results = self._process_zip_file(file_path)
203
- combined_data.extend(zip_results)
204
- elif self.is_text_file(file_path):
205
- file_results = self._process_single_file(file) # Changed file_path to file
206
- combined_data.extend(file_results)
207
- else:
208
- logger.warning(f"Unsupported file type: {file_path}")
209
-
210
- except Exception as e:
211
- logger.error(f"Error processing files: {str(e)}")
212
-
213
  return combined_data
214
-
215
 
216
- def _process_single_file(self, file) -> List[Dict]:
217
- try:
218
- file_stat = os.stat(file.name)
219
- with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
220
- content = f.read()
221
-
222
- return [{
223
- 'source': 'file',
224
- 'filename': os.path.basename(file.name),
225
- 'file_size': file_stat.st_size,
226
- 'mime_type': mimetypes.guess_type(file.name)[0],
227
- 'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
228
- 'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
229
- 'content': content,
230
- 'timestamp': datetime.now().isoformat()
231
- }]
232
  except Exception as e:
233
  logger.error(f"File processing error: {e}")
234
  return []
 
167
  """Check if the file is a text file based on its extension."""
168
  return any(file_path.lower().endswith(ext) for ext in self.supported_text_extensions)
169
 
170
+
171
def validate_filepath(path: Path, valid_extensions=None) -> bool:
    """Return True when *path* is an existing regular file with an allowed suffix.

    Args:
        path: Candidate filesystem path to check.
        valid_extensions: Optional collection of lowercase suffixes
            (e.g. {'.txt', '.md'}). When omitted, falls back to the
            module-level ``valid_extensions`` collection, preserving the
            original call signature and behaviour.

    Returns:
        True only when the path exists, is a regular file, and its
        lowercased suffix is in the allowed set. Any exception raised
        during the check is logged and reported as False, never propagated.
    """
    if valid_extensions is None:
        # NOTE(review): the original read a module global `valid_extensions`
        # that is not defined in this view. If it is missing at runtime the
        # NameError was swallowed by the except below, making every call
        # return False. Falling back to an empty set keeps that observable
        # behaviour while avoiding the hidden exception — TODO confirm the
        # global actually exists at module scope.
        valid_extensions = globals().get("valid_extensions", set())
    try:
        return path.exists() and path.is_file() and path.suffix.lower() in valid_extensions
    except Exception as e:
        logger.error(f"Validation error for {path}: {str(e)}")
        return False
178
+
179
def process_files(base_path: str = "/app/data") -> list:
    """Recursively scan *base_path* and process every valid file found.

    The root directory is created on first use if it does not exist.
    Each entry is checked with ``validate_filepath``; entries that fail
    validation, or raise while being processed, are logged and skipped,
    so one bad file never aborts the whole scan.

    Args:
        base_path: Root directory to scan (created when missing).

    Returns:
        A list of per-file result dicts produced by ``process_single_file``.
    """
    results = []

    base_dir = Path(base_path)
    if not base_dir.exists():
        base_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Created data directory at {base_dir}")

    # rglob('*') is pathlib's documented equivalent of glob('**/*').
    for item in base_dir.rglob('*'):
        try:
            if item.is_dir():
                # Directories are only descended into, never processed.
                logger.debug(f"Skipping directory: {item}")
            elif not validate_filepath(item):
                logger.warning(f"Invalid file skipped: {item}")
            else:
                logger.info(f"Processing valid file: {item.name}")
                results.append(process_single_file(item))
        except Exception as e:
            # Best-effort walk: a failure on one entry must not stop the rest.
            logger.error(f"Failed processing {item}: {str(e)}")

    return results
 
211
 
212
def process_single_file(file_path: Path) -> dict:
    """Read one text file and return its content plus basic metadata.

    Replaces the previous placeholder, which returned the hard-coded
    string "processed content" and never opened the file.

    Args:
        file_path: Path to an already-validated, existing file.

    Returns:
        Dict with the same keys as the original stub:
        'filename' (base name), 'content' (text decoded as UTF-8,
        undecodable bytes dropped), and 'metadata' (size in bytes,
        guessed MIME type, modification time, processing timestamp).
    """
    stat = file_path.stat()
    return {
        'filename': file_path.name,
        # errors='ignore' mirrors the reader used elsewhere in this app,
        # so a stray undecodable byte cannot abort processing.
        'content': file_path.read_text(encoding='utf-8', errors='ignore'),
        'metadata': {
            'file_size': stat.st_size,
            'mime_type': mimetypes.guess_type(str(file_path))[0],
            'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
            'timestamp': datetime.now().isoformat(),
        },
    }
 
 
 
 
 
 
 
 
220
  except Exception as e:
221
  logger.error(f"File processing error: {e}")
222
  return []