Update app.py
app.py CHANGED
@@ -181,10 +181,10 @@ class EnhancedURLProcessor:
 class EnhancedFileProcessor:
     """Advanced file processing with complete content extraction"""
 
-    def __init__(self, max_file_size: int = 5 * 1024 * 1024 * 1024):
+    def __init__(self, max_file_size: int = 5 * 1024 * 1024 * 1024 * 1024 *1024):
         self.max_file_size = max_file_size
         self.supported_extensions = {
-            '.txt', '.md', '.csv', '.json', '.xml', '.html', '.
+            '.txt', '.md', '.csv', '.json', '.xml', '.html', '.html',
             '.log', '.yml', '.yaml', '.ini', '.conf', '.cfg',
             '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar',
             '.pdf', '.doc', '.docx', '.rtf', '.odt'
@@ -199,7 +199,7 @@ class EnhancedFileProcessor:
         try:
             file_size = os.path.getsize(file.name)
             if file_size > self.max_file_size:
-                logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
+                logger.warning(f"File size ({{file_size}} bytes) exceeds maximum allowed size")
                 return []
 
             with tempfile.TemporaryDirectory() as temp_dir:
@@ -212,7 +212,7 @@ class EnhancedFileProcessor:
                 dataset.extend(self._process_single_file(file))
 
         except Exception as e:
-            logger.error(f"Error processing file: {str(e)}")
+            logger.error(f"Error processing file: {{str(e)}}")
             return []
 
         return dataset
@@ -263,7 +263,7 @@ class EnhancedFileProcessor:
                 'timestamp': datetime.now().isoformat()
             }]
         except Exception as e:
-            logger.error(f"File processing error: {e}")
+            logger.error(f"File processing error: {{e}}")
             return []
 
     def _process_archive(self, archive_path: str, extract_to: Path) -> List[Dict]:
@@ -284,7 +284,7 @@ class EnhancedFileProcessor:
             # TODO: Add support for other archive types (tar, 7z, etc.)
 
         except Exception as e:
-            logger.error(f"Archive processing error: {e}")
+            logger.error(f"Archive processing error: {{e}}")
 
         return dataset
 
@@ -332,7 +332,7 @@ class EnhancedFileProcessor:
 
             return chunks
         except Exception as e:
-            logger.error(f"Error chunking data: {e}")
+            logger.error(f"Error chunking data: {{e}}")
             return []
 
         # Calculate number of chunks needed
@@ -357,7 +357,7 @@ class EnhancedFileProcessor:
             return chunks
 
         except Exception as e:
-            logger.error(f"Error chunking data: {e}")
+            logger.error(f"Error chunking data: {{e}}")
             return []
 
 def generate_stylish_qr(data: Union[str, Dict],
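For reference, a standalone Python sketch (not taken from app.py; the variable names are illustrative only) of the two behaviours the edited lines touch: the integer arithmetic behind the max_file_size default, and f-string brace escaping, where doubled braces render as literal braces instead of interpolating the expression.

# Default size limits written as plain integer products.
old_limit = 5 * 1024 * 1024 * 1024                    # 5_368_709_120 bytes (5 GiB)
new_limit = 5 * 1024 * 1024 * 1024 * 1024 * 1024      # 5_629_499_534_213_120 bytes (5 PiB)

# f-string braces: single braces interpolate, doubled braces are escapes.
e = ValueError("bad input")
print(f"Error processing file: {str(e)}")    # -> Error processing file: bad input
print(f"Error processing file: {{str(e)}}")  # -> Error processing file: {str(e)}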