Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -270,7 +270,6 @@ class EnhancedFileProcessor:
|
|
270 |
"""Process an archive file with enhanced extraction"""
|
271 |
dataset = []
|
272 |
try:
|
273 |
-
# Handle ZIP archives
|
274 |
if zipfile.is_zipfile(archive_path):
|
275 |
with zipfile.ZipFile(archive_path, 'r') as zip_ref:
|
276 |
zip_ref.extractall(extract_to)
|
@@ -280,12 +279,17 @@ class EnhancedFileProcessor:
|
|
280 |
if extracted_path.suffix.lower() in self.supported_extensions:
|
281 |
with open(extracted_path, 'rb') as f:
|
282 |
dataset.extend(self._process_single_file(f))
|
283 |
-
|
284 |
-
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
except Exception as e:
|
287 |
-
logger.error(f"Archive processing error: {
|
288 |
-
|
289 |
return dataset
|
290 |
|
291 |
def chunk_data(self, data: Union[Dict, List], max_size: int = 2953) -> List[Dict]:
|
|
|
270 |
"""Process an archive file with enhanced extraction"""
|
271 |
dataset = []
|
272 |
try:
|
|
|
273 |
if zipfile.is_zipfile(archive_path):
|
274 |
with zipfile.ZipFile(archive_path, 'r') as zip_ref:
|
275 |
zip_ref.extractall(extract_to)
|
|
|
279 |
if extracted_path.suffix.lower() in self.supported_extensions:
|
280 |
with open(extracted_path, 'rb') as f:
|
281 |
dataset.extend(self._process_single_file(f))
|
282 |
+
elif tarfile.is_tarfile(archive_path):
|
283 |
+
with tarfile.open(archive_path, 'r') as tar_ref:
|
284 |
+
tar_ref.extractall(extract_to)
|
285 |
+
for member in tar_ref.getmembers():
|
286 |
+
if member.isfile():
|
287 |
+
extracted_path = extract_to / member.name
|
288 |
+
if extracted_path.suffix.lower() in self.supported_extensions:
|
289 |
+
with open(extracted_path, 'rb') as f:
|
290 |
+
dataset.extend(self._process_single_file(f))
|
291 |
except Exception as e:
|
292 |
+
logger.error(f"Archive processing error: {e}")
|
|
|
293 |
return dataset
|
294 |
|
295 |
def chunk_data(self, data: Union[Dict, List], max_size: int = 2953) -> List[Dict]:
|