acecalisto3 committed on
Commit
958af8e
·
verified ·
1 Parent(s): 1538970

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -270,7 +270,6 @@ class EnhancedFileProcessor:
270
  """Process an archive file with enhanced extraction"""
271
  dataset = []
272
  try:
273
- # Handle ZIP archives
274
  if zipfile.is_zipfile(archive_path):
275
  with zipfile.ZipFile(archive_path, 'r') as zip_ref:
276
  zip_ref.extractall(extract_to)
@@ -280,12 +279,17 @@ class EnhancedFileProcessor:
280
  if extracted_path.suffix.lower() in self.supported_extensions:
281
  with open(extracted_path, 'rb') as f:
282
  dataset.extend(self._process_single_file(f))
283
-
284
- # TODO: Add support for other archive types (tar, 7z, etc.)
285
-
 
 
 
 
 
 
286
  except Exception as e:
287
- logger.error(f"Archive processing error: {{e}}")
288
-
289
  return dataset
290
 
291
  def chunk_data(self, data: Union[Dict, List], max_size: int = 2953) -> List[Dict]:
 
270
  """Process an archive file with enhanced extraction"""
271
  dataset = []
272
  try:
 
273
  if zipfile.is_zipfile(archive_path):
274
  with zipfile.ZipFile(archive_path, 'r') as zip_ref:
275
  zip_ref.extractall(extract_to)
 
279
  if extracted_path.suffix.lower() in self.supported_extensions:
280
  with open(extracted_path, 'rb') as f:
281
  dataset.extend(self._process_single_file(f))
282
+ elif tarfile.is_tarfile(archive_path):
283
+ with tarfile.open(archive_path, 'r') as tar_ref:
284
+ tar_ref.extractall(extract_to)
285
+ for member in tar_ref.getmembers():
286
+ if member.isfile():
287
+ extracted_path = extract_to / member.name
288
+ if extracted_path.suffix.lower() in self.supported_extensions:
289
+ with open(extracted_path, 'rb') as f:
290
+ dataset.extend(self._process_single_file(f))
291
  except Exception as e:
292
+ logger.error(f"Archive processing error: {e}")
 
293
  return dataset
294
 
295
  def chunk_data(self, data: Union[Dict, List], max_size: int = 2953) -> List[Dict]: