Spaces:

acecalisto3
/

urld

Running

App Files Files Community

acecalisto3 committed on Mar 19

Commit

61eaf76

verified ·

1 Parent(s): 3a075bd

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -34

app.py CHANGED Viewed

@@ -151,10 +151,12 @@ class URLProcessor:
 class FileProcessor:
     """Class to handle file processing"""
-    def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024):  # 2GB default
         self.max_file_size = max_file_size
         self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
     def is_text_file(self, filepath: str) -> bool:
         """Check if file is a text file"""
@@ -171,57 +173,83 @@ class FileProcessor:
             return []
         combined_data = []
         try:
             for file in files:
                 # Check if the file is a Gradio File object or a string path
-                file_name = file.name if isinstance(file, gr.File) else file
-                file_size = os.path.getsize(file_name)
                 if file_size > self.max_file_size:
                     logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
                     continue  # Skip this file
-                if zipfile.is_zipfile(file_name):
-                    combined_data.extend(self._process_zip_file(file_name))
                 else:
-                    combined_data.extend(self._process_single_file(file_name))
         except Exception as e:
             logger.error(f"Error processing files: {str(e)}")
-            return []
-        finally:
-            return combined_data
     def _process_zip_file(self, zip_path: str) -> List[Dict]:
-        """Process ZIP file contents"""
         results = []
-        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-            with tempfile.TemporaryDirectory() as temp_dir:
-                zip_ref.extractall(temp_dir)
-                for root, _, files in os.walk(temp_dir):
-                    for filename in files:
-                        filepath = os.path.join(root, filename)
-                        if self.is_text_file(filepath):
-                            try:
-                                with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
-                                    content = f.read()
-                                if content.strip():
-                                    results.append({
-                                        "source": "file",
-                                        "filename": filename,
-                                        "content": content,
-                                        "timestamp": datetime.now().isoformat()
-                                    })
-                            except Exception as e:
-                                logger.error(f"Error reading file {filename}: {str(e)}")
         return results
     def _process_single_file(self, file_path: str) -> List[Dict]:
         try:
             file_stat = os.stat(file_path)
             with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                 content = f.read()
-            return [{
                 'source': 'file',
                 'filename': os.path.basename(file_path),
                 'file_size': file_stat.st_size,
@@ -230,10 +258,10 @@ class FileProcessor:
                 'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
                 'content': content,
                 'timestamp': datetime.now().isoformat()
-            }]
         except Exception as e:
             logger.error(f"File processing error: {e}")
-            return []
 class Chatbot:
     """Simple chatbot that uses provided JSON data for responses."""

 class FileProcessor:
     """Class to handle file processing"""
+    def __init__(self, max_file_size: int = 1536 * 1024 * 1024):  # 1.5GB default
         self.max_file_size = max_file_size
         self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
+        self.processed_zip_count = 0
+        self.max_zip_files = 5
     def is_text_file(self, filepath: str) -> bool:
         """Check if file is a text file"""
             return []
         combined_data = []
+        self.processed_zip_count = 0
         try:
             for file in files:
                 # Check if the file is a Gradio File object or a string path
+                file_path = file.name if isinstance(file, gr.File) else file
+                # Skip if it's a directory
+                if os.path.isdir(file_path):
+                    logger.warning(f"Skipping directory: {file_path}")
+                    continue
+                # Skip if file doesn't exist
+                if not os.path.exists(file_path):
+                    logger.warning(f"File does not exist: {file_path}")
+                    continue
+                # Check file size
+                file_size = os.path.getsize(file_path)
                 if file_size > self.max_file_size:
                     logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
                     continue  # Skip this file
+                # Process based on file type
+                if zipfile.is_zipfile(file_path):
+                    if self.processed_zip_count >= self.max_zip_files:
+                        logger.warning(f"Maximum number of ZIP files ({self.max_zip_files}) reached, skipping {file_path}")
+                        continue
+                    self.processed_zip_count += 1
+                    zip_results = self._process_zip_file(file_path)
+                    combined_data.extend(zip_results)
                 else:
+                    file_results = self._process_single_file(file_path)
+                    combined_data.extend(file_results)
         except Exception as e:
             logger.error(f"Error processing files: {str(e)}")
+        return combined_data
     def _process_zip_file(self, zip_path: str) -> List[Dict]:
         """Read supported text files directly out of a ZIP archive.

         Members are read in place from the archive instead of being
         extracted to disk. A member is skipped when it is a directory
         entry, lives under ``__MACOSX``, does not end with one of
         ``self.supported_text_extensions`` (case-insensitive), declares an
         uncompressed size above ``self.max_file_size``, or contains only
         whitespace.

         Args:
             zip_path: Filesystem path of the ZIP archive to read.

         Returns:
             One dict per accepted member with the keys ``source``
             (always ``"zip_file"``), ``filename``, ``content`` and
             ``timestamp``. Errors are logged, not raised: a member that
             cannot be read is omitted, and an archive that cannot be
             opened yields an empty list.
         """
         results = []
         # str.endswith accepts a tuple, so all extensions are tested in one call.
         text_suffixes = tuple(self.supported_text_extensions)
         try:
             with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                 for info in zip_ref.infolist():
                     filename = info.filename
                     # Directory entries and macOS resource-fork metadata carry no content.
                     if filename.endswith('/') or filename.startswith('__MACOSX'):
                         continue
                     if not filename.lower().endswith(text_suffixes):
                         continue
                     # The whole member is read into memory below, so refuse
                     # entries whose declared uncompressed size is over the
                     # limit (a small archive can expand enormously).
                     if info.file_size > self.max_file_size:
                         logger.warning(
                             f"Skipping {filename} in {zip_path}: uncompressed size "
                             f"({info.file_size} bytes) exceeds maximum allowed size"
                         )
                         continue
                     try:
                         with zip_ref.open(info) as file:
                             content = file.read().decode('utf-8', errors='ignore')
                         if content.strip():
                             results.append({
                                 "source": "zip_file",
                                 "filename": filename,
                                 "content": content,
                                 "timestamp": datetime.now().isoformat()
                             })
                     except Exception as e:
                         # One unreadable member must not abort the rest of the archive.
                         logger.error(f"Error reading file {filename} from zip: {str(e)}")
         except Exception as e:
             logger.error(f"Error processing ZIP file {zip_path}: {str(e)}")
         return results
     def _process_single_file(self, file_path: str) -> List[Dict]:
+        """Process a single file and return its content"""
+        results = []
         try:
             file_stat = os.stat(file_path)
             with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                 content = f.read()
+            results.append({
                 'source': 'file',
                 'filename': os.path.basename(file_path),
                 'file_size': file_stat.st_size,
                 'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
                 'content': content,
                 'timestamp': datetime.now().isoformat()
+            })
         except Exception as e:
             logger.error(f"File processing error: {e}")
 class Chatbot:
     """Simple chatbot that uses provided JSON data for responses."""