Update app.py
app.py CHANGED
@@ -119,7 +119,7 @@ class URLProcessor:
                 'timestamp': datetime.now().isoformat()
             }
         except Exception as e:
-            logger.error(f"Google Drive processing failed: {e}")
+            logger.error(f"Google Drive processing failed: {e}")
             return None
 
     def _handle_google_calendar(self, url: str) -> Optional[Dict]:
@@ -212,6 +212,58 @@ class FileProcessor:
 
         return dataset
 
+    def _process_single_file(self, file) -> List[Dict]:
+        """Process a single file"""
+        try:
+            file_stat = os.stat(file.name)
+
+            # For very large files, read in chunks and summarize
+            if file_stat.st_size > 100 * 1024 * 1024: # 100MB
+                logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
+
+                # Read first and last 1MB for extremely large files
+                content = ""
+                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                    content = f.read(1 * 1024 * 1024) # First 1MB
+                    content += "\n...[Content truncated due to large file size]...\n"
+
+                    # Seek to the last 1MB
+                    f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
+                    content += f.read() # Last 1MB
+            else:
+                # Regular file processing
+                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                    content = f.read()
+
+            return [{
+                'source': 'filename',
+                'filename': os.path.basename(file.name),
+                'file_size': file_stat.st_size,
+                'mime_type': mimetypes.guess_type(file.name)[0],
+                'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                'content': content,
+                'timestamp': datetime.now().isoformat()
+            }]
+        except Exception as e:
+            logger.error(f"File processing error: {e}")
+            return []
+
+    def _process_zip_file(self, zip_file_path: str, extract_to: str) -> List[Dict]:
+        """Process a zip file and extract its contents"""
+        dataset = []
+        try:
+            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
+                zip_ref.extractall(extract_to)
+                for file_info in zip_ref.infolist():
+                    if file_info.is_dir():
+                        continue
+                    extracted_file_path = os.path.join(extract_to, file_info.filename)
+                    dataset.extend(self._process_single_file(open(extracted_file_path, 'rb')))
+        except Exception as e:
+            logger.error(f"Error processing zip file: {e}")
+        return dataset
+
     def chunk_data(self, data, max_size=2953): # 2953 is the max size for version 1 QR code
         """Chunk data into smaller pieces if it exceeds max_size."""
         json_str = json.dumps(data, ensure_ascii=False)
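For reference, a self-contained sketch of the extract-then-process flow that the new _process_zip_file follows, written here with a context manager so each extracted file handle is closed after reading (standalone helper names, not part of app.py):

import os
import zipfile
from typing import Dict, List


def process_zip(zip_path: str, extract_to: str) -> List[Dict]:
    """Extract a zip archive and read each member's text content."""
    records: List[Dict] = []
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
        for info in zip_ref.infolist():
            if info.is_dir():
                continue
            member_path = os.path.join(extract_to, info.filename)
            # Open and close each extracted member explicitly
            with open(member_path, 'r', encoding='utf-8', errors='ignore') as f:
                records.append({'filename': info.filename, 'content': f.read()})
    return records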
@@ -226,54 +278,14 @@ class FileProcessor:
             json_str = json_str[max_size:]
 
         return chunks
 
-    def _process_single_file(self, file) -> List[Dict]:
-        """Process a single file"""
-        try:
-            file_stat = os.stat(file.name)
-
-            # For very large files, read in chunks and summarize
-            if file_stat.st_size > 100 * 1024 * 1024: # 100MB
-                logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
-
-                # Read first and last 1MB for extremely large files
-                content = ""
-                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
-                    content = f.read(1 * 1024 * 1024) # First 1MB
-                    content += "\n...[Content truncated due to large file size]...\n"
-
-                    # Seek to the last 1MB
-                    f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
-                    content += f.read() # Last 1MB
-            else:
-                # Regular file processing
-                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
-                    content = f.read()
-
-            return [{
-                'source': 'filename', # Assuming 'source' should be a string value
-                'filename': os.path.basename(file.name),
-                'file_size': file_stat.st_size,
-                'mime_type': mimetypes.guess_type(file.name)[0],
-                'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
-                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
-                'content': content,
-                'timestamp': datetime.now().isoformat()
-            }]
-        except Exception as e:
-            logger.error(f"File processing error: {e}")
-            return []
-
 def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
     """Clean and validate JSON data"""
     try:
-        # If it's a string, try to parse it
         if isinstance(data, str):
-            # Remove any existing content and extra whitespace
            data = data.strip()
            data = json.loads(data)
 
-        # Convert to string and back to ensure proper JSON format
        cleaned = json.loads(json.dumps(data))
        return cleaned
    except json.JSONDecodeError as e:
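The 2953-byte figure used as chunk_data's default max_size is the byte-mode capacity of a version 40 QR code at error-correction level L (a version 1 code holds far less), not version 1 as the inline comment says. The middle of chunk_data is not visible in this diff; a minimal sketch of slicing a JSON string into pieces of that size and putting them back together, with hypothetical helper names, might look like:

import json
from typing import Any, List

MAX_QR_BYTES = 2953  # Version 40, error correction L, byte mode


def chunk_json(data: Any, max_size: int = MAX_QR_BYTES) -> List[str]:
    """Serialize data and slice the JSON string into max_size pieces."""
    # Note: this slices by characters, as the diff does; multi-byte UTF-8
    # content can therefore exceed the byte budget of a single QR code.
    json_str = json.dumps(data, ensure_ascii=False)
    return [json_str[i:i + max_size] for i in range(0, len(json_str), max_size)]


def reassemble(chunks: List[str]) -> Any:
    """Concatenate chunks back into one JSON document and parse it."""
    return json.loads("".join(chunks))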
@@ -290,9 +302,8 @@ def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]
     output_dir.mkdir(parents=True, exist_ok=True)
 
     if combined:
-        # Generate single QR code for all data
         cleaned_data = clean_json(data)
-        if cleaned_data is None:
+        if cleaned_data is None:
             logger.error("Failed to clean data for QR code generation.")
             return []
 
@@ -311,14 +322,13 @@ def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]
         img.save(str(output_path))
         return [str(output_path)]
     else:
-        # Generate separate QR codes for each item
         if isinstance(data, list):
             paths = []
             for idx, item in enumerate(data):
                 cleaned_item = clean_json(item)
-                if cleaned_item is None:
+                if cleaned_item is None:
                     logger.error(f"Failed to clean item {idx} for QR code generation.")
-                    continue
+                    continue
 
                 qr = qrcode.QRCode(
                     version=None,
@@ -336,9 +346,8 @@ def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]
             paths.append(str(output_path))
             return paths
         else:
-            # Single item, not combined
             cleaned_item = clean_json(data)
-            if cleaned_item is None:
+            if cleaned_item is None:
                 logger.error("Failed to clean single item for QR code generation.")
                 return []
 
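The generate_qr_code hunks above build images with the qrcode package (imported as qrcode in app.py). A minimal sketch of encoding one cleaned payload into a PNG with that library; the output path and colors are illustrative assumptions, not values taken from app.py:

import json
from pathlib import Path

import qrcode  # pip install qrcode[pil]


def save_qr(payload: dict, output_path: str = "output_qr_codes/qr_0.png") -> str:
    """Encode a JSON payload into a single QR code image and save it."""
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    qr = qrcode.QRCode(
        version=None,  # let the library pick the smallest version that fits
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=10,
        border=4,
    )
    qr.add_data(json.dumps(payload, ensure_ascii=False))
    qr.make(fit=True)
    img = qr.make_image(fill_color="black", back_color="white")
    img.save(output_path)
    return output_path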
@@ -453,7 +462,6 @@ def create_interface():
         # Process text input first (since it's direct JSON)
         if text and text.strip():
             try:
-                # Try to parse as JSON
                 json_data = json.loads(text)
                 if isinstance(json_data, list):
                     results.extend(json_data)
@@ -490,7 +498,6 @@ def create_interface():
         # Generate QR codes
         if results:
             if combine:
-                # Chunk the data if necessary
                 combined_data = []
                 for item in results:
                     combined_data.extend(file_processor.chunk_data(item))
@@ -505,7 +512,7 @@ def create_interface():
                     f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR code(s)!"
                 )
             else:
-                return None, [], "❌ Failed to generate QR codes. Please check the input data."
+                return None, [], "❌ Failed to generate QR codes. Please check the input data."
         else:
             return None, [], "⚠️ No valid content to process. Please provide some input data."
 