Update app.py
app.py
CHANGED
@@ -4,28 +4,26 @@ import re
 import time
 import logging
 import mimetypes
+import concurrent.futures
+import string
+import zipfile
 import tempfile
 from datetime import datetime
+from typing import List, Dict, Optional, Union
 from pathlib import Path
 from urllib.parse import urlparse
-
+
 import requests
 import validators
 import gradio as gr
 from diskcache import Cache
 from bs4 import BeautifulSoup
 from fake_useragent import UserAgent
+from ratelimit import limits, sleep_and_retry
 from cleantext import clean
 import qrcode
-import PyPDF2
-from PIL import Image
-import pytesseract
-import cv2
-import numpy as np
-import fitz  # PyMuPDF
-import zipfile
 
-# Setup logging
+# Setup logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
@@ -36,6 +34,9 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
+# Ensure output directories exist
+Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
+
 class URLProcessor:
     def __init__(self):
         self.session = requests.Session()
@@ -48,13 +49,6 @@ class URLProcessor:
             'Connection': 'keep-alive',
             'Upgrade-Insecure-Requests': '1'
         })
-        self.supported_content_types = {
-            'text/html': self._fetch_html_content,
-            'application/pdf': self._fetch_pdf_content,
-            'image': self._fetch_image_content,
-            'application/json': self._fetch_json_content,
-            'text/plain': self._fetch_text_content
-        }
 
     def advanced_text_cleaning(self, text: str) -> str:
         """Robust text cleaning with version compatibility"""
@@ -86,7 +80,7 @@
         try:
             if not validators.url(url):
                 return {'is_valid': False, 'message': 'Invalid URL format'}
-
+
             response = self.session.head(url, timeout=self.timeout)
             response.raise_for_status()
             return {'is_valid': True, 'message': 'URL is valid and accessible'}
@@ -94,31 +88,18 @@
             return {'is_valid': False, 'message': f'URL validation failed: {str(e)}'}
 
     def fetch_content(self, url: str) -> Optional[Dict]:
-        """Universal content fetcher with
+        """Universal content fetcher with special case handling"""
         try:
-            #
+            # Google Drive document handling
            if 'drive.google.com' in url:
                return self._handle_google_drive(url)
+
+            # Google Calendar ICS handling
            if 'calendar.google.com' in url and 'ical' in url:
                return self._handle_google_calendar(url)
 
-            #
-
-            content_type = response.headers.get('Content-Type', '').split(';')[0].lower()
-
-            # Find appropriate handler
-            handler = None
-            for supported_type, type_handler in self.supported_content_types.items():
-                if content_type.startswith(supported_type):
-                    handler = type_handler
-                    break
-
-            if handler:
-                return handler(url)
-            else:
-                logger.warning(f"Unsupported content type: {content_type}")
-                return self._fetch_text_content(url)
-
+            # Standard HTML processing
+            return self._fetch_html_content(url)
         except Exception as e:
             logger.error(f"Content fetch failed: {e}")
             return None
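Note: with this change every non-Drive, non-Calendar URL is routed straight through `_fetch_html_content`; the content-type dispatch table is gone. A minimal caller-side sketch of the new flow, with the import path and URL as assumptions for illustration, not part of this diff:

```python
# Hypothetical usage; assumes app.py is on the import path.
from app import URLProcessor

processor = URLProcessor()
result = processor.fetch_content("https://example.com")  # made-up URL
if result:
    # Every fetcher in this version returns the same dict shape.
    print(result['content_type'], result['timestamp'])
    print(result['content'][:200])
```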
@@ -130,11 +111,11 @@
         if not file_id:
             logger.error(f"Invalid Google Drive URL: {url}")
             return None
-
+
         direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
         response = self.session.get(direct_url, timeout=self.timeout)
         response.raise_for_status()
-
+
         return {
             'content': response.text,
             'content_type': response.headers.get('Content-Type', ''),
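Note: the hunk above is whitespace-only, but it shows the direct-download URL this handler builds from a Drive share link. A standalone sketch of the same pattern; the ID-extraction regex is an assumption, since the actual pattern sits above this hunk:

```python
import re

url = "https://drive.google.com/file/d/abc123DEF/view"  # made-up share link
file_id = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)   # assumed regex
if file_id:
    direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
    print(direct_url)
```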
@@ -159,180 +140,48 @@
             return None
 
     def _fetch_html_content(self, url: str) -> Optional[Dict]:
-        """
+        """Standard HTML content processing"""
         try:
             response = self.session.get(url, timeout=self.timeout)
             response.raise_for_status()
-
+
             soup = BeautifulSoup(response.text, 'html.parser')
-
+
             # Remove unwanted elements
             for element in soup(['script', 'style', 'nav', 'footer', 'header', 'meta', 'link']):
                 element.decompose()
-
+
             # Extract main content
             main_content = soup.find('main') or soup.find('article') or soup.body
-
-            # Extract metadata
-            metadata = {
-                'title': soup.title.string if soup.title else None,
-                'description': soup.find('meta', {'name': 'description'})['content'] if soup.find('meta', {'name': 'description'}) else None,
-                'keywords': soup.find('meta', {'name': 'keywords'})['content'] if soup.find('meta', {'name': 'keywords'}) else None,
-                'author': soup.find('meta', {'name': 'author'})['content'] if soup.find('meta', {'name': 'author'}) else None
-            }
-
-            # Clean and structure content
-            text_content = main_content.get_text(separator='\n', strip=True)
-            cleaned_content = self.advanced_text_cleaning(text_content)
-
-            return {
-                'content': cleaned_content,
-                'metadata': metadata,
-                'content_type': response.headers.get('Content-Type', ''),
-                'timestamp': datetime.now().isoformat()
-            }
-        except Exception as e:
-            logger.error(f"HTML processing failed: {e}")
-            return None
 
-    def _fetch_pdf_content(self, url: str) -> Optional[Dict]:
-        """Process PDF content"""
-        try:
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
-
-            with tempfile.NamedTemporaryFile(suffix='.pdf') as temp_file:
-                temp_file.write(response.content)
-                temp_file.flush()
-
-                # Extract text and metadata using PyMuPDF
-                doc = fitz.open(temp_file.name)
-
-                # Extract text with formatting preservation
-                text = ""
-                metadata = {
-                    'title': doc.metadata.get('title'),
-                    'author': doc.metadata.get('author'),
-                    'subject': doc.metadata.get('subject'),
-                    'keywords': doc.metadata.get('keywords'),
-                    'creator': doc.metadata.get('creator'),
-                    'producer': doc.metadata.get('producer'),
-                    'page_count': len(doc),
-                    'file_size': os.path.getsize(temp_file.name),
-                    'version': doc.version
-                }
-
-                # Extract text with layout preservation
-                for page in doc:
-                    blocks = page.get_text("blocks")
-                    for block in blocks:
-                        if block[6] == 0:  # Text block
-                            text += block[4] + "\n"
-
-                doc.close()
-                cleaned_content = self.advanced_text_cleaning(text)
-
+            if main_content is None:
+                logger.warning(f"No main content found for URL: {url}")
                 return {
-                    'content': cleaned_content,
-                    'metadata': metadata,
-                    'content_type': 'application/pdf',
-                    'timestamp': datetime.now().isoformat()
-                }
-        except Exception as e:
-            logger.error(f"PDF processing failed: {e}")
-            return None
-
-    def _fetch_image_content(self, url: str) -> Optional[Dict]:
-        """Process image content with OCR and advanced image processing"""
-        try:
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
-
-            with tempfile.NamedTemporaryFile(suffix='.jpg') as temp_file:
-                temp_file.write(response.content)
-                temp_file.flush()
-
-                # Load image with OpenCV
-                img = cv2.imread(temp_file.name)
-                if img is None:
-                    raise ValueError("Failed to load image")
-
-                # Image preprocessing for better OCR
-                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-                denoised = cv2.fastNlMeansDenoising(gray)
-                thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
-
-                # Extract text using Tesseract
-                text = pytesseract.image_to_string(thresh)
-                cleaned_text = self.advanced_text_cleaning(text) if text else None
-
-                # Extract metadata and additional image features
-                with Image.open(temp_file.name) as pil_img:
-                    exif = pil_img._getexif() if hasattr(pil_img, '_getexif') else None
-                    metadata = {
-                        'format': pil_img.format,
-                        'mode': pil_img.mode,
-                        'size': pil_img.size,
-                        'exif': exif,
-                        'image_features': {
-                            'resolution': img.shape,
-                            'channels': img.shape[2] if len(img.shape) > 2 else 1,
-                            'mean_brightness': np.mean(gray),
-                            'has_text': bool(cleaned_text and cleaned_text.strip())
-                        }
-                    }
-
-                return {
-                    'content': cleaned_text,
-                    'metadata': metadata,
+                    'content': '',
                     'content_type': response.headers.get('Content-Type', ''),
                     'timestamp': datetime.now().isoformat()
                 }
-        except Exception as e:
-            logger.error(f"Image processing failed: {e}")
-            return None
 
-    def _fetch_json_content(self, url: str) -> Optional[Dict]:
-        """Process JSON content"""
-        try:
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
-
-            content = response.json()
-
-            return {
-                'content': json.dumps(content, indent=2),
-                'content_type': 'application/json',
-                'timestamp': datetime.now().isoformat()
-            }
-        except Exception as e:
-            logger.error(f"JSON processing failed: {e}")
-            return None
+            # Clean and structure content
+            text_content = main_content.get_text(separator='\n', strip=True)
+            cleaned_content = self.advanced_text_cleaning(text_content)
 
-    def _fetch_text_content(self, url: str) -> Optional[Dict]:
-        """Process plain text content"""
-        try:
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
-
-            cleaned_content = self.advanced_text_cleaning(response.text)
-
             return {
                 'content': cleaned_content,
                 'content_type': response.headers.get('Content-Type', ''),
                 'timestamp': datetime.now().isoformat()
             }
         except Exception as e:
-            logger.error(f"Text processing failed: {e}")
+            logger.error(f"HTML processing failed: {e}")
             return None
-
+
 class FileProcessor:
     """Class to handle file processing"""
-
+
     def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024):  # 2GB default
         self.max_file_size = max_file_size
         self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
-
+
     def is_text_file(self, filepath: str) -> bool:
         """Check if file is a text file"""
         try:
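Note: the rewritten `_fetch_html_content` now guards against pages with no `<main>`, `<article>`, or `<body>`. A self-contained sketch of that fallback chain (the HTML snippet is invented for illustration):

```python
from bs4 import BeautifulSoup

html = "<html><body><article><p>Hello, world.</p></article></body></html>"
soup = BeautifulSoup(html, "html.parser")

# Same fallback chain as the diff: <main>, then <article>, then <body>.
main_content = soup.find("main") or soup.find("article") or soup.body
if main_content is None:
    print("no main content")  # the app logs a warning and returns empty content here
else:
    print(main_content.get_text(separator="\n", strip=True))  # -> Hello, world.
```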
@@ -389,117 +238,238 @@ class FileProcessor:
             logger.error(f"Error reading file {filename}: {str(e)}")
         return results
 
-    def _process_single_file(self, file) -> List[Dict]:
+    def _process_single_file(self, file) -> List[Dict]:
+        """Process a single file"""
+        try:
+            file_stat = os.stat(file.name)
+
+            # For very large files, read in chunks and summarize
+            if file_stat.st_size > 100 * 1024 * 1024:  # 100MB
+                logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
+
+                # Read first and last 1MB for extremely large files
+                content = ""
+                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                    content = f.read(1 * 1024 * 1024)  # First 1MB
+                    content += "\n...[Content truncated due to large file size]...\n"
+
+                    # Seek to the last 1MB
+                    f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
+                    content += f.read()  # Last 1MB
+            else:
+                # Regular file processing
+                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
+                    content = f.read()
+
+            return [{
+                'source': 'file',
+                'filename': os.path.basename(file.name),
+                'file_size': file_stat.st_size,
+                'mime_type': mimetypes.guess_type(file.name)[0],
+                'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                'content': content,
+                'timestamp': datetime.now().isoformat()
+            }]
+        except Exception as e:
+            logger.error(f"File processing error: {e}")
+            return []
+
+def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
+    """Clean and validate JSON data"""
     try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # If it's a string, try to parse it
+        if isinstance(data, str):
+            # Remove any existing content and extra whitespace
+            data = data.strip()
+            data = json.loads(data)
+
+        # Convert to string and back to ensure proper JSON format
+        cleaned = json.loads(json.dumps(data))
+        return cleaned
+    except json.JSONDecodeError as e:
+        logger.error(f"JSON cleaning error: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Unexpected error while cleaning JSON: {e}")
+        return None
+
+def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]:
+    """Generate QR code(s) from data"""
+    try:
+        output_dir = Path('output/qr_codes')
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        if combined:
+            # Generate single QR code for all data
+            cleaned_data = clean_json(data)
+            if cleaned_data:
+                qr = qrcode.QRCode(
+                    version=None,
+                    error_correction=qrcode.constants.ERROR_CORRECT_L,
+                    box_size=10,
+                    border=4,
+                )
+                json_str = json.dumps(cleaned_data, ensure_ascii=False)
+                qr.add_data(json_str)
+                qr.make(fit=True)
+
+                img = qr.make_image(fill_color="black", back_color="white")
+                output_path = output_dir / f'combined_qr_{int(time.time())}.png'
+                img.save(str(output_path))
+                return [str(output_path)]
         else:
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Generate separate QR codes for each item
+            if isinstance(data, list):
+                paths = []
+                for idx, item in enumerate(data):
+                    cleaned_item = clean_json(item)
+                    if cleaned_item:
+                        qr = qrcode.QRCode(
+                            version=None,
+                            error_correction=qrcode.constants.ERROR_CORRECT_L,
+                            box_size=10,
+                            border=4,
+                        )
+                        json_str = json.dumps(cleaned_item, ensure_ascii=False)
+                        qr.add_data(json_str)
+                        qr.make(fit=True)
+
+                        img = qr.make_image(fill_color="black", back_color="white")
+                        output_path = output_dir / f'item_{idx}_qr_{int(time.time())}.png'
+                        img.save(str(output_path))
+                        paths.append(str(output_path))
+                return paths
+            else:
+                # Single item, not combined
+                cleaned_item = clean_json(data)
+                if cleaned_item:
+                    qr = qrcode.QRCode(
+                        version=None,
+                        error_correction=qrcode.constants.ERROR_CORRECT_L,
+                        box_size=10,
+                        border=4,
+                    )
+                    json_str = json.dumps(cleaned_item, ensure_ascii=False)
+                    qr.add_data(json_str)
+                    qr.make(fit=True)
+
+                    img = qr.make_image(fill_color="black", back_color="white")
+                    output_path = output_dir / f'single_qr_{int(time.time())}.png'
+                    img.save(str(output_path))
+                    return [str(output_path)]
+
+        return []
     except Exception as e:
-        logger.error(f"
+        logger.error(f"QR generation error: {e}")
         return []
 
-def generate_qr_code(json_data):
-    """Generate a QR code from JSON data."""
-    qr = qrcode.make(json_data)
-    qr_path = "output/qr_code.png"
-    qr.save(qr_path)
-    return qr_path
-
 def create_interface():
     """Create a comprehensive Gradio interface with advanced features"""
-
+
     css = """
     .container { max-width: 1200px; margin: auto; }
-    .warning { background-color: #fff3cd; color: #856404; }
-    .error { background-color: #f8d7da; color: #721c24; }
+    .warning { background-color: #fff3cd; color: #856404; padding: 10px; border-radius: 4px; }
+    .error { background-color: #f8d7da; color: #721c24; padding: 10px; border-radius: 4px; }
+    .success { background-color: #d4edda; color: #155724; padding: 10px; border-radius: 4px; }
     """
-
-    with gr.Blocks(css=css, title="Advanced
-    gr.Markdown("# Advanced
-
+
+    with gr.Blocks(css=css, title="Advanced Data Processor & QR Generator") as interface:
+        gr.Markdown("# Advanced Data Processing & QR Code Generator")
+
     with gr.Tab("URL Processing"):
         url_input = gr.Textbox(
             label="Enter URLs (comma or newline separated)",
             lines=5,
-            placeholder="https://example1.com\nhttps://example2.com"
+            placeholder="https://example1.com\nhttps://example2.com",
+            value=""
         )
-
+
     with gr.Tab("File Input"):
         file_input = gr.File(
             label="Upload text file or ZIP archive",
             file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
         )
 
-    with gr.Tab("
-    text_input = gr.
-    label="
-    lines=
-    placeholder="Paste your
-
-
-    with gr.Tab("JSON Editor"):
-        json_editor = gr.Textbox(
-            label="JSON Editor",
-            lines=20,
-            placeholder="View and edit your JSON data here...",
-            interactive=True,
-            elem_id="json-editor"  # Optional: for custom styling
+    with gr.Tab("Notepad"):
+        text_input = gr.TextArea(
+            label="JSON Data Input",
+            lines=15,
+            placeholder="Paste your JSON data here...",
+            value=""
         )
-
-
-
-
-
-
-
+
+        with gr.Row():
+            example_btn = gr.Button("Load Example JSON", variant="secondary")
+            clear_btn = gr.Button("Clear Input", variant="secondary")
+
+        with gr.Row():
+            combine_data = gr.Checkbox(
+                label="Combine all data into single QR code",
+                value=True,
+                info="Generate one QR code for all data, or separate QR codes for each item"
            )
-
-
-
-
-        output_text = gr.Textbox(label="Processing
-
-
-
-
-
+        process_btn = gr.Button("Process & Generate QR", variant="primary", scale=2)
+
+        output_json = gr.JSON(label="Processed JSON Data")
+        output_gallery = gr.Gallery(label="Generated QR Codes", columns=2, height=400)
+        output_text = gr.Textbox(label="Processing Status", interactive=False)
+
+        def load_example():
+            example_json = {
+                "type": "product_catalog",
+                "items": [
+                    {
+                        "id": "123",
+                        "name": "Test Product",
+                        "description": "This is a test product description",
+                        "price": 29.99,
+                        "category": "electronics",
+                        "tags": ["test", "sample", "demo"]
+                    },
+                    {
+                        "id": "456",
+                        "name": "Another Product",
+                        "description": "Another test product description",
+                        "price": 49.99,
+                        "category": "accessories",
+                        "tags": ["sample", "test"]
+                    }
+                ],
+                "metadata": {
+                    "timestamp": datetime.now().isoformat(),
+                    "version": "1.0",
+                    "source": "example"
+                }
+            }
+            return json.dumps(example_json, indent=2)
+
+        def clear_input():
+            return ""
+
+        def process_all_inputs(urls, file, text, combine):
+            """Process all input types and generate QR codes"""
             try:
-                processor = URLProcessor()
-                file_processor = FileProcessor()
                 results = []
-
-                # Process
-                if
+
+                # Process text input first (since it's direct JSON)
+                if text and text.strip():
+                    try:
+                        # Try to parse as JSON
+                        json_data = json.loads(text)
+                        if isinstance(json_data, list):
+                            results.extend(json_data)
+                        else:
+                            results.append(json_data)
+                    except json.JSONDecodeError as e:
+                        return None, [], f"Invalid JSON format: {str(e)}"
+
+                # Process URLs if provided
+                if urls and urls.strip():
+                    processor = URLProcessor()
                     url_list = re.split(r'[,\n]', urls)
                     url_list = [url.strip() for url in url_list if url.strip()]
-
+
                     for url in url_list:
                         validation = processor.validate_url(url)
                         if validation.get('is_valid'):
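Note: a hedged usage sketch for the two new module-level helpers; it assumes `clean_json` and `generate_qr_code` are importable from app.py and that the qrcode package is installed. The payload is invented:

```python
from app import clean_json, generate_qr_code

payload = '{"id": "123", "name": "Test Product"}'
cleaned = clean_json(payload)       # dict on success, None on invalid JSON
paths = generate_qr_code(cleaned)   # combined=True -> one PNG path in a list
print(cleaned)
print(paths)  # e.g. ['output/qr_codes/combined_qr_<timestamp>.png']
```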
@@ -511,80 +481,73 @@ def create_interface():
                                 'content': content,
                                 'timestamp': datetime.now().isoformat()
                             })
-
-                # Process files
+
+                # Process files if provided
                 if file:
-
-
-
-
-
-
-                        'source': 'direct_input',
-                        'content': cleaned_text,
-                        'timestamp': datetime.now().isoformat()
-                    })
-
-                # Generate output
+                    file_processor = FileProcessor()
+                    file_results = file_processor.process_file(file)
+                    if file_results:
+                        results.extend(file_results)
+
+                # Generate QR codes
                 if results:
-
-
-
-
-
-
-
-
-
-                    return str(output_path), summary, json_data  # Return JSON for editor
+                    qr_paths = generate_qr_code(results, combined=combine)
+                    if qr_paths:
+                        return (
+                            results,
+                            [str(path) for path in qr_paths],
+                            f"Successfully processed {len(results)} items and generated {len(qr_paths)} QR code(s)!"
+                        )
+                    else:
+                        return None, [], "Failed to generate QR codes. Please check the input data."
                 else:
-                    return None, "No valid content to process.
-
+                    return None, [], "No valid content to process. Please provide some input data."
+
             except Exception as e:
                 logger.error(f"Processing error: {e}")
-                return None, f"Error: {str(e)}"
-
-
-
-
-                return generate_qr_code(json_data)
-            return None
-
+                return None, [], f"Error: {str(e)}"
+
+        # Set up event handlers
+        example_btn.click(load_example, outputs=[text_input])
+        clear_btn.click(clear_input, outputs=[text_input])
         process_btn.click(
             process_all_inputs,
-            inputs=[url_input, file_input, text_input,
-            outputs=[
-        )
-
-        qr_btn.click(
-            generate_qr,
-            inputs=json_editor,
-            outputs=qr_output
+            inputs=[url_input, file_input, text_input, combine_data],
+            outputs=[output_json, output_gallery, output_text]
         )
-
+
         gr.Markdown("""
-        ###
-        - **URL Processing**:
-        - **File
-        - **
-        - **JSON
-        - **
-        -
+        ### Features
+        - **URL Processing**: Extract content from websites
+        - **File Processing**: Handle text files and archives
+        - **Notepad**: Direct JSON data input/manipulation
+        - **JSON Cleaning**: Automatic JSON validation and formatting
+        - **QR Generation**: Generate QR codes with embedded JSON data
+        - **Flexible Output**: Choose between combined or separate QR codes
+
+        ### Usage Tips
+        1. Use the **Notepad** tab for direct JSON input
+        2. Click "Load Example JSON" to see a sample format
+        3. Choose whether to combine all data into a single QR code
+        4. The generated QR codes will contain the complete JSON data
         """)
-
+
     return interface
 
 def main():
     # Configure system settings
     mimetypes.init()
-
+
+    # Create output directories
+    Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
+
     # Create and launch interface
     interface = create_interface()
-
+
     # Launch with proper configuration
     interface.launch(
         server_name="0.0.0.0",
-        server_port=
+        server_port=8000,
         show_error=True,
         share=False,
         inbrowser=True,
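Note: to check that a generated PNG round-trips back to the original JSON, one option is OpenCV's QRCodeDetector. This is a separate, assumed test sketch, not part of the app; cv2 is no longer imported by app.py after this change, and the filename is illustrative:

```python
import json
import cv2  # opencv-python, installed separately

detector = cv2.QRCodeDetector()
img = cv2.imread("output/qr_codes/combined_qr_1700000000.png")  # made-up name
if img is not None:
    payload, points, _ = detector.detectAndDecode(img)
    print(json.loads(payload) if payload else "no QR code detected")
```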