theshresthshukla commited on
Commit
21d27b2
·
verified ·
1 Parent(s): e4e699c

Upload 10 files

Browse files
src/services/__pycache__/appconfig.cpython-310.pyc ADDED
Binary file (1.69 kB). View file
 
src/services/__pycache__/entity_extractor.cpython-310.pyc ADDED
Binary file (5.48 kB). View file
 
src/services/__pycache__/image_downloader.cpython-310.pyc ADDED
Binary file (8.31 kB). View file
 
src/services/__pycache__/logo_downloader.cpython-310.pyc ADDED
Binary file (6.5 kB). View file
 
src/services/appconfig.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration settings for the Logo Downloader application
3
+ """
4
+ import os
5
+ from pathlib import Path
6
+ from dotenv import load_dotenv
7
+ load_dotenv()
8
+
9
+ # API Configuration
10
+ GEMINI_API_KEY = os.getenv('GEMINI_API_KEY', '')
11
+
12
+ # Directory Configuration
13
+ BASE_DIR = Path(__file__).parent
14
+ # DOWNLOADS_DIR = BASE_DIR / 'downloads'
15
+
16
+ DOWNLOADS_DIR = Path('downloads')
17
+
18
+ TEMP_DIR = BASE_DIR / 'temp'
19
+
20
+ # Download Configuration
21
+ MAX_ENTITIES = 20
22
+ MAX_LOGOS_PER_ENTITY = 15
23
+ DEFAULT_LOGOS_PER_ENTITY = 10
24
+ DOWNLOAD_TIMEOUT = 15
25
+ REQUEST_DELAY = 1 # seconds between requests
26
+
27
+ # File Configuration
28
+ ALLOWED_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.svg', '.webp']
29
+ MIN_FILE_SIZE = 500 # bytes
30
+ MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
31
+
32
+ # HTTP Configuration
33
+ HEADERS = {
34
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
35
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
36
+ 'Accept-Language': 'en-US,en;q=0.5',
37
+ 'Accept-Encoding': 'gzip, deflate',
38
+ 'Connection': 'keep-alive',
39
+ 'Upgrade-Insecure-Requests': '1',
40
+ }
41
+
42
+ # Image signatures for validation
43
+ IMAGE_SIGNATURES = [
44
+ b'\x89PNG', # PNG
45
+ b'\xff\xd8\xff', # JPEG
46
+ b'<svg', # SVG
47
+ b'RIFF', # WebP
48
+ b'GIF8', # GIF
49
+ ]
50
+
51
+ # Common tech entities for fallback
52
+ COMMON_TECH_ENTITIES = [
53
+ 'Microsoft', 'Google', 'Apple', 'Amazon', 'Adobe', 'React', 'Angular', 'Vue',
54
+ 'Docker', 'Kubernetes', 'AWS', 'Azure', 'Firebase', 'MongoDB', 'PostgreSQL',
55
+ 'Redis', 'Node.js', 'Python', 'JavaScript', 'TypeScript', 'Figma', 'Sketch',
56
+ 'Photoshop', 'Illustrator', 'AutoCAD', 'Unity', 'Blender', 'GitHub', 'GitLab',
57
+ 'Slack', 'Discord', 'Zoom', 'Teams', 'Spotify', 'Netflix', 'Instagram',
58
+ 'Facebook', 'Twitter', 'LinkedIn', 'TikTok', 'WhatsApp', 'Telegram',
59
+ 'Shopify', 'WordPress', 'Salesforce', 'Microsoft Fabric'
60
+ ]
src/services/entity_extractor.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Entity extraction module using Gemini AI with fallback methods
3
+ """
4
+ import re
5
+ import logging
6
+ from typing import List, Optional
7
+ import google.generativeai as genai
8
+
9
+ from services.appconfig import GEMINI_API_KEY, COMMON_TECH_ENTITIES, MAX_ENTITIES
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class EntityExtractor:
    """Extract entities from text using Gemini AI or fallback methods"""

    # Leading list markers ("- ", "* ", "1.", "2)") that the model sometimes
    # emits despite the prompt asking for bare lines, one name per line.
    _LIST_MARKER = re.compile(r'^\s*(?:[-*]+|\d+[.)])\s*')

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize EntityExtractor

        Args:
            api_key (str, optional): Gemini API key; falls back to the
                configured GEMINI_API_KEY when omitted.
        """
        self.api_key = api_key or GEMINI_API_KEY
        self.model = None
        self._setup_gemini()

    def _setup_gemini(self) -> None:
        """Configure the Gemini client; on any failure leave model=None so
        extraction falls back to pattern matching."""
        if not self.api_key:
            logger.warning("No Gemini API key provided, using fallback method")
            return

        try:
            genai.configure(api_key=self.api_key)
            self.model = genai.GenerativeModel('gemini-2.0-flash-exp')
            logger.info("Gemini API initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Gemini API: {e}")
            self.model = None

    def _parse_response_lines(self, response_text: str) -> List[str]:
        """
        Turn a raw model response into a clean list of candidate entities.

        Bug fix: lines prefixed with a bullet ("- Microsoft") used to be
        discarded wholesale; now the list marker is stripped and the name
        kept. Candidates still must pass _is_valid_entity.

        Args:
            response_text (str): Raw text returned by the model

        Returns:
            List[str]: Cleaned candidate entity names (uncapped)
        """
        candidates = []
        for raw_line in response_text.strip().split('\n'):
            name = self._LIST_MARKER.sub('', raw_line).strip()
            if len(name) > 1 and self._is_valid_entity(name):
                candidates.append(name)
        return candidates

    def extract_with_gemini(self, text: str) -> List[str]:
        """
        Extract entities using Gemini AI

        Args:
            text (str): Input text

        Returns:
            List[str]: List of extracted entities, capped at MAX_ENTITIES

        Raises:
            Exception: if the model is unavailable or the API call fails
                (caller extract_entities catches this and falls back)
        """
        if not self.model:
            raise Exception("Gemini model not available")

        prompt = """
        Extract company names, product names, software names, tool names, and brand names from this text.
        Only return names that would have recognizable logos (like Microsoft, Adobe, React, etc.).
        Return as a simple list, one name per line, no bullet points or numbers.
        Avoid generic terms like "cloud" or "database".

        Text: {text}
        """.format(text=text)

        try:
            response = self.model.generate_content(prompt)

            if not response.text:
                return []

            filtered_entities = self._parse_response_lines(response.text)

            logger.info(f"Gemini extracted {len(filtered_entities)} entities")
            return filtered_entities[:MAX_ENTITIES]

        except Exception as e:
            logger.error(f"Gemini extraction failed: {e}")
            raise

    def extract_with_fallback(self, text: str) -> List[str]:
        """
        Extract entities using fallback pattern matching (no API needed)

        Args:
            text (str): Input text

        Returns:
            List[str]: List of extracted entities, capped at MAX_ENTITIES
        """
        entities = []

        # Known tech brands, matched case-insensitively as substrings.
        for tech_entity in COMMON_TECH_ENTITIES:
            if tech_entity.lower() in text.lower():
                entities.append(tech_entity)

        # Capitalized words of 3+ letters — likely proper nouns.
        cap_words = re.findall(r'\b[A-Z][a-zA-Z]{2,}\b', text)
        for word in cap_words:
            if self._is_valid_entity(word) and word not in entities:
                entities.append(word)

        # Dotted product names such as Node.js / Vue.js.
        pattern_words = re.findall(r'\b[A-Z][a-zA-Z]*\.[a-zA-Z]+\b', text)
        for word in pattern_words:
            if word not in entities:
                entities.append(word)

        # Remove case-insensitive duplicates while preserving order.
        unique_entities = []
        seen = set()
        for entity in entities:
            if entity.lower() not in seen:
                seen.add(entity.lower())
                unique_entities.append(entity)

        logger.info(f"Fallback extracted {len(unique_entities)} entities")
        return unique_entities[:MAX_ENTITIES]

    def _is_valid_entity(self, entity: str) -> bool:
        """
        Check if entity is valid for logo extraction

        Args:
            entity (str): Entity name

        Returns:
            bool: True if valid entity
        """
        # English stop words that capitalization matching may pick up but
        # that are never brand names.
        invalid_words = {
            'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
            'by', 'from', 'up', 'about', 'into', 'through', 'during', 'before',
            'after', 'above', 'below', 'between', 'among'}

        entity_lower = entity.lower()

        # Reject implausibly short or long names.
        if len(entity) < 2 or len(entity) > 50:
            return False

        if entity_lower in invalid_words:
            return False

        # Must contain at least one letter (rejects pure numbers/symbols).
        if not re.search(r'[a-zA-Z]', entity):
            return False

        return True

    def extract_entities(self, text: str) -> List[str]:
        """
        Extract entities from text using available methods.

        Tries Gemini first when a model was initialized; on any failure or
        an empty result, silently falls back to pattern matching.

        Args:
            text (str): Input text

        Returns:
            List[str]: List of extracted entities (may be empty)
        """
        if not text or not text.strip():
            return []

        logger.info("Starting entity extraction...")

        if self.model:
            try:
                entities = self.extract_with_gemini(text)
                if entities:
                    logger.info(f"Successfully extracted {len(entities)} entities with Gemini")
                    return entities
            except Exception as e:
                logger.warning(f"Gemini extraction failed, using fallback: {e}")

        entities = self.extract_with_fallback(text)
        logger.info(f"Extracted {len(entities)} entities using fallback method")

        return entities
src/services/image_downloader.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Image downloading module with multiple search providers
3
+ """
4
+ import os
5
+ import json
6
+ import logging
7
+ from typing import List, Tuple
8
+ from urllib.parse import quote_plus, urlparse
9
+ import requests
10
+ from bs4 import BeautifulSoup
11
+
12
+ from services.appconfig import HEADERS, DOWNLOAD_TIMEOUT, REQUEST_DELAY, ALLOWED_EXTENSIONS
13
+ from utils.utils import is_valid_image_file, get_file_extension, clean_up_file, rate_limit_delay
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class ImageDownloader:
    """Download images from various search providers"""

    def __init__(self):
        """Initialize ImageDownloader with a shared HTTP session carrying
        browser-like headers."""
        self.session = requests.Session()
        self.session.headers.update(HEADERS)

    def get_bing_image_urls(self, entity: str, num_images: int = 15) -> List[str]:
        """
        Get image URLs from Bing search

        Args:
            entity (str): Entity name to search for
            num_images (int): Maximum number of URLs to return

        Returns:
            List[str]: List of image URLs (empty on any failure)
        """
        logger.info(f"Searching Bing for {entity} logos...")

        query = f"{entity} logo png transparent high quality"
        encoded_query = quote_plus(query)
        search_url = f"https://www.bing.com/images/search?q={encoded_query}&form=HDRSC2&first=1&tsc=ImageBasicHover"

        try:
            # SECURITY NOTE(review): verify=False disables TLS certificate
            # validation; kept for behavior compatibility but should be
            # removed or made configurable.
            response = self.session.get(search_url, timeout=DOWNLOAD_TIMEOUT, verify=False)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')
            image_urls = []

            # Bing embeds per-image metadata as JSON in the 'm' attribute
            # of <a class="iusc"> containers.
            img_containers = soup.find_all('a', {'class': 'iusc'})
            for container in img_containers:
                m_attr = container.get('m')
                if m_attr:
                    try:
                        img_data = json.loads(m_attr)
                        # 'murl' is the full-size image, 'turl' the thumbnail.
                        img_url = img_data.get('murl') or img_data.get('turl')
                        if img_url and self._is_valid_image_url(img_url):
                            image_urls.append(img_url)
                    except json.JSONDecodeError:
                        continue

            # Fallback: scrape plain <img> tags when the structured parse
            # yielded too few results.
            if len(image_urls) < 5:
                img_tags = soup.find_all('img')
                for img in img_tags:
                    src = img.get('src') or img.get('data-src')
                    if src and self._is_valid_image_url(src) and 'logo' in src.lower():
                        if src.startswith('http'):
                            image_urls.append(src)

            logger.info(f"Found {len(image_urls)} URLs from Bing")
            return image_urls[:num_images]

        except Exception as e:
            logger.error(f"Bing search failed for {entity}: {e}")
            return []

    def get_duckduckgo_image_urls(self, entity: str, num_images: int = 15) -> List[str]:
        """
        Get image URLs from DuckDuckGo search

        Args:
            entity (str): Entity name to search for
            num_images (int): Maximum number of URLs to return

        Returns:
            List[str]: List of image URLs (empty on any failure)
        """
        logger.info(f"Searching DuckDuckGo for {entity} logos...")

        query = f"{entity} logo hd png transparent"
        encoded_query = quote_plus(query)
        search_url = f"https://duckduckgo.com/?q={encoded_query}&t=h_&iax=images&ia=images"

        try:
            # SECURITY NOTE(review): verify=False — see get_bing_image_urls.
            response = self.session.get(search_url, timeout=DOWNLOAD_TIMEOUT, verify=False)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')
            image_urls = []

            img_tags = soup.find_all('img')
            for img in img_tags:
                src = img.get('src') or img.get('data-src')
                if src and self._is_valid_image_url(src) and src.startswith('http'):
                    image_urls.append(src)

            logger.info(f"Found {len(image_urls)} URLs from DuckDuckGo")
            return image_urls[:num_images]

        except Exception as e:
            logger.error(f"DuckDuckGo search failed for {entity}: {e}")
            return []

    def get_alternative_logo_sources(self, entity: str) -> List[str]:
        """
        Get URLs from alternative logo sources

        Probes well-known logo CDNs with a HEAD request and keeps only the
        URLs that respond 200.

        Args:
            entity (str): Entity name

        Returns:
            List[str]: List of alternative logo URLs
        """
        urls = []
        entity_clean = entity.lower().replace(' ', '').replace('.', '')
        entity_hyphen = entity.lower().replace(' ', '-')

        # Candidate URL templates on known logo-hosting services.
        logo_sources = [
            f"https://cdn.worldvectorlogo.com/logos/{entity_hyphen}.svg",
            f"https://logos-world.net/wp-content/uploads/2020/11/{entity.replace(' ', '-')}-Logo.png",
            f"https://logoeps.com/wp-content/uploads/2013/03/vector-{entity_clean}-logo.png",
            f"https://1000logos.net/wp-content/uploads/2016/10/{entity.replace(' ', '-')}-Logo.png",
        ]

        for url in logo_sources:
            try:
                response = self.session.head(url, timeout=5)
                if response.status_code == 200:
                    urls.append(url)
                    logger.info(f"Found alternative logo: {url}")
            except Exception:
                # Best-effort probing: unreachable hosts are simply skipped.
                continue

        return urls

    def _is_valid_image_url(self, url: str) -> bool:
        """
        Check if URL is a valid image URL

        Args:
            url (str): URL to check

        Returns:
            bool: True if the URL contains a known image extension
        """
        if not url:
            return False

        # Substring check (not suffix) so query-string URLs still match.
        url_lower = url.lower()
        return any(ext in url_lower for ext in ALLOWED_EXTENSIONS)

    def download_image(self, url: str, filepath: str) -> bool:
        """
        Download image from URL

        Args:
            url (str): Image URL
            filepath (str): Local filepath to save image

        Returns:
            bool: True if download succeeded and the file validates as an
                image; partial/invalid files are cleaned up
        """
        try:
            logger.debug(f"Downloading: {url}")

            # SECURITY NOTE(review): verify=False — see get_bing_image_urls.
            response = self.session.get(url, timeout=DOWNLOAD_TIMEOUT, stream=True, verify=False)
            response.raise_for_status()

            # Reject obviously non-image responses early.
            content_type = response.headers.get('content-type', '').lower()
            if not any(img_type in content_type for img_type in ['image', 'svg']):
                logger.warning(f"Invalid content type for {url}: {content_type}")
                return False

            # Stream to disk in chunks to bound memory use.
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

            # Validate magic bytes / size; delete anything that fails.
            if is_valid_image_file(filepath):
                logger.debug(f"Successfully downloaded: {filepath}")
                return True
            else:
                clean_up_file(filepath)
                logger.warning(f"Downloaded invalid image: {url}")
                return False

        except Exception as e:
            clean_up_file(filepath)
            logger.error(f"Download failed for {url}: {e}")
            return False

    def download_logos_for_entity(self, entity: str, entity_folder: str, num_logos: int = 10) -> Tuple[int, List[str]]:
        """
        Download logos for a single entity

        Args:
            entity (str): Entity name
            entity_folder (str): Folder to save logos
            num_logos (int): Number of logos to download

        Returns:
            Tuple[int, List[str]]: (number downloaded, list of downloaded files)
        """
        logger.info(f"Downloading top {num_logos} logos for: {entity}")

        # Collect candidate URLs from all sources, best sources first.
        all_urls = []

        alt_urls = self.get_alternative_logo_sources(entity)
        all_urls.extend(alt_urls)

        bing_urls = self.get_bing_image_urls(entity, 20)
        all_urls.extend(bing_urls)

        ddg_urls = self.get_duckduckgo_image_urls(entity, 15)
        all_urls.extend(ddg_urls)

        # Remove duplicates while preserving order.
        unique_urls = []
        seen = set()
        for url in all_urls:
            if url not in seen:
                seen.add(url)
                unique_urls.append(url)

        if not unique_urls:
            logger.warning(f"No URLs found for {entity}")
            return 0, []

        logger.info(f"Found {len(unique_urls)} unique URLs for {entity}")

        downloaded_files = []
        downloaded_count = 0

        for url in unique_urls:
            if downloaded_count >= num_logos:
                break

            try:
                extension = get_file_extension(url)
                filename = f"{entity.replace(' ', '_')}_logo_{downloaded_count + 1}{extension}"
                filepath = os.path.join(entity_folder, filename)

                if self.download_image(url, filepath):
                    downloaded_count += 1
                    downloaded_files.append(filepath)
                    # Bug fix: previously logged a literal "(unknown)"
                    # instead of the saved filename.
                    logger.info(f"Downloaded ({downloaded_count}/{num_logos}): {filename}")

                # Be respectful to servers.
                rate_limit_delay(REQUEST_DELAY)

            except Exception as e:
                logger.error(f"Error processing URL {url}: {e}")
                continue

        logger.info(f"Successfully downloaded {downloaded_count}/{num_logos} logos for {entity}")
        return downloaded_count, downloaded_files
src/services/logo_downloader.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main Logo Downloader class that orchestrates the entire process
3
+ """
4
+ import os
5
+ import zipfile
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import List, Tuple, Dict, Optional
9
+
10
+ from services.appconfig import DOWNLOADS_DIR, DEFAULT_LOGOS_PER_ENTITY
11
+ from utils.utils import create_safe_filename, create_directory, format_file_size
12
+ from .entity_extractor import EntityExtractor
13
+ from .image_downloader import ImageDownloader
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
class LogoDownloader:
    """Main class for downloading logos based on extracted entities"""

    def __init__(self, gemini_api_key: str, output_dir: Optional[str] = None):
        """
        Initialize LogoDownloader

        Args:
            gemini_api_key (str): Gemini API key for entity extraction
            output_dir (str, optional): Directory to save downloads;
                defaults to the configured DOWNLOADS_DIR
        """
        self.output_dir = Path(output_dir) if output_dir else DOWNLOADS_DIR
        self.entity_extractor = EntityExtractor(gemini_api_key)
        self.image_downloader = ImageDownloader()
        # DRY fix: stats shape was duplicated here and in _reset_stats;
        # _reset_stats is now the single source of truth.
        self._reset_stats()

        # Create output directory
        create_directory(self.output_dir)

    def process_text(self, text: str, logos_per_entity: int = DEFAULT_LOGOS_PER_ENTITY) -> Dict:
        """
        Main processing function: extract entities and download logos

        Args:
            text (str): Input text containing entity references
            logos_per_entity (int): Number of logos to download per entity

        Returns:
            Dict: Processing results and statistics
        """
        logger.info("Starting logo download process...")

        self._reset_stats()

        entities = self.entity_extractor.extract_entities(text)

        if not entities:
            logger.warning("No entities found in text")
            return self._get_results("No entities found in the provided text")

        self.stats['total_entities'] = len(entities)
        logger.info(f"Found {len(entities)} entities: {', '.join(entities)}")

        # Download logos for each entity; failures are recorded, not fatal.
        results = []
        for i, entity in enumerate(entities, 1):
            logger.info(f"Processing [{i}/{len(entities)}]: {entity}")

            try:
                result = self._process_single_entity(entity, logos_per_entity)
                results.append(result)

                if result['downloaded_count'] > 0:
                    self.stats['successful_entities'] += 1
                    self.stats['total_downloads'] += result['downloaded_count']
                else:
                    self.stats['failed_entities'] += 1

            except Exception as e:
                logger.error(f"Failed to process entity {entity}: {e}")
                self.stats['failed_entities'] += 1
                results.append({
                    'entity': entity,
                    'downloaded_count': 0,
                    'files': [],
                    'error': str(e)
                })

        # Create zip package only when something was actually downloaded.
        zip_path = None
        if self.stats['total_downloads'] > 0:
            zip_path = self._create_zip_package()

        return self._get_results(
            "Processing completed successfully",
            entities=entities,
            results=results,
            zip_path=zip_path
        )

    def _process_single_entity(self, entity: str, logos_per_entity: int) -> Dict:
        """
        Process a single entity: create folder and download logos

        Args:
            entity (str): Entity name
            logos_per_entity (int): Number of logos to download

        Returns:
            Dict: Processing result for this entity

        Raises:
            Exception: if the per-entity folder cannot be created
        """
        safe_name = create_safe_filename(entity)
        entity_folder = self.output_dir / safe_name

        if not create_directory(entity_folder):
            raise Exception(f"Failed to create directory for {entity}")

        downloaded_count, downloaded_files = self.image_downloader.download_logos_for_entity(
            entity, str(entity_folder), logos_per_entity
        )

        return {
            'entity': entity,
            'safe_name': safe_name,
            'downloaded_count': downloaded_count,
            'files': downloaded_files,
            'folder': str(entity_folder)
        }

    def _create_zip_package(self) -> str:
        """
        Create ZIP package of all downloaded logos

        The archive is written NEXT TO the output directory (parent), so it
        never includes itself.

        Returns:
            str: Path to created ZIP file

        Raises:
            Exception: re-raised on any archiving failure
        """
        zip_filename = f"{self.output_dir.name}_logos.zip"
        zip_path = self.output_dir.parent / zip_filename

        logger.info(f"Creating ZIP package: {zip_path}")

        try:
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for root, _dirs, files in os.walk(self.output_dir):
                    for file in files:
                        file_path = os.path.join(root, file)
                        # Store paths relative to output_dir so the archive
                        # unpacks as entity folders, not absolute paths.
                        arcname = os.path.relpath(file_path, self.output_dir)
                        zipf.write(file_path, arcname)

            file_size = os.path.getsize(zip_path)
            logger.info(f"ZIP package created: {zip_path} ({format_file_size(file_size)})")
            return str(zip_path)

        except Exception as e:
            logger.error(f"Failed to create ZIP package: {e}")
            raise

    def _reset_stats(self) -> None:
        """Reset processing statistics"""
        self.stats = {
            'total_entities': 0,
            'total_downloads': 0,
            'successful_entities': 0,
            'failed_entities': 0
        }

    def _get_results(self, message: str, **kwargs) -> Dict:
        """
        Get formatted results dictionary

        Args:
            message (str): Status message
            **kwargs: Additional result data merged into the dict

        Returns:
            Dict: Formatted results ('success' only when something downloaded)
        """
        return {
            'status': 'success' if self.stats['total_downloads'] > 0 else 'warning',
            'message': message,
            'stats': self.stats.copy(),
            **kwargs
        }

    def get_stats_summary(self) -> str:
        """
        Get human-readable stats summary

        Returns:
            str: Stats summary
        """
        if self.stats['total_entities'] == 0:
            return "No entities processed"

        avg_downloads = (
            self.stats['total_downloads'] / self.stats['successful_entities']
            if self.stats['successful_entities'] > 0 else 0
        )

        return (
            f"Processed {self.stats['total_entities']} entities. "
            f"Successfully downloaded {self.stats['total_downloads']} logos "
            f"({avg_downloads:.1f} average per entity). "
            f"Success rate: {self.stats['successful_entities']}/{self.stats['total_entities']}"
        )
213
+
214
+
215
def download_logos(text: str, gemini_api_key: str, logos_per_entity: int = DEFAULT_LOGOS_PER_ENTITY) -> Dict:
    """
    Run the full extract-and-download pipeline in a single call.

    Args:
        text (str): Text containing entity references
        gemini_api_key (str): Gemini API key
        logos_per_entity (int): Number of logos per entity

    Returns:
        Dict: Processing results
    """
    return LogoDownloader(gemini_api_key).process_text(text, logos_per_entity)
src/utils/__pycache__/utils.cpython-310.pyc ADDED
Binary file (4.52 kB). View file
 
src/utils/utils.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for the Logo Downloader application
3
+ """
4
+ import os
5
+ import re
6
+ import json
7
+ import time
8
+ from pathlib import Path
9
+ from typing import List, Optional
10
+ from urllib.parse import urlparse
11
+ import logging
12
+
13
+ from services.appconfig import IMAGE_SIGNATURES, MIN_FILE_SIZE, MAX_FILE_SIZE
14
+
15
+ # Setup logging
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def create_safe_filename(name: str) -> str:
    """
    Create a safe filename from entity name

    Drops every character that is not a word character, whitespace, or a
    hyphen, then collapses runs of hyphens/whitespace into single
    underscores.

    Args:
        name (str): Entity name

    Returns:
        str: Safe filename
    """
    stripped = re.sub(r'[^\w\s-]', '', name).strip()
    return re.sub(r'[-\s]+', '_', stripped)
33
+
34
+
35
def get_file_extension(url: str) -> str:
    """
    Extract file extension from URL

    Only extensions the downloader can handle are kept; anything else
    (or a missing extension) falls back to '.png'. The original case of
    the extension is preserved.

    Args:
        url (str): Image URL

    Returns:
        str: File extension (including the leading dot)
    """
    suffix = os.path.splitext(urlparse(url).path)[1]
    if suffix and suffix.lower() in ('.png', '.jpg', '.jpeg', '.svg', '.webp'):
        return suffix
    return '.png'
52
+
53
+
54
def is_valid_image_file(filepath: str) -> bool:
    """
    Validate if file is a proper image

    Checks existence, a sane size range, and the leading magic bytes
    against IMAGE_SIGNATURES. Bug fix: SVG files commonly begin with an
    '<?xml ...?>' declaration (or whitespace/DOCTYPE) before the '<svg'
    tag, so a 12-byte startswith check rejected valid SVGs; we now read a
    256-byte prefix and also search it for '<svg'.

    Args:
        filepath (str): Path to image file

    Returns:
        bool: True if valid image
    """
    try:
        if not os.path.exists(filepath):
            return False

        file_size = os.path.getsize(filepath)
        if file_size < MIN_FILE_SIZE or file_size > MAX_FILE_SIZE:
            logger.warning(f"Invalid file size: {file_size}")
            return False

        # Binary formats are identified by their first bytes; a larger
        # prefix lets us also find an '<svg' tag after an XML prolog.
        with open(filepath, 'rb') as f:
            header = f.read(256)

        for signature in IMAGE_SIGNATURES:
            if header.startswith(signature):
                return True

        # SVG with XML declaration / leading whitespace.
        if b'<svg' in header:
            return True

        return False

    except Exception as e:
        logger.error(f"Error validating image: {e}")
        return False
87
+
88
+
89
def create_directory(path: Path) -> bool:
    """
    Create directory if it doesn't exist

    Parents are created as needed; an already-existing directory is not
    an error.

    Args:
        path (Path): Directory path

    Returns:
        bool: True if successful
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Error creating directory {path}: {e}")
        return False
    return True
105
+
106
+
107
def clean_up_file(filepath: str) -> None:
    """
    Remove file if it exists

    Failures are logged, never raised — used for best-effort cleanup of
    partial downloads.

    Args:
        filepath (str): Path to file to remove
    """
    if not os.path.exists(filepath):
        return
    try:
        os.remove(filepath)
    except Exception as e:
        logger.error(f"Error removing file {filepath}: {e}")
119
+
120
+
121
def parse_json_safely(json_string: str) -> Optional[dict]:
    """
    Safely parse JSON string

    Args:
        json_string (str): JSON string to parse

    Returns:
        dict or None: Parsed JSON, or None when the input is not valid JSON
    """
    try:
        parsed = json.loads(json_string)
    except json.JSONDecodeError:
        return None
    return parsed
135
+
136
+
137
def rate_limit_delay(delay: float = 1.0) -> None:
    """
    Sleep between requests so we stay polite to remote servers.

    Args:
        delay (float): Delay in seconds
    """
    time.sleep(delay)
145
+
146
+
147
def format_file_size(size_bytes: int) -> str:
    """
    Format file size in human readable format

    Renders bytes as B, KB, or MB with one decimal place for the scaled
    units.

    Args:
        size_bytes (int): Size in bytes

    Returns:
        str: Formatted size string
    """
    kb = 1024
    mb = kb * 1024
    if size_bytes < kb:
        return f"{size_bytes} B"
    if size_bytes < mb:
        return f"{size_bytes / kb:.1f} KB"
    return f"{size_bytes / mb:.1f} MB"
163
+
164
+
165
def truncate_text(text: str, max_length: int = 100) -> str:
    """
    Truncate text to specified length

    When truncation occurs the result ends in "..." and is exactly
    max_length characters long.

    Args:
        text (str): Text to truncate
        max_length (int): Maximum length

    Returns:
        str: Truncated text
    """
    if len(text) > max_length:
        return text[:max_length - 3] + "..."
    return text