Spaces:

gradio
/

chat.gradio.app-HFIPs

Running

App Files Files Community

chat.gradio.app-HFIPs / utils.py

ysharma HF Staff

Create utils.py

6af4299 verified 3 months ago

raw

history blame

6.12 kB

	"""
	Utility functions for Universal MCP Client
	"""
	import re
	import logging
	from typing import List, Dict, Any, Optional
	from pathlib import Path

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	def validate_huggingface_space_name(space_name: str) -> bool:
	    """
	    Validate HuggingFace space name format.

	    Expected format: ``username/space-name``, where both parts are
	    non-empty and consist only of ASCII letters, digits, hyphens and
	    underscores.

	    Args:
	        space_name: Candidate space identifier.

	    Returns:
	        True when ``space_name`` is a string of exactly two valid parts
	        separated by a single "/", False otherwise (including for empty
	        or non-string input).
	    """
	    if not space_name or not isinstance(space_name, str):
	        return False

	    # Exactly one "/" separator is required; this also rejects names
	    # without any separator.
	    parts = space_name.split("/")
	    if len(parts) != 2:
	        return False

	    username, space_name_part = parts

	    # Usernames and space names share the same allowed character set.
	    # fullmatch is used instead of match + "$" because "$" also matches
	    # just before a trailing newline, which would accept "user/space\n".
	    segment_pattern = r'[a-zA-Z0-9\-_]+'

	    return bool(
	        re.fullmatch(segment_pattern, username)
	        and re.fullmatch(segment_pattern, space_name_part)
	    )

	def sanitize_server_name(name: str) -> str:
	    """
	    Turn an arbitrary name into an identifier usable as an MCP server name.

	    The name is lowercased, every run of characters other than ASCII
	    letters and digits is collapsed into a single underscore, and
	    underscores at either end are dropped. An empty input, or one that
	    sanitizes to nothing, yields "unnamed_server".
	    """
	    fallback = "unnamed_server"
	    if not name:
	        return fallback

	    # Underscores are treated as separators too, so one substitution both
	    # replaces special characters and collapses consecutive underscores.
	    collapsed = re.sub(r'[^a-zA-Z0-9]+', '_', name.lower())
	    trimmed = collapsed.strip('_')

	    return trimmed if trimmed else fallback

	def format_file_size(size_bytes: int) -> str:
	    """
	    Render a byte count as a human readable string such as "1.5 KB".

	    The value is divided by 1024 until it drops below 1024 or the largest
	    unit (TB) is reached, then shown with one decimal place. A size of
	    exactly zero is rendered as "0 B".
	    """
	    if size_bytes == 0:
	        return "0 B"

	    units = ("B", "KB", "MB", "GB", "TB")
	    scaled = size_bytes

	    # Walk through every unit except the last; the last one is the
	    # catch-all for anything still >= 1024 after all divisions.
	    for unit in units[:-1]:
	        if scaled >= 1024:
	            scaled /= 1024.0
	            continue
	        return f"{scaled:.1f} {unit}"

	    return f"{scaled:.1f} {units[-1]}"

	def get_file_info(file_path: str) -> Dict[str, Any]:
	    """
	    Collect basic metadata about the file at ``file_path``.

	    Returns a dict with the keys "name", "size", "size_formatted",
	    "extension" (lowercased suffix) and "exists" when the path exists.
	    When it does not exist the result is {"error": "File not found"}.
	    Any exception raised along the way is logged and reported as
	    {"error": <exception text>} instead of propagating.
	    """
	    try:
	        target = Path(file_path)

	        if path_exists := target.exists():
	            file_stat = target.stat()
	            return {
	                "name": target.name,
	                "size": file_stat.st_size,
	                "size_formatted": format_file_size(file_stat.st_size),
	                "extension": target.suffix.lower(),
	                "exists": path_exists,
	            }

	        return {"error": "File not found"}
	    except Exception as e:
	        logger.error(f"Error getting file info for {file_path}: {e}")
	        return {"error": str(e)}

	def truncate_text(text: str, max_length: int = 100, suffix: str = "...") -> str:
	    """
	    Truncate text to a maximum length with suffix.

	    Args:
	        text: Text to shorten. Falsy values (empty string, None) are
	            returned unchanged.
	        max_length: Maximum length of the returned string, suffix included.
	        suffix: Marker appended when the text is cut.

	    Returns:
	        ``text`` itself when it already fits; otherwise a string of at
	        most ``max_length`` characters. If ``max_length`` is too small to
	        hold the suffix, the text is hard-cut without a suffix.
	    """
	    if not text or len(text) <= max_length:
	        return text

	    keep = max_length - len(suffix)
	    if keep < 0:
	        # A negative slice bound would count from the end of the text and
	        # produce a result longer than max_length, so cut hard instead.
	        return text[:max(max_length, 0)]

	    return text[:keep] + suffix

	def format_tool_description(tool_name: str, description: str, max_desc_length: int = 150) -> str:
	    """
	    Build a one-line "Name: description" label for a tool.

	    Underscores in ``tool_name`` become spaces and the result is
	    title-cased; ``description`` is shortened with ``truncate_text`` to
	    at most ``max_desc_length`` characters.
	    """
	    display_name = " ".join(tool_name.split("_")).title()
	    short_description = truncate_text(description, max_desc_length)

	    return "{}: {}".format(display_name, short_description)

	# Media category keyed by lowercase file extension (without the dot).
	_MEDIA_TYPE_BY_EXTENSION = {
	    **dict.fromkeys(('png', 'jpg', 'jpeg', 'gif', 'webp', 'bmp', 'svg'), 'image'),
	    **dict.fromkeys(('mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac'), 'audio'),
	    **dict.fromkeys(('mp4', 'avi', 'mov', 'mkv', 'webm'), 'video'),
	}


	def _media_type_for_name(name: str) -> Optional[str]:
	    """Return the media category for the text after the last "." in ``name``, if known."""
	    _, dot, extension = name.rpartition('.')
	    return _MEDIA_TYPE_BY_EXTENSION.get(extension.lower()) if dot else None


	def extract_media_type_from_url(url: str) -> Optional[str]:
	    """
	    Extract media type from URL based on file extension.

	    Args:
	        url: A regular URL / file path, or a ``data:`` URL.

	    Returns:
	        'image', 'audio' or 'video' when the media type can be
	        determined, otherwise None.

	    For ``data:`` URLs only the declared MIME type is inspected, never
	    the payload. For other URLs the extension of the path is used; if the
	    path has no known extension, query parameter values are checked so
	    that links such as ``/download?file=clip.mp4`` are still recognised.
	    The host name is never inspected, so ``site.movies.com`` is not
	    mistaken for a ``.mov`` file.
	    """
	    if not url:
	        return None

	    # Handle data URLs: data:[<mediatype>][;params],<payload>
	    if url.startswith('data:'):
	        header = url[len('data:'):].split(',', 1)[0]
	        mime_type = header.split(';', 1)[0].strip().lower()
	        category, slash, _ = mime_type.partition('/')
	        if slash and category in ('image', 'audio', 'video'):
	            return category
	        return None

	    # Local import: keeps this function self-contained with respect to
	    # the module's import block.
	    from urllib.parse import parse_qsl, urlsplit

	    try:
	        parts = urlsplit(url)
	        path, query = parts.path, parts.query
	    except ValueError:
	        # urlsplit rejects some malformed inputs (e.g. a broken IPv6
	        # host); fall back to a plain textual split.
	        path, _, query = url.split('#', 1)[0].partition('?')

	    media_type = _media_type_for_name(path)
	    if media_type is not None:
	        return media_type

	    for _, value in parse_qsl(query):
	        media_type = _media_type_for_name(value)
	        if media_type is not None:
	            return media_type

	    return None

	def clean_html_for_display(html_text: str) -> str:
	    """
	    Clean HTML text for safe display in Gradio.

	    Removes ``<script>...</script>`` elements (any case, any attributes,
	    multi-line bodies) and inline ``on*=`` event-handler attributes with
	    double-quoted, single-quoted or unquoted values.

	    Args:
	        html_text: HTML fragment to clean.

	    Returns:
	        The cleaned HTML, or "" for empty/None input.

	    Note:
	        This is a best-effort regex filter, not a full HTML sanitizer; it
	        does not handle unclosed script tags, ``javascript:`` URLs or
	        other injection vectors.
	    """
	    if not html_text:
	        return ""

	    # Remove script tags for security. DOTALL lets the body span lines and
	    # the non-greedy ".*?" stops at the first closing tag.
	    html_text = re.sub(
	        r'<script\b[^>]*>.*?</script\s*>',
	        '',
	        html_text,
	        flags=re.IGNORECASE | re.DOTALL,
	    )

	    # Remove potentially dangerous attributes. Each quote style is matched
	    # separately so a value like "alert('x')" is removed in full, and the
	    # leading \b keeps words such as "confirmation" from matching.
	    html_text = re.sub(
	        r'\bon\w+\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+)',
	        '',
	        html_text,
	        flags=re.IGNORECASE,
	    )

	    return html_text

	def generate_accordion_html(title: str, content: str, is_open: bool = False) -> str:
	    """
	    Build the HTML markup for a collapsible ``<details>`` accordion.

	    ``title`` is placed in the bold summary line and ``content`` in the
	    body; both are inserted verbatim, without any escaping. When
	    ``is_open`` is true the ``open`` attribute is added so the section
	    starts expanded.
	    """
	    if is_open:
	        open_attr = "open"
	    else:
	        open_attr = ""

	    return f"""
	<details {open_attr} style="margin-bottom: 10px;">
	<summary style="cursor: pointer; padding: 8px; background: #e9ecef; border-radius: 4px;">
	<strong>{title}</strong>
	</summary>
	<div style="padding: 10px; border-left: 3px solid #007bff; margin-left: 10px; margin-top: 5px;">
	{content}
	</div>
	</details>
	"""

	class EventTracker:
	    """Simple event tracking for debugging and monitoring.

	    Events are kept in memory in ``events`` (oldest first). Once more than
	    ``max_events`` are stored, the oldest ones are discarded.
	    """

	    def __init__(self):
	        # Tracked events, oldest first; each is a dict with the keys
	        # "timestamp", "type" and "data".
	        self.events: List[Dict[str, Any]] = []
	        # Maximum number of events retained.
	        self.max_events = 100

	    def track_event(self, event_type: str, data: Optional[Dict[str, Any]] = None):
	        """Track an event.

	        Args:
	            event_type: Label stored under the event's "type" key.
	            data: Optional payload stored under "data"; an empty dict is
	                stored when omitted or falsy.
	        """
	        import datetime

	        event = {
	            "timestamp": datetime.datetime.now().isoformat(),
	            "type": event_type,
	            "data": data or {}
	        }

	        self.events.append(event)

	        # Keep only the most recent events. Counting the overflow (rather
	        # than slicing with -max_events) also works when max_events is 0,
	        # where [-0:] would keep everything.
	        overflow = len(self.events) - self.max_events
	        if overflow > 0:
	            del self.events[:overflow]

	        logger.debug(f"Event tracked: {event_type}")

	    def get_recent_events(self, count: int = 10) -> List[Dict[str, Any]]:
	        """Get recent events.

	        Args:
	            count: Maximum number of events to return.

	        Returns:
	            A new list with up to ``count`` of the most recent events,
	            oldest first; empty when ``count`` is zero or negative.
	        """
	        # events[-0:] is the whole list, so non-positive counts need an
	        # explicit guard.
	        if count <= 0:
	            return []
	        return self.events[-count:]

	    def clear_events(self):
	        """Clear all tracked events"""
	        self.events.clear()

	# Global event tracker instance, created once when this module is imported.
	event_tracker = EventTracker()