# methunraj
# feat: initialize project structure with core components
# cfeb3a6
import logging
import os
import shutil
import tempfile
import time
import uuid
from pathlib import Path
from typing import Dict

import PyPDF2
from PIL import Image

from config.settings import settings
logger = logging.getLogger(__name__)


class FileHandler:
    """Validate, store, preview, and clean up uploaded files.

    Uploads are written to ``<temp_dir>/<session_id>/input/<filename>``.
    Three kinds of upload object are accepted: objects exposing
    ``getbuffer()``, objects exposing ``read()``, and objects that only carry
    a ``name`` attribute holding a path on disk (e.g. a Gradio temp path).
    """

    # Maximum number of characters returned by get_file_preview before "...".
    PREVIEW_CHARS = 500

    def __init__(self):
        """Read the temp directory and size limit from project settings."""
        self.temp_dir = Path(settings.TEMP_DIR)
        self.max_size_mb = settings.MAX_FILE_SIZE_MB

    @staticmethod
    def _file_size_bytes(uploaded_file):
        """Return the upload's size in bytes, or None if it cannot be found.

        Tries, in order: the in-memory buffer, the on-disk file that ``name``
        points to, and finally seek/tell on the stream (restoring the
        original position afterwards).
        """
        if hasattr(uploaded_file, "getbuffer"):
            return len(uploaded_file.getbuffer())

        path = getattr(uploaded_file, "name", None)
        if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
            return os.path.getsize(path)

        if hasattr(uploaded_file, "seek") and hasattr(uploaded_file, "tell"):
            position = uploaded_file.tell()
            uploaded_file.seek(0, os.SEEK_END)
            size = uploaded_file.tell()
            uploaded_file.seek(position)
            return size

        return None

    @staticmethod
    def _file_extension(name) -> str:
        """Return the lower-cased extension of *name* without the dot.

        Only the final path component is inspected, so dots in directory
        names are ignored; a name without an extension yields ``""``.
        """
        return os.path.splitext(os.path.basename(name))[1][1:].lower()

    @classmethod
    def _truncate_preview(cls, text: str) -> str:
        """Cut *text* to PREVIEW_CHARS characters, appending "..." if cut."""
        limit = cls.PREVIEW_CHARS
        return text[:limit] + "..." if len(text) > limit else text

    def validate_file(self, uploaded_file) -> Dict:
        """Check that an upload exists, is small enough, and has a supported type.

        Args:
            uploaded_file: Upload object with a ``name`` attribute. Its size
                is read from ``getbuffer()``, from the file at ``name``, or
                via seek/tell, whichever is available.

        Returns:
            A dict with keys ``valid`` (bool), ``error`` (message or None)
            and ``file_info`` (``{"name", "size_mb", "type"}`` when valid,
            otherwise None).
        """
        validation = {"valid": False, "error": None, "file_info": None}
        if not uploaded_file:
            validation["error"] = "No file"
            return validation

        size_bytes = self._file_size_bytes(uploaded_file)
        if size_bytes is None:
            validation["error"] = "Unable to determine file size"
            return validation

        file_size_mb = size_bytes / (1024 * 1024)
        if file_size_mb > self.max_size_mb:
            validation["error"] = "File too large"
            return validation

        file_extension = self._file_extension(uploaded_file.name)
        if file_extension not in settings.SUPPORTED_FILE_TYPES:
            validation["error"] = "Unsupported type"
            return validation

        validation["valid"] = True
        # Extract just filename for display (uploaded_file.name contains
        # full Gradio temp path).
        filename = os.path.basename(uploaded_file.name)
        validation["file_info"] = {
            "name": filename,
            "size_mb": file_size_mb,
            "type": file_extension,
        }
        return validation

    def save_uploaded_file(self, uploaded_file, session_id: str) -> str:
        """Copy an upload into the session's ``input`` directory.

        Args:
            uploaded_file: Upload object with a ``name`` attribute; content
                comes from ``getbuffer()``, ``read()``, or the file at
                ``name``, in that order of preference.
            session_id: Session directory name. When falsy, a random
                8-character id is generated.

        Returns:
            The destination path as a string.

        Raises:
            OSError: If the source cannot be read or the destination cannot
                be written.
        """
        if not session_id:
            session_id = str(uuid.uuid4())[:8]

        session_dir = self.temp_dir / session_id / "input"
        session_dir.mkdir(parents=True, exist_ok=True)

        # Keep only the final path component (uploaded_file.name contains
        # full Gradio temp path).
        filename = os.path.basename(uploaded_file.name)
        file_path = session_dir / filename
        logger.info("Moving file from Gradio temp: %s", uploaded_file.name)
        logger.info("To session directory: %s", file_path)

        if hasattr(uploaded_file, "getbuffer"):
            with open(file_path, "wb") as dst:
                dst.write(uploaded_file.getbuffer())
        elif hasattr(uploaded_file, "read"):
            # Stream in chunks instead of loading the whole upload in memory.
            with open(file_path, "wb") as dst:
                shutil.copyfileobj(uploaded_file, dst)
        else:
            # For NamedString or similar objects, copy from the file path.
            # copyfile opens the source first, so a missing source does not
            # leave an empty destination behind.
            try:
                shutil.copyfile(uploaded_file.name, file_path)
            except shutil.SameFileError:
                # Source already is the destination; opening it for writing
                # would truncate it, so leave it untouched.
                logger.debug("Upload already at destination: %s", file_path)

        return str(file_path)

    def get_file_preview(self, file_path: str, file_type: str) -> str:
        """Return a short text preview of a stored file.

        Args:
            file_path: Path of the file to preview.
            file_type: File type such as ``"pdf"`` or ``"txt"``; matched
                case-insensitively.

        Returns:
            Up to PREVIEW_CHARS characters of text (followed by ``"..."``
            when truncated), or a "... not available" message when the type
            is unsupported or the file cannot be read.
        """
        kind = str(file_type or "").lower()

        if kind == "pdf":
            try:
                with open(file_path, "rb") as file:
                    reader = PyPDF2.PdfReader(file)
                    if len(reader.pages) > 0:
                        text = reader.pages[0].extract_text()
                        return self._truncate_preview(text)
            except Exception:
                # Preview is best-effort: any parsing failure degrades to a
                # placeholder instead of propagating.
                logger.debug("PDF preview failed for %s", file_path, exc_info=True)
                return "PDF preview not available"
        elif kind == "txt":
            try:
                with open(file_path, "r", encoding="utf-8") as file:
                    # One extra character is enough to detect truncation.
                    text = file.read(self.PREVIEW_CHARS + 1)
                return self._truncate_preview(text)
            except Exception:
                logger.debug("Text preview failed for %s", file_path, exc_info=True)
                return "Text preview not available"

        # Similar for image types could be added
        return "Preview not available"

    def cleanup_temp_files(self, max_age_hours: float = 24):
        """Delete session directories older than *max_age_hours*.

        Age is measured from each directory's own modification time. Cleanup
        is best-effort: filesystem errors are logged and never raised, and a
        failure on one directory does not stop the others being processed.

        Args:
            max_age_hours: Minimum age, in hours, for a directory to be
                removed. Defaults to 24.
        """
        cutoff = time.time() - max_age_hours * 3600

        try:
            session_dirs = [p for p in self.temp_dir.iterdir() if p.is_dir()]
        except OSError as exc:
            logger.debug("Skipping temp cleanup of %s: %s", self.temp_dir, exc)
            return

        for session_dir in session_dirs:
            try:
                if session_dir.stat().st_mtime < cutoff:
                    shutil.rmtree(session_dir)
            except OSError as exc:
                logger.warning("Could not remove %s: %s", session_dir, exc)