assignment_agent / utils.py
arbnori45's picture
Upload 54 files
922f271 verified
"""
Utility functions for working with different file formats in the resources directory
"""
import os
import json
import pandas as pd
from typing import Dict, Any, Union, List, Optional
import logging
from PIL import Image
import base64
from io import BytesIO
# Logging setup: INFO-level records rendered as "<time> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Resource files are looked up in the "resource" folder next to this module.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
RESOURCE_DIR = os.path.join(_MODULE_DIR, "resource")
def list_resources() -> List[str]:
    """Return the names of the files located directly in RESOURCE_DIR.

    Only entries for which ``os.path.isfile`` is true are reported, so
    sub-directories are left out.

    Returns:
        The matching file names (without the directory part), or an empty
        list when the directory cannot be listed (the error is logged).
    """
    try:
        file_names = []
        for entry_name in os.listdir(RESOURCE_DIR):
            candidate = os.path.join(RESOURCE_DIR, entry_name)
            if os.path.isfile(candidate):
                file_names.append(entry_name)
        return file_names
    except Exception as e:
        # Best-effort listing: callers get an empty list instead of an error.
        logger.error(f"Error listing resources: {e}")
        return []
def load_excel(file_path: str) -> Union[pd.DataFrame, None]:
    """Read an Excel file into a pandas DataFrame.

    Args:
        file_path: Path handed to ``pandas.read_excel``.

    Returns:
        The DataFrame produced by pandas, or ``None`` if reading raised any
        exception (the failure is logged).
    """
    try:
        frame = pd.read_excel(file_path)
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        logger.error(f"Error reading Excel file {file_path}: {e}")
        return None
    return frame
def load_csv(file_path: str) -> Union[pd.DataFrame, None]:
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Path handed to ``pandas.read_csv``.

    Returns:
        The DataFrame produced by pandas, or ``None`` if reading raised any
        exception (the failure is logged).
    """
    try:
        frame = pd.read_csv(file_path)
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        logger.error(f"Error reading CSV file {file_path}: {e}")
        return None
    return frame
def load_text(file_path: str) -> Union[str, None]:
    """Read a whole text file, decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The complete file content as one string, or ``None`` if opening or
        decoding the file raised any exception (the failure is logged).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as stream:
            contents = stream.read()
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        logger.error(f"Error reading text file {file_path}: {e}")
        return None
    return contents
def load_json(file_path: str) -> Union[Dict, List, None]:
    """Load and parse a JSON file.

    The file is decoded with ``utf-8-sig`` so that a leading UTF-8 byte-order
    mark (as written by some Windows editors) is dropped before parsing;
    ``json.load`` rejects text that starts with a BOM. Files without a BOM
    are decoded exactly as plain UTF-8.

    Args:
        file_path: Path of the JSON document.

    Returns:
        The parsed JSON value, or ``None`` if the file could not be read or
        parsed (the failure is logged).
    """
    try:
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            return json.load(f)
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        logger.error(f"Error reading JSON file {file_path}: {e}")
        return None
def load_image(file_path: str) -> Union[str, None]:
    """Load an image file and return it as a base64 ``data:`` URI.

    The file's own bytes are embedded unchanged. Pillow is used only to
    confirm the file can be identified as an image and to read its format
    name. Re-saving the opened image (the previous approach) re-compressed
    lossy formats and kept only the first frame of animated GIFs.

    Args:
        file_path: Path of the image file.

    Returns:
        A string of the form ``data:image/<format>;base64,<payload>`` where
        ``<format>`` is Pillow's format name in lower case, or ``None`` if
        the file could not be read or identified (the failure is logged).
    """
    try:
        with open(file_path, 'rb') as f:
            raw_bytes = f.read()
        # Identify the format from the in-memory bytes; nothing is re-encoded.
        with Image.open(BytesIO(raw_bytes)) as img:
            image_format = img.format
        if not image_format:
            raise ValueError("could not determine image format")
        img_str = base64.b64encode(raw_bytes).decode()
        return f"data:image/{image_format.lower()};base64,{img_str}"
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        logger.error(f"Error reading image file {file_path}: {e}")
        return None
def get_file_handler(file_path: str) -> Union[Any, None]:
    """Load a file with the loader that matches its extension.

    The extension is compared case-insensitively. Supported groups are
    Excel (.xlsx, .xls), CSV (.csv), text (.txt, .md, .py), JSON
    (.json, .jsonld) and images (.jpg, .jpeg, .png, .gif).

    Args:
        file_path: Path of the file to load.

    Returns:
        Whatever the selected loader returns, or ``None`` when the path does
        not exist or no loader is registered for the extension (both cases
        are logged).
    """
    if not os.path.exists(file_path):
        logger.error(f"File not found: {file_path}")
        return None

    ext = os.path.splitext(file_path)[1].lower()

    # Ordered (extensions, loader) pairs; the first group containing ext wins.
    loader_table = (
        (('.xlsx', '.xls'), load_excel),
        (('.csv',), load_csv),
        (('.txt', '.md', '.py'), load_text),
        (('.json', '.jsonld'), load_json),
        (('.jpg', '.jpeg', '.png', '.gif'), load_image),
    )
    for extensions, loader in loader_table:
        if ext in extensions:
            return loader(file_path)

    logger.warning(f"No handler for file type {ext}")
    return None
def search_metadata_by_question(question: str) -> List[Dict]:
    """Search ``metadata.jsonl`` in RESOURCE_DIR for entries matching a question.

    An entry is selected when either of the following holds:

    * Text match: the lower-cased query is a substring of the entry's
      lower-cased ``Question`` field, or the other way round. Both strings
      must be non-empty, because an empty string is a substring of every
      string and would otherwise match everything.
    * File match: the query mentions ``attached`` or ``spreadsheet`` and the
      entry has a non-empty ``file_name``.

    Each entry appears at most once in the result, even if both rules apply.
    Blank lines are skipped, and a leading UTF-8 byte-order mark is
    tolerated.

    Args:
        question: The question text to look up.

    Returns:
        The matching entries in file order. If reading or parsing fails, the
        error is logged and the entries collected so far are returned.
    """
    results = []
    metadata_path = os.path.join(RESOURCE_DIR, "metadata.jsonl")
    try:
        # Loop-invariant: normalise the query once rather than per line.
        query = question.lower()
        is_file_question = 'attached' in query or 'spreadsheet' in query
        with open(metadata_path, 'r', encoding='utf-8-sig') as f:
            for line in f:
                if not line.strip():
                    # A blank line is not a JSON document; skip it.
                    continue
                data = json.loads(line)
                # "or ''" also covers an explicit null Question value.
                metadata_question = (data.get('Question') or '').lower()
                matches_text = bool(query and metadata_question) and (
                    query in metadata_question or metadata_question in query
                )
                matches_file = is_file_question and bool(data.get('file_name'))
                if matches_text or matches_file:
                    results.append(data)
    except Exception as e:
        logger.error(f"Error searching metadata: {e}")
    return results
def get_metadata_answer(task_id: str) -> str:
    """Look up the ``Final answer`` for a task in ``metadata.jsonl``.

    The file in RESOURCE_DIR is scanned line by line and the first entry
    whose ``task_id`` equals the argument is used. Blank lines are skipped,
    and a leading UTF-8 byte-order mark is tolerated, so neither prevents a
    later entry from being found.

    Args:
        task_id: Identifier of the task to look up.

    Returns:
        The entry's ``Final answer`` value (``''`` if the entry has no such
        key), or ``''`` when no entry matches or the file cannot be read or
        parsed (the error is logged).
    """
    metadata_path = os.path.join(RESOURCE_DIR, "metadata.jsonl")
    try:
        with open(metadata_path, 'r', encoding='utf-8-sig') as f:
            for line in f:
                if not line.strip():
                    # A blank line is not a JSON document; skip it.
                    continue
                data = json.loads(line)
                if data.get('task_id') == task_id:
                    return data.get('Final answer', '')
    except Exception as e:
        logger.error(f"Error getting metadata answer: {e}")
    return ""