Spaces:

arbnori45
/

assignment_agent

Sleeping

App Files Files Community

assignment_agent / utils.py

arbnori45

Upload 54 files

922f271 verified about 2 months ago

raw

history blame contribute delete

4.76 kB

	"""
	Utility functions for working with different file formats in the resources directory
	"""
	import os
	import json
	import pandas as pd
	from typing import Dict, Any, Union, List, Optional
	import logging
	from PIL import Image
	import base64
	from io import BytesIO

	# Logging setup: INFO threshold, each record shows timestamp, level and message
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# Absolute path of the "resource" folder that sits next to this module.
	# NOTE(review): the docstrings in this file speak of a "resources" directory,
	# while the path below uses "resource" - confirm the folder name on disk.
	RESOURCE_DIR = os.path.join(
	    os.path.dirname(os.path.abspath(__file__)),
	    "resource",
	)

	def list_resources() -> List[str]:
	    """Return the names of the regular files located directly in RESOURCE_DIR.

	    Sub-directories are left out. If the directory cannot be listed, the
	    error is logged and an empty list is returned.
	    """
	    try:
	        names = []
	        for entry in os.listdir(RESOURCE_DIR):
	            if os.path.isfile(os.path.join(RESOURCE_DIR, entry)):
	                names.append(entry)
	        return names
	    except Exception as exc:
	        logger.error(f"Error listing resources: {exc}")
	        return []

	def load_excel(file_path: str) -> Optional[pd.DataFrame]:
	    """Read an Excel workbook into a DataFrame with pandas' default options.

	    Returns None, after logging the error, when the file cannot be read.
	    """
	    try:
	        frame = pd.read_excel(file_path)
	    except Exception as exc:
	        logger.error(f"Error reading Excel file {file_path}: {exc}")
	        return None
	    return frame

	def load_csv(file_path: str) -> Optional[pd.DataFrame]:
	    """Read a CSV file into a DataFrame with pandas' default options.

	    Returns None, after logging the error, when the file cannot be read.
	    """
	    try:
	        frame = pd.read_csv(file_path)
	    except Exception as exc:
	        logger.error(f"Error reading CSV file {file_path}: {exc}")
	        return None
	    return frame

	def load_text(file_path: str) -> Optional[str]:
	    """Return the full content of a text file decoded as UTF-8.

	    Returns None, after logging the error, when the file cannot be read.
	    """
	    try:
	        with open(file_path, mode='r', encoding='utf-8') as handle:
	            content = handle.read()
	        return content
	    except Exception as exc:
	        logger.error(f"Error reading text file {file_path}: {exc}")
	        return None

	def load_json(file_path: str) -> Union[Dict, List, None]:
	    """Parse a UTF-8 encoded JSON file and return the decoded value.

	    Returns None, after logging the error, when the file cannot be read
	    or does not contain valid JSON.
	    """
	    try:
	        with open(file_path, mode='r', encoding='utf-8') as handle:
	            raw = handle.read()
	            return json.loads(raw)
	    except Exception as exc:
	        logger.error(f"Error reading JSON file {file_path}: {exc}")
	        return None

	def load_image(file_path: str) -> Optional[str]:
	    """Open an image with Pillow and return it as a base64 data URI.

	    The image is re-saved into an in-memory buffer using the format Pillow
	    reports for it, so the encoded bytes come from that re-save rather than
	    from the file on disk. The result has the form
	    data:image/<format>;base64,<payload>, with the format name lower-cased.

	    Returns None, after logging the error, when anything goes wrong.
	    """
	    try:
	        with Image.open(file_path) as picture:
	            fmt = picture.format
	            buffer = BytesIO()
	            picture.save(buffer, format=fmt)
	            payload = base64.b64encode(buffer.getvalue()).decode()
	            return f"data:image/{fmt.lower()};base64,{payload}"
	    except Exception as exc:
	        logger.error(f"Error reading image file {file_path}: {exc}")
	        return None

	def get_file_handler(file_path: str) -> Union[Any, None]:
	    """Load a file with the loader that matches its extension.

	    Despite the name, this returns the loaded content, not a handler
	    object. The extension is compared case-insensitively:

	    * .xlsx / .xls                -> load_excel
	    * .csv                        -> load_csv
	    * .txt / .md / .py            -> load_text
	    * .json / .jsonld             -> load_json
	    * .jpg / .jpeg / .png / .gif  -> load_image

	    Returns whatever the selected loader returns, or None when the path
	    does not exist (logged as an error) or no loader is registered for the
	    extension (logged as a warning).
	    """
	    if not os.path.exists(file_path):
	        logger.error(f"File not found: {file_path}")
	        return None

	    _, suffix = os.path.splitext(file_path)
	    ext = suffix.lower()

	    # Extension groups paired with their loader, checked in order
	    loaders = (
	        (('.xlsx', '.xls'), load_excel),
	        (('.csv',), load_csv),
	        (('.txt', '.md', '.py'), load_text),
	        (('.json', '.jsonld'), load_json),
	        (('.jpg', '.jpeg', '.png', '.gif'), load_image),
	    )
	    for extensions, loader in loaders:
	        if ext in extensions:
	            return loader(file_path)

	    logger.warning(f"No handler for file type {ext}")
	    return None

	def search_metadata_by_question(question: str) -> List[Dict]:
	    """Return the entries of metadata.jsonl that are relevant to a question.

	    An entry is selected when either

	    * its 'Question' text and the given question overlap, i.e. one contains
	      the other (case-insensitive), or
	    * the given question mentions 'attached' or 'spreadsheet' and the entry
	      has a non-empty 'file_name'.

	    Each entry is returned at most once, in file order, even when both
	    rules apply to it. Empty text never counts as an overlap, so entries
	    without a 'Question' are not matched by accident.

	    Args:
	        question: The question text to look up.

	    Returns:
	        The matching metadata dictionaries. The list is empty when nothing
	        matches or the metadata file cannot be read (the error is logged).
	    """
	    results: List[Dict] = []
	    metadata_path = os.path.join(RESOURCE_DIR, "metadata.jsonl")

	    try:
	        with open(metadata_path, 'r', encoding='utf-8') as f:
	            # Normalise the query once instead of once per line
	            needle = question.lower()
	            has_needle = bool(needle.strip())
	            file_based = 'attached' in needle or 'spreadsheet' in needle

	            for line_no, line in enumerate(f, start=1):
	                # Blank lines are not records
	                if not line.strip():
	                    continue

	                # One broken record must not hide all the records after it
	                try:
	                    data = json.loads(line)
	                except json.JSONDecodeError as e:
	                    logger.warning(f"Skipping malformed metadata line {line_no}: {e}")
	                    continue
	                if not isinstance(data, dict):
	                    continue

	                raw_question = data.get('Question')
	                candidate = raw_question.lower() if isinstance(raw_question, str) else ''

	                # '' is a substring of every string, so require real text
	                # on both sides before testing containment
	                text_match = (
	                    has_needle
	                    and bool(candidate.strip())
	                    and (needle in candidate or candidate in needle)
	                )
	                file_match = file_based and bool(data.get('file_name'))

	                # A single append keeps an entry from being listed twice
	                if text_match or file_match:
	                    results.append(data)

	    except Exception as e:
	        logger.error(f"Error searching metadata: {e}")

	    return results

	def get_metadata_answer(task_id: str) -> str:
	    """Return the 'Final answer' of the metadata.jsonl entry with a given task ID.

	    The file is scanned line by line and the first entry whose 'task_id'
	    equals the argument wins. Blank lines and lines that are not valid JSON
	    are skipped, so a damaged record does not hide the ones after it.

	    Args:
	        task_id: Identifier to compare against each entry's 'task_id'.

	    Returns:
	        The entry's 'Final answer', or an empty string when the entry has
	        no answer (missing or null), no entry matches, or the metadata file
	        cannot be read (the error is logged).
	    """
	    metadata_path = os.path.join(RESOURCE_DIR, "metadata.jsonl")

	    try:
	        with open(metadata_path, 'r', encoding='utf-8') as f:
	            for line_no, line in enumerate(f, start=1):
	                # Blank lines are not records
	                if not line.strip():
	                    continue

	                try:
	                    data = json.loads(line)
	                except json.JSONDecodeError as e:
	                    logger.warning(f"Skipping malformed metadata line {line_no}: {e}")
	                    continue

	                if isinstance(data, dict) and data.get('task_id') == task_id:
	                    answer = data.get('Final answer', '')
	                    # A JSON null would otherwise leak None out of a -> str function
	                    return '' if answer is None else answer
	    except Exception as e:
	        logger.error(f"Error getting metadata answer: {e}")

	    return ""