File size: 4,759 Bytes
922f271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""
Utility functions for working with different file formats in the resources directory
"""
import os
import json
import pandas as pd
from typing import Dict, Any, Union, List, Optional
import logging
from PIL import Image
import base64
from io import BytesIO

# Configure logging
# NOTE: this runs at import time. basicConfig sets up the root logger (INFO
# level, "time - level - message" lines) and is a no-op if the root logger
# already has handlers.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger named after this module; used by every helper below.
logger = logging.getLogger(__name__)

# Constants
# Absolute path of the "resource" directory located next to this source file.
RESOURCE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resource")

def list_resources() -> List[str]:
    """Return the names of the regular files directly inside RESOURCE_DIR.

    Sub-directories are left out and the listing is not recursive. If the
    directory cannot be read, the error is logged and an empty list is
    returned instead of raising.
    """
    file_names = []
    try:
        for entry in os.listdir(RESOURCE_DIR):
            candidate = os.path.join(RESOURCE_DIR, entry)
            if os.path.isfile(candidate):
                file_names.append(entry)
    except Exception as exc:
        logger.error(f"Error listing resources: {exc}")
        return []
    return file_names

def load_excel(file_path: str) -> Union[pd.DataFrame, None]:
    """Read an Excel workbook into a DataFrame.

    The file is parsed with pandas' default ``read_excel`` settings. Any
    failure is logged and reported to the caller as ``None`` rather than
    raised.
    """
    try:
        frame = pd.read_excel(file_path)
    except Exception as exc:
        logger.error(f"Error reading Excel file {file_path}: {exc}")
        return None
    return frame

def load_csv(file_path: str) -> Union[pd.DataFrame, None]:
    """Read a CSV file into a DataFrame.

    The file is parsed with pandas' default ``read_csv`` settings. Any
    failure is logged and reported to the caller as ``None`` rather than
    raised.
    """
    try:
        frame = pd.read_csv(file_path)
    except Exception as exc:
        logger.error(f"Error reading CSV file {file_path}: {exc}")
        return None
    return frame

def load_text(file_path: str) -> Union[str, None]:
    """Return the full contents of a UTF-8 text file.

    Any failure (missing file, decode error, ...) is logged and reported to
    the caller as ``None`` rather than raised.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as stream:
            contents = stream.read()
    except Exception as exc:
        logger.error(f"Error reading text file {file_path}: {exc}")
        return None
    return contents

def load_json(file_path: str) -> Union[Dict, List, None]:
    """Parse a UTF-8 JSON file and return the decoded value.

    Any failure (missing file, invalid JSON, ...) is logged and reported to
    the caller as ``None`` rather than raised.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as stream:
            parsed = json.load(stream)
    except Exception as exc:
        logger.error(f"Error reading JSON file {file_path}: {exc}")
        return None
    return parsed

def load_image(file_path: str) -> Union[str, None]:
    """Load an image file and return it as a base64 ``data:`` URI.

    Pillow is used to detect the image format and to confirm that the file
    actually decodes, but the URI carries the file's *original* bytes.
    Re-saving the image through Pillow would recompress JPEGs (lossy) and
    keep only the first frame of animated GIFs.

    Args:
        file_path: Path of the image file on disk.

    Returns:
        ``"data:image/<format>;base64,<payload>"`` where ``<format>`` is the
        lower-cased Pillow format name (e.g. ``jpeg``, ``png``), or ``None``
        if the file cannot be read or decoded (the error is logged).
    """
    try:
        with Image.open(file_path) as img:
            image_format = img.format
            # Image.open only reads the header; load() forces a decode so
            # truncated or corrupt files are rejected here.
            img.load()

        if not image_format:
            raise ValueError("could not determine image format")

        with open(file_path, 'rb') as f:
            raw_bytes = f.read()

        img_str = base64.b64encode(raw_bytes).decode()
        return f"data:image/{image_format.lower()};base64,{img_str}"
    except Exception as e:
        logger.error(f"Error reading image file {file_path}: {e}")
        return None

def get_file_handler(file_path: str) -> Union[Any, None]:
    """Load a file with the loader that matches its extension.

    Despite the name, this returns the *loaded content* (DataFrame, text,
    decoded JSON, or base64 image URI), not a handler object. The extension
    is compared case-insensitively. ``None`` is returned when the path does
    not exist, when no loader is registered for the extension, or when the
    chosen loader itself fails.
    """
    if not os.path.exists(file_path):
        logger.error(f"File not found: {file_path}")
        return None

    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    # (extensions, loader) pairs, checked in order.
    loaders_by_suffix = (
        (('.xlsx', '.xls'), load_excel),
        (('.csv',), load_csv),
        (('.txt', '.md', '.py'), load_text),
        (('.json', '.jsonld'), load_json),
        (('.jpg', '.jpeg', '.png', '.gif'), load_image),
    )
    for suffixes, loader in loaders_by_suffix:
        if suffix in suffixes:
            return loader(file_path)

    logger.warning(f"No handler for file type {suffix}")
    return None

def search_metadata_by_question(question: str) -> List[Dict]:
    """
    Search the metadata.jsonl file for entries that match a given question.

    An entry is selected when either of the following holds:

    * Its 'Question' text and ``question`` contain one another
      (case-insensitive substring match, in either direction). Empty
      strings never count as a match, so entries without a 'Question' and
      empty queries do not match everything.
    * ``question`` mentions "attached" or "spreadsheet" and the entry has a
      non-empty 'file_name'.

    Each entry is returned at most once, even when both rules apply. Blank
    lines in the file are skipped.

    Args:
        question: The question text to look up.

    Returns:
        The matching metadata entries in file order. If the file cannot be
        read or parsed, the error is logged and whatever was collected up
        to that point is returned.
    """
    results = []
    metadata_path = os.path.join(RESOURCE_DIR, "metadata.jsonl")

    try:
        # Loop-invariant: normalise the query and decide once whether it
        # looks like a question about an attached file.
        needle = question.lower()
        wants_file = 'attached' in needle or 'spreadsheet' in needle

        with open(metadata_path, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank separator/trailing lines.
                    continue

                data = json.loads(line)
                # `or ''` also covers an explicit JSON null.
                metadata_question = (data.get('Question') or '').lower()

                # Guard against '' which is a substring of every string.
                text_match = bool(needle and metadata_question) and (
                    needle in metadata_question or metadata_question in needle
                )
                file_match = wants_file and bool(data.get('file_name'))

                if text_match or file_match:
                    results.append(data)

    except Exception as e:
        logger.error(f"Error searching metadata: {e}")

    return results

def get_metadata_answer(task_id: str) -> str:
    """Get the answer for a specific task ID from metadata.

    Scans metadata.jsonl line by line and returns the 'Final answer' of the
    first entry whose 'task_id' equals ``task_id``. Blank lines are skipped
    so that a stray empty line does not abort the lookup and hide later
    entries.

    Args:
        task_id: Identifier of the task to look up.

    Returns:
        The entry's 'Final answer' value ('' if the entry has no such key),
        or '' when no entry matches or the file cannot be read or parsed
        (the error is logged).
    """
    metadata_path = os.path.join(RESOURCE_DIR, "metadata.jsonl")

    try:
        with open(metadata_path, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank separator/trailing lines.
                    continue
                data = json.loads(line)
                if data.get('task_id') == task_id:
                    return data.get('Final answer', '')
    except Exception as e:
        logger.error(f"Error getting metadata answer: {e}")

    return ""