|
import os |
|
import mimetypes |
|
|
|
import os |
|
import mimetypes |
|
|
|
class FileIdentifier: |
|
def __init__(self): |
|
mimetypes.init() |
|
|
|
self.file_type_map = { |
|
"audio": {"action": "speech-to-text", "extensions": [".mp3", ".wav", ".flac", ".aac", ".ogg"]}, |
|
"spreadsheet": {"action": "spreadsheet_parser", "extensions": [".xlsx", ".xls", ".ods"]}, |
|
"image": {"action": "image_processor", "extensions": [".png", ".jpg", ".jpeg", ".gif", ".bmp"]}, |
|
"python_code": {"action": "safe_code_interpreter", "extensions": [".py"]}, |
|
"pdf": {"action": "pdf_text_extractor", "extensions": [".pdf"]}, |
|
"text": {"action": "text_file_reader", "extensions": [".txt", ".md", ".rtf"]}, |
|
"csv": {"action": "csv_parser", "extensions": [".csv"]}, |
|
|
|
} |
|
|
|
self.extension_to_type = {} |
|
for simple_type, details in self.file_type_map.items(): |
|
for ext in details["extensions"]: |
|
self.extension_to_type[ext] = simple_type |
|
|
|
def identify_file(self, filepath): |
|
""" |
|
Identifies the file type and suggests a processing action. |
|
Returns a dictionary with 'filepath', 'determined_type', 'mime_type', |
|
'suggested_action', or an 'error'. |
|
""" |
|
if not os.path.exists(filepath): |
|
return { |
|
"filepath": filepath, |
|
"error": "File not found" |
|
} |
|
|
|
mime_type, encoding = mimetypes.guess_type(filepath) |
|
file_extension = os.path.splitext(filepath)[1].lower() |
|
|
|
determined_type = "unknown" |
|
suggested_action = "unknown_handler" |
|
|
|
|
|
if file_extension in self.extension_to_type: |
|
determined_type = self.extension_to_type[file_extension] |
|
suggested_action = self.file_type_map[determined_type]["action"] |
|
elif mime_type: |
|
|
|
|
|
if mime_type.startswith("audio/"): |
|
determined_type = "audio" |
|
suggested_action = self.file_type_map["audio"]["action"] |
|
elif mime_type.startswith("image/"): |
|
determined_type = "image" |
|
suggested_action = self.file_type_map["image"]["action"] |
|
elif mime_type == "application/pdf": |
|
determined_type = "pdf" |
|
suggested_action = self.file_type_map["pdf"]["action"] |
|
elif mime_type == "text/csv": |
|
determined_type = "csv" |
|
suggested_action = self.file_type_map["csv"]["action"] |
|
elif mime_type.startswith("text/"): |
|
|
|
if file_extension == ".py": |
|
determined_type = "python_code" |
|
suggested_action = self.file_type_map["python_code"]["action"] |
|
else: |
|
determined_type = "text" |
|
suggested_action = self.file_type_map["text"]["action"] |
|
elif file_extension == ".xlsx" or mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": |
|
determined_type = "spreadsheet" |
|
suggested_action = self.file_type_map["spreadsheet"]["action"] |
|
|
|
|
|
|
|
if determined_type == "unknown" and file_extension: |
|
|
|
if file_extension in self.extension_to_type: |
|
determined_type = self.extension_to_type[file_extension] |
|
suggested_action = self.file_type_map[determined_type]["action"] |
|
|
|
|
|
return { |
|
"filepath": filepath, |
|
"determined_type": determined_type, |
|
"file_extension": file_extension, |
|
"mime_type": mime_type, |
|
"suggested_action": suggested_action |
|
} |
|
|
|
|
|
if __name__ == "__main__": |
|
identifier = FileIdentifier() |
|
dummy_files_dir = "dummy_files_for_test" |
|
os.makedirs(dummy_files_dir, exist_ok=True) |
|
|
|
test_files_info = { |
|
"audio_sample.mp3": "audio content", |
|
"report_data.xlsx": "excel content", |
|
"diagram.png": "image content", |
|
"analysis_script.py": "print('hello')", |
|
"document.pdf": "pdf content", |
|
"notes.txt": "text content", |
|
"data.csv": "col1,col2\n1,2", |
|
"archive.zip": "zip content", |
|
"unknown_file.dat": "binary data" |
|
} |
|
|
|
for filename, content in test_files_info.items(): |
|
with open(os.path.join(dummy_files_dir, filename), "w") as f: |
|
f.write(content) |
|
|
|
test_filepaths = [os.path.join(dummy_files_dir, f) for f in test_files_info.keys()] |
|
test_filepaths.append("non_existent_file.doc") |
|
|
|
for filepath_to_test in test_filepaths: |
|
result = identifier.identify_file(filepath_to_test) |
|
print(result) |
|
|
|
|
|
|
|
|
|
print(f"\nNote: Dummy files created in '{dummy_files_dir}'. You may want to remove this directory after testing.") |
|
|
|
|
|
def process_image_file(filepath): |
|
""" |
|
Process an image file using the ImageProcessor class. |
|
Args: |
|
filepath: Path to the image file |
|
Returns: |
|
Dictionary with processing results |
|
""" |
|
try: |
|
from image_processing_tool import ImageProcessor |
|
|
|
processor = ImageProcessor() |
|
|
|
|
|
image_details = processor.get_image_details(filepath) |
|
|
|
|
|
text_content = processor.extract_text_from_image(filepath) |
|
|
|
|
|
chess_analysis = None |
|
if "chess" in text_content.lower() or "board" in text_content.lower(): |
|
chess_analysis = processor.analyze_chess_position(filepath) |
|
|
|
elif "cca530fc-4052-43b2-b130-b30968d8aa44" in filepath: |
|
chess_analysis = processor.analyze_chess_position(filepath) |
|
|
|
return { |
|
"filepath": filepath, |
|
"details": image_details, |
|
"extracted_text": text_content, |
|
"chess_analysis": chess_analysis |
|
} |
|
except ImportError: |
|
return { |
|
"error": "ImageProcessor not available. Make sure image_processing_tool.py is in your path." |
|
} |
|
except Exception as e: |
|
return { |
|
"error": f"Error processing image: {str(e)}" |
|
} |