Duibonduil's picture
Upload 9 files
3e8c06e verified
import uuid
from typing import Any, Optional, Dict, List
from pydantic import Field
from aworld.output.artifact import Artifact, ArtifactType, ArtifactAttachment
# Maps a fenced-code-block language tag to the file extension (without the
# leading dot) used for that language. Lookups elsewhere in this module fall
# back to "txt" for tags that are not listed here.
CODE_FILE_EXTENSION_MAP = {
    # General-purpose and web languages
    "python": "py",
    "java": "java",
    "javascript": "js",
    "typescript": "ts",
    "html": "html",
    "css": "css",
    # Compiled / systems languages
    "c": "c",
    "cpp": "cpp",
    "csharp": "cs",
    "go": "go",
    "rust": "rs",
    # Other application languages
    "ruby": "rb",
    "php": "php",
    "swift": "swift",
    "kotlin": "kt",
    "scala": "scala",
    # Plain-text formats
    "markdown": "md",
    "txt": "txt",
    # Shells and command interpreters
    "shell": "sh",
    "bash": "sh",
    "sh": "sh",
    "zsh": "zsh",
    "powershell": "ps1",
    "cmd": "cmd",
    "bat": "bat",
}
class CodeArtifact(Artifact):
    """Artifact that wraps a piece of source code.

    On construction the code type, code version, interceptor provider and a
    filename guessed from the first line of ``content`` are stored in the
    artifact metadata. Class methods are provided to build artifacts directly
    or from the fenced code blocks of a markdown document.
    """

    code_interceptor: Any = Field(default=None, description="code executor type")

    def __init__(self, artifact_type: ArtifactType, content: Any, code_type: Optional[str], code_version: Optional[str],
                 code_interceptor_provider: Optional[str] = None,
                 artifact_id: Optional[str] = None, render_type: Optional[str] = None, **kwargs):
        """Create a code artifact and record its code-specific metadata.

        Args:
            artifact_type: Artifact type forwarded to the base class.
            content: The code itself; the filename is guessed from its first
                line when it is a string.
            code_type: Language tag of the code (e.g. ``"python"``).
            code_version: Version label stored in the metadata.
            code_interceptor_provider: Name of the interceptor provider,
                stored in the metadata and passed to
                ``init_code_interceptor``.
            artifact_id: Identifier forwarded to the base class.
            render_type: Render hint forwarded to the base class.
            **kwargs: Extra base-class arguments. A ``metadata`` mapping, if
                present, is merged over the defaults built here, so its keys
                win on conflict.
        """
        metadata = {
            "code_type": code_type,
            "code_version": code_version,
            "code_interceptor_provider": code_interceptor_provider,
            # Filename guessed from the leading comment line, or None.
            "filename": self.extract_filename(content),
        }

        # Pop (rather than read) so ``metadata`` is not passed twice to the
        # base class; tolerate an explicit ``metadata=None``.
        extra_metadata = kwargs.pop('metadata', None)
        if extra_metadata:
            metadata.update(extra_metadata)

        super().__init__(
            artifact_type=artifact_type,
            content=content,
            metadata=metadata,
            artifact_id=artifact_id,
            render_type=render_type,
            **kwargs
        )
        # NOTE(review): archive() comes from the Artifact base class, which is
        # not visible here; confirm that archiving on construction is intended.
        self.archive()
        self.code_interceptor = self.init_code_interceptor(code_interceptor_provider)

    @staticmethod
    def extract_filename(content: Any) -> Optional[str]:
        """Guess a filename from a leading comment on the first line of code.

        Supported comment styles are ``# name``, ``// name``, ``/* name */``
        and ``<!-- name -->``. Shebang lines are never treated as filenames.

        Args:
            content: The code; anything other than a string yields ``None``.

        Returns:
            The comment text with its comment markers removed, or ``None``
            when the first line is not a recognised comment (or is a shebang).
        """
        if not isinstance(content, str):
            return None
        lines = content.splitlines()
        if not lines:
            return None

        first_line = lines[0].strip()

        # Any "#!" line is an interpreter directive, not a filename comment.
        # "# /bin/bash" is kept as a tolerated, malformed shebang spelling.
        if first_line.startswith("#!") or first_line == "# /bin/bash":
            return None

        if first_line.startswith("#"):  # Python, Ruby, Shell
            return first_line[1:].strip()
        if first_line.startswith("//"):  # Java, JavaScript, C, C++
            return first_line[2:].strip()
        if first_line.startswith("/*") and "*/" in first_line:  # C, C++
            return first_line.split("*/")[0][2:].strip()
        if first_line.startswith("<!--"):  # HTML
            # Drop the closing "-->" too, so "<!-- a.html -->" gives "a.html".
            return first_line[4:].split("-->")[0].strip()
        return None

    @classmethod
    def build_artifact(cls,
                       content: Any,
                       code_type: Optional[str] = None,
                       code_version: Optional[str] = None,
                       code_interceptor_provider: Optional[str] = None,
                       artifact_id: Optional[str] = None,
                       render_type: Optional[str] = None,
                       **kwargs) -> "CodeArtifact":
        """Build the artifact class that matches ``code_type``.

        Shell-like types (``shell``, ``sh``, ``bash``, ``zsh``) produce a
        ``ShellArtifact``, ``html`` produces an ``HtmlArtifact``, and anything
        else produces an instance of ``cls`` with ``ArtifactType.CODE``.

        Args:
            content: The code to wrap.
            code_type: Language tag used to pick the artifact class.
            code_version: Version label for the code.
            code_interceptor_provider: Name of the interceptor provider.
            artifact_id: Identifier for the new artifact.
            render_type: Render hint for the new artifact.
            **kwargs: Extra arguments forwarded to the chosen constructor.

        Returns:
            The newly constructed artifact.
        """
        if code_type in ['shell', 'sh', 'bash', 'zsh']:
            return ShellArtifact(
                artifact_type=ArtifactType.CODE,
                content=content,
                code_version=code_version,
                code_interceptor_provider=code_interceptor_provider,
                artifact_id=artifact_id,
                render_type=render_type,
                **kwargs
            )
        if code_type in ['html']:
            # NOTE(review): code_version, code_interceptor_provider and
            # render_type are not forwarded on this path; confirm intended.
            return HtmlArtifact(
                content=content,
                artifact_id=artifact_id,
                **kwargs
            )
        return cls(
            artifact_type=ArtifactType.CODE,
            content=content,
            code_type=code_type,
            code_version=code_version,
            code_interceptor_provider=code_interceptor_provider,
            artifact_id=artifact_id,
            render_type=render_type,
            **kwargs
        )

    @classmethod
    def from_code_content(cls, artifact_type: ArtifactType,
                          content: Any,
                          render_type: Optional[str] = None,
                          **kwargs) -> List["CodeArtifact"]:
        """Create one artifact per fenced code block found in ``content``.

        Args:
            artifact_type: Accepted for interface compatibility; every block
                is created with ``ArtifactType.CODE`` regardless of this value.
            content: Markdown text to scan for fenced code blocks.
            render_type: Render hint applied to every created artifact.
            **kwargs: Extra arguments forwarded to the artifact factory.

        Returns:
            The created artifacts, in the order the blocks were extracted.
        """
        artifacts = []
        for block in cls.extract_model_output_to_code_content(content):
            code_type = block['language']
            code_version = "1.0"

            # Choose the interceptor provider from the block's language tag.
            if code_type in ['python', 'javascript', 'java']:
                code_interceptor_provider = "default_interceptor"
            elif code_type in ['shell', 'sh', 'bash', 'zsh']:
                code_interceptor_provider = "shell_interceptor"
            else:
                code_interceptor_provider = "generic_interceptor"

            # NOTE(review): create_artifact is not defined in this class;
            # presumably it is inherited from Artifact -- confirm it exists.
            artifact = cls.create_artifact(
                artifact_type=ArtifactType.CODE,
                content=block['content'],
                code_type=code_type,
                code_version=code_version,
                code_interceptor_provider=code_interceptor_provider,
                artifact_id=block['artifact_id'],  # id generated during extraction
                render_type=render_type,
                **kwargs
            )
            artifacts.append(artifact)
        return artifacts

    def init_code_interceptor(self, code_interceptor_provider):
        """Placeholder hook for creating an interceptor; currently returns None."""
        pass

    @classmethod
    def extract_model_output_to_code_content(cls, content):
        """Extract fenced code blocks from markdown ``content`` using mistune.

        If mistune cannot be imported it is installed with pip for the running
        interpreter and imported again.

        Args:
            content: Markdown text to parse.

        Returns:
            A list of dicts, one per code block, with the keys
            ``artifact_id`` (a fresh UUID4 string), ``content`` (the code),
            ``language`` (first word of the fence info string, or
            ``'unknown'``) and ``file_suffix`` (looked up in
            ``CODE_FILE_EXTENSION_MAP``, defaulting to ``"txt"``).

        Raises:
            subprocess.CalledProcessError: If the fallback pip install fails.
        """
        try:
            import mistune
        except ImportError:
            import importlib
            import subprocess
            import sys
            # Use "<this interpreter> -m pip" so the package lands in the
            # environment that is actually running, not whichever "pip"
            # happens to be first on PATH.
            subprocess.run([sys.executable, "-m", "pip", "install", "mistune>=3.0.0"], check=True)
            # Let the import system notice the freshly installed package.
            importlib.invalidate_caches()
            import mistune

        extracted_blocks = []

        # Renderer that records every code block instead of emitting HTML.
        class CustomRenderer(mistune.HTMLRenderer):
            def block_code(self, code, info=None):
                # The language is the first word of the info string; guard
                # against a missing or whitespace-only info string.
                info_words = info.split() if info else []
                language = info_words[0] if info_words else 'unknown'
                extracted_blocks.append({
                    "content": code,
                    "language": language
                })
                return ""

        markdown = mistune.create_markdown(
            renderer=CustomRenderer()
        )
        # Rendering is run only for its side effect of filling extracted_blocks.
        markdown(content)

        return [
            {
                "artifact_id": str(uuid.uuid4()),
                "content": block['content'],
                "language": block['language'],
                "file_suffix": CODE_FILE_EXTENSION_MAP.get(block['language'], "txt")
            }
            for block in extracted_blocks
        ]
class ShellArtifact(CodeArtifact):
    """Code artifact for shell scripts, with a slot for the execution result."""

    shell_result: str = Field(default="", description="shell execution result")

    def __init__(self, artifact_type: ArtifactType, content: Any, code_version: Optional[str],
                 code_interceptor_provider: Optional[str] = None,
                 artifact_id: Optional[str] = None, render_type: Optional[str] = None,
                 shell_result: str = "", **kwargs):
        """Create a shell artifact.

        The code type is always ``"shell"``. The filename is taken from the
        leading comment of ``content`` and falls back to ``"terminal.txt"``.

        Args:
            artifact_type: Artifact type forwarded to ``CodeArtifact``.
            content: The shell script.
            code_version: Version label for the script.
            code_interceptor_provider: Interceptor provider name; defaults to
                ``"shell_interceptor"`` when ``None``.
            artifact_id: Identifier for the artifact.
            render_type: Render hint for the artifact.
            shell_result: Initial value of the execution result.
            **kwargs: Extra arguments forwarded to ``CodeArtifact``. A
                ``metadata`` mapping is merged in, with ``filename``
                overwritten by the value computed here.
        """
        code_type = "shell"

        # Fall back to a generic name when no filename comment is present.
        filename = self.extract_filename(content) or "terminal.txt"

        # Pop ``metadata`` so it is not passed twice (explicitly and again via
        # **kwargs), and copy it so the caller's dict is not mutated.
        metadata = dict(kwargs.pop('metadata', None) or {})
        metadata['filename'] = filename

        if code_interceptor_provider is None:
            code_interceptor_provider = "shell_interceptor"

        super().__init__(artifact_type, content, code_type, code_version,
                         code_interceptor_provider, artifact_id, render_type, metadata=metadata, **kwargs)
        self.shell_result = shell_result

    def execute(self):
        """Placeholder for running the script; not implemented yet."""
        # TODO: implement shell execution and store its output in shell_result.
        pass
class HtmlArtifact(CodeArtifact):
    """Code artifact for HTML whose markup is stored as a file attachment."""

    def __init__(self, content: Any, artifact_id: Optional[str] = None, **kwargs):
        """Create an HTML artifact.

        The artifact is created with ``ArtifactType.HTML`` and code type
        ``'html'``. After construction the artifact's ``content`` is set to
        ``None`` and the HTML (with markdown code fences removed) is appended
        as a ``text/html`` attachment named ``<artifact_id>.html``.

        Args:
            content: The HTML markup, optionally wrapped in a markdown fence.
            artifact_id: Identifier for the artifact; a UUID4 string is
                generated when omitted so the attachment gets a real name.
            **kwargs: Extra arguments forwarded to ``CodeArtifact``.
                ``artifact_type`` and ``code_type`` are discarded because this
                class fixes them; ``code_version`` defaults to ``'1.0'``.
        """
        # These are fixed by this class; drop caller-supplied values so they
        # do not collide with the explicit keywords below.
        kwargs.pop('artifact_type', None)
        kwargs.pop('code_type', None)
        code_version = kwargs.pop('code_version', None) or '1.0'

        # Resolve the id up front so the attachment is never "None.html".
        if artifact_id is None:
            artifact_id = str(uuid.uuid4())

        super().__init__(
            artifact_type=ArtifactType.HTML,
            content=content,
            code_type='html',
            code_version=code_version,
            artifact_id=artifact_id,
            **kwargs
        )

        # Strip markdown fence markers; non-string content is attached as-is.
        html_content = content
        if isinstance(content, str):
            html_content = content.replace("```html", "").replace("```", "")

        # The markup lives in the attachment, not in the artifact's content.
        self.content = None
        self.attachments.append(
            ArtifactAttachment(filename=f"{artifact_id}.html", content=html_content, mime_type="text/html")
        )