cc-api / json_parser.py
Severian's picture
Update json_parser.py
7e0a2e6 verified
raw
history blame
7.71 kB
from logger_config import setup_logger
from typing import Dict, Any, Optional, List, Union, Tuple
from dataclasses import dataclass
from enum import Enum
import json
import re
# Module-level logger obtained from the project's setup_logger helper, called
# with its default arguments.
# NOTE(review): nothing in the visible code reads this name (SSEParser builds
# its own "sse_parser" logger) — confirm whether other modules import it
# before treating it as dead.
logger = setup_logger()
class MessageState:
    """Scratch state for a single streamed message.

    Every instance starts out empty and owns fresh container objects, so
    state is never shared between two ``MessageState`` instances.
    """

    def __init__(self):
        # Text buffer, initially empty.
        # NOTE(review): presumably accumulates streamed text — no visible
        # code writes to it; confirm against callers.
        self.buffer: str = str()
        # Flipped to True by SSEParser._handle_message_end.
        self.is_complete: bool = False
        # {"type": ..., "content": ...} records appended by the parser.
        self.tool_outputs: List[Dict[str, Any]] = list()
        # Replaced with the event's "retriever_resources" at message end.
        self.citations: list = list()
        # Replaced with the event's "metadata" at message end.
        self.metadata: Dict[str, Any] = dict()
        # NOTE(review): looks like a de-duplication set for handled events;
        # not read or written in the visible code — confirm usage.
        self.processed_events: set = set()
        # NOTE(review): presumably the id of the message being assembled;
        # not assigned anywhere in the visible code.
        self.current_message_id: Optional[str] = None
class SSEParser:
    """Convert raw Server-Sent-Event lines into frontend-ready dictionaries.

    The JSON payload of a ``data:`` line is inspected for its ``event`` field;
    ``agent_message``, ``agent_thought`` and ``message_end`` events are
    reshaped, everything else is ignored. Per-message bookkeeping (tool
    outputs, citations, metadata, completion flag) lives in
    ``self.current_message``.
    """

    def __init__(self):
        self.logger = setup_logger("sse_parser")
        self.current_message = MessageState()

    def _extract_json_content(self, data: str) -> Optional[str]:
        """Return the stripped text after the first ``data:`` marker.

        Returns None when *data* contains no ``data:`` marker. The marker is
        searched for anywhere in the string, not only at the start.
        """
        _, marker, payload = data.partition("data:")
        return payload.strip() if marker else None

    def _is_valid_json(self, content: str) -> bool:
        """Return True when *content* parses as JSON, False otherwise."""
        try:
            json.loads(content)
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers
            # non-string input such as None.
            return False
        return True

    def _clean_mermaid_content(self, content: str) -> Optional[str]:
        """Extract bare mermaid source from a tool observation.

        Strips an optional ``tool response:`` prefix, unwraps a JSON object
        carrying ``mermaid_output`` or ``mermaid_diagram``, and removes the
        surrounding markdown code fence.

        Returns the cleaned text, or None (after logging) when cleaning fails,
        e.g. because *content* or the unwrapped value is not a string.
        """
        try:
            if "tool response:" in content:
                content = content.split("tool response:", 1)[1].strip()

            try:
                data = json.loads(content)
            except json.JSONDecodeError:
                data = None

            # Only JSON *objects* can carry the diagram; scalars, strings and
            # arrays are left as plain text instead of being indexed.
            if isinstance(data, dict):
                if "mermaid_output" in data:
                    content = data["mermaid_output"]
                elif "mermaid_diagram" in data:
                    content = data["mermaid_diagram"]

            content = content.replace("```mermaid\n", "").replace("\n```", "")
            return content.strip()
        except Exception as e:
            self.logger.error(f"Error cleaning mermaid content: {str(e)}")
            return None

    def parse_sse_event(self, data: str) -> Optional[Dict]:
        """Parse one SSE line and format it for frontend consumption.

        Returns a dict whose ``type`` is ``"message"``, ``"tool_output"``,
        ``"thought"`` or ``"end"``. Returns None when the line has no
        ``data:`` payload, the payload is not a JSON object, the event type
        is not handled, or parsing fails (failures are logged, never raised).
        """
        try:
            json_content = self._extract_json_content(data)
            if not json_content:
                return None

            parsed_data = json.loads(json_content)
            if not isinstance(parsed_data, dict):
                # Valid JSON but not an event object (e.g. a bare list or
                # number): report it clearly instead of failing on `.get`.
                self.logger.error(
                    "Parse error: expected a JSON object, got "
                    f"{type(parsed_data).__name__}"
                )
                return None

            event_type = parsed_data.get("event")
            message_id = parsed_data.get("message_id")

            if event_type == "agent_message":
                return {
                    "type": "message",
                    "content": parsed_data.get("answer", ""),
                    "message_id": message_id,
                }
            if event_type == "agent_thought":
                return self._format_agent_thought(parsed_data, message_id)
            if event_type == "message_end":
                return {
                    "type": "end",
                    "message_id": message_id,
                    "metadata": parsed_data.get("metadata", {}),
                }
            return None
        except Exception as e:
            self.logger.error(f"Parse error: {str(e)}")
            return None

    def _format_agent_thought(self, parsed_data: Dict, message_id: Any) -> Dict:
        """Format an ``agent_thought`` event.

        A ``mermaid_diagrams`` tool call with a usable observation becomes a
        ``tool_output`` payload; anything else is returned as a ``thought``.
        """
        thought = parsed_data.get("thought", "")
        observation = parsed_data.get("observation", "")
        tool = parsed_data.get("tool", "")

        if tool == "mermaid_diagrams":
            try:
                cleaned_content = self._clean_mermaid_content(observation)
                if cleaned_content:
                    return {
                        "type": "tool_output",
                        "tool": "mermaid",
                        "content": cleaned_content,
                        "message_id": message_id,
                    }
            except Exception as e:
                # Best effort: fall through to the plain "thought" payload.
                self.logger.error(f"Failed to parse mermaid data: {str(e)}")

        return {
            "type": "thought",
            "content": {
                "thought": thought,
                "observation": observation,
                "tool": tool,
            },
            "message_id": message_id,
        }

    def _record_tool_output(self, output_type: Any, content: Any) -> None:
        """Append a tool output unless one with equal content already exists."""
        outputs = self.current_message.tool_outputs
        if content in (entry.get("content") for entry in outputs):
            return
        outputs.append({"type": output_type, "content": content})

    def _process_observation(self, data: Dict) -> Dict:
        """Record tool outputs found in ``data["observation"]``.

        * An observation mentioning ``mermaid_diagram`` is cleaned, recorded,
          and rewritten in *data* as ``{"mermaid_diagram": <cleaned>}`` JSON.
        * Any other observation that is a JSON object has each key/value pair
          recorded as a tool output.

        *data* is mutated in place and also returned. Errors are logged and
        never raised.
        """
        try:
            observation = data.get("observation")
            if not observation or not isinstance(observation, str):
                return data

            if "mermaid_diagram" in observation:
                cleaned_content = self.clean_mermaid_content(observation)
                self._record_tool_output("mermaid_diagram", cleaned_content)
                data["observation"] = json.dumps(
                    {"mermaid_diagram": cleaned_content}
                )
                return data

            # Parse once; non-JSON observations are simply not tool outputs.
            try:
                tool_data = json.loads(observation)
            except json.JSONDecodeError:
                return data

            if isinstance(tool_data, dict):
                for tool_name, tool_output in tool_data.items():
                    self._record_tool_output(tool_name, tool_output)
        except Exception as e:
            self.logger.error(f"Error processing observation: {str(e)}")
        return data

    def _handle_message_end(self, data: Dict) -> None:
        """Store the final citations and metadata and mark the message complete.

        ``data["metadata"]`` is stored by reference when it is a dict and gains
        a ``"tool_outputs"`` key, so the caller's event payload reflects the
        collected tool outputs. A missing or null metadata/citation field
        falls back to an empty container.
        """
        message = self.current_message

        message.citations = data.get("retriever_resources") or []

        metadata = data.get("metadata")
        if not isinstance(metadata, dict):
            # Covers both a missing key and an explicit JSON null.
            metadata = {}
        metadata["tool_outputs"] = message.tool_outputs
        message.metadata = metadata

        message.is_complete = True

    def clean_mermaid_content(self, content: str) -> str:
        """Clean and format mermaid diagram content.

        Removes markdown code fences, a ``tool response:`` preamble before the
        first ``{`` and a trailing ``.`` after a closing ``}``. If the result
        is a JSON object with a string ``mermaid_diagram`` value, that value
        is returned; otherwise the cleaned text is returned. On unexpected
        failure the input is returned unchanged after logging.
        """
        try:
            cleaned = re.sub(r'```mermaid\s*|\s*```', '', content)
            cleaned = re.sub(r'tool response:.*?{', '{', cleaned)
            cleaned = re.sub(r'}\s*\.$', '}', cleaned)

            if cleaned.strip().startswith('{'):
                try:
                    content_dict = json.loads(cleaned)
                except json.JSONDecodeError:
                    content_dict = None
                if isinstance(content_dict, dict):
                    diagram = content_dict.get("mermaid_diagram", cleaned)
                    # Keep the declared `str` return type: ignore a
                    # non-string diagram value and keep the cleaned text.
                    if isinstance(diagram, str):
                        cleaned = diagram

            return cleaned.strip()
        except Exception as e:
            self.logger.error(f"Error cleaning mermaid content: {e}")
            return content