# lucy1118's picture
# Upload 78 files
# 8d7f55c verified
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
from dataclasses import dataclass
import io
import json
from typing import List
from PIL import Image
from pipecat.frames.frames import Frame, VisionImageRawFrame
from openai._types import NOT_GIVEN, NotGiven
from openai.types.chat import (
ChatCompletionToolParam,
ChatCompletionToolChoiceOptionParam,
ChatCompletionMessageParam
)
# JSON custom encoder to handle bytes arrays so that we can log contexts
# with images to the console.
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that summarizes ``io.BytesIO`` values.

    Lets contexts containing raw image buffers be logged to the console
    without dumping the full byte payload.
    """

    def default(self, obj):
        # Anything that isn't a BytesIO defers to the stock encoder
        # (which raises TypeError for unsupported types).
        if not isinstance(obj, io.BytesIO):
            return super().default(obj)
        # Summarize the buffer as the hex of its first 8 bytes plus "...".
        preview = obj.getbuffer()[:8].hex()
        return f"{preview}..."
class OpenAILLMContext:
    """Mutable chat context for an OpenAI-style chat-completions call.

    Holds the message list plus the optional ``tools`` and ``tool_choice``
    settings. The message list is mutable and shared with callers of
    ``get_messages()``.
    """

    def __init__(
        self,
        messages: List[ChatCompletionMessageParam] | None = None,
        tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN
    ):
        # Create a fresh list when no messages are supplied so instances
        # never share a default list.
        self.messages: List[ChatCompletionMessageParam] = messages if messages else []
        self.tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = tool_choice
        self.tools: List[ChatCompletionToolParam] | NotGiven = tools

    @staticmethod
    def from_messages(messages: List[dict]) -> "OpenAILLMContext":
        """Build a context from plain message dicts.

        Each dict must provide "content" and "role"; "name" falls back to
        the role when absent.
        """
        context = OpenAILLMContext()
        for message in messages:
            context.add_message({
                "content": message["content"],
                "role": message["role"],
                # Single lookup with a default instead of `in` + subscript.
                "name": message.get("name", message["role"]),
            })
        return context

    @staticmethod
    def from_image_frame(frame: VisionImageRawFrame) -> "OpenAILLMContext":
        """
        For images, we are deviating from the OpenAI messages shape. OpenAI
        expects images to be base64 encoded, but other vision models may not.
        So we'll store the image as bytes and do the base64 encoding as needed
        in the LLM service.
        """
        context = OpenAILLMContext()
        buffer = io.BytesIO()
        # Re-encode the raw frame as JPEG into an in-memory buffer.
        Image.frombytes(
            frame.format,
            frame.size,
            frame.image
        ).save(
            buffer,
            format="JPEG")
        context.add_message({
            "content": frame.text,
            "role": "user",
            "data": buffer,
            "mime_type": "image/jpeg"
        })
        return context

    def add_message(self, message: ChatCompletionMessageParam):
        """Append a single message to the context."""
        self.messages.append(message)

    def get_messages(self) -> List[ChatCompletionMessageParam]:
        """Return the live (mutable) message list."""
        return self.messages

    def get_messages_json(self) -> str:
        """Serialize the messages to JSON, summarizing image buffers."""
        return json.dumps(self.messages, cls=CustomEncoder)

    def set_tool_choice(
        self, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven
    ):
        """Set (or clear, via NOT_GIVEN) the tool-choice option."""
        self.tool_choice = tool_choice

    def set_tools(self, tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN):
        """Set the tool list; an empty list is normalized to NOT_GIVEN."""
        # NOT_GIVEN is a sentinel: identity check (`is not`), not equality.
        if tools is not NOT_GIVEN and not tools:
            tools = NOT_GIVEN
        self.tools = tools
@dataclass
class OpenAILLMContextFrame(Frame):
    """Frame that carries an OpenAILLMContext through the pipeline.

    Plays the same role as an LLMMessagesFrame but is specific to the
    OpenAI API. The wrapped context is mutable and is updated in place by
    the OpenAIContextAggregator frame processor.
    """
    context: OpenAILLMContext