#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import io
import json
from dataclasses import dataclass
from typing import List

from openai._types import NOT_GIVEN, NotGiven
from openai.types.chat import (
    ChatCompletionMessageParam,
    ChatCompletionToolChoiceOptionParam,
    ChatCompletionToolParam,
)
from PIL import Image

from pipecat.frames.frames import Frame, VisionImageRawFrame
# JSON custom encoder to handle bytes arrays so that we can log contexts
# with images to the console.
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that renders ``io.BytesIO`` values as a short hex preview."""

    def default(self, obj):
        # Anything that is not a BytesIO falls through to the standard encoder.
        if not isinstance(obj, io.BytesIO):
            return super().default(obj)
        # Only the first 8 bytes, hex-encoded, so logged contexts stay readable.
        preview = obj.getbuffer()[:8].hex()
        return f"{preview}..."
class OpenAILLMContext:
    """Mutable container for an OpenAI chat-completion conversation.

    Holds the running message list plus the ``tools`` / ``tool_choice``
    settings that accompany each completion request.
    """

    def __init__(
        self,
        messages: List[ChatCompletionMessageParam] | None = None,
        tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN
    ):
        # Avoid sharing a mutable default: each context owns its own list.
        self.messages: List[ChatCompletionMessageParam] = messages if messages else []
        self.tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = tool_choice
        self.tools: List[ChatCompletionToolParam] | NotGiven = tools

    @staticmethod
    def from_messages(messages: List[dict]) -> "OpenAILLMContext":
        """Build a context from plain message dicts.

        Each dict must provide "content" and "role"; "name" is optional
        and defaults to the role when absent.
        """
        context = OpenAILLMContext()
        for message in messages:
            context.add_message({
                "content": message["content"],
                "role": message["role"],
                # Fall back to the role when no explicit name is given.
                "name": message.get("name", message["role"])
            })
        return context

    @staticmethod
    def from_image_frame(frame: VisionImageRawFrame) -> "OpenAILLMContext":
        """Build a single-message context from a raw image frame.

        For images, we are deviating from the OpenAI messages shape. OpenAI
        expects images to be base64 encoded, but other vision models may not.
        So we'll store the image as bytes and do the base64 encoding as needed
        in the LLM service.
        """
        context = OpenAILLMContext()
        buffer = io.BytesIO()
        # Re-encode the raw frame as JPEG; the LLM service converts further
        # (e.g. base64) only when it actually needs to.
        Image.frombytes(
            frame.format,
            frame.size,
            frame.image
        ).save(
            buffer,
            format="JPEG")
        context.add_message({
            "content": frame.text,
            "role": "user",
            "data": buffer,
            "mime_type": "image/jpeg"
        })
        return context

    def add_message(self, message: ChatCompletionMessageParam):
        """Append one message to the conversation."""
        self.messages.append(message)

    def get_messages(self) -> List[ChatCompletionMessageParam]:
        """Return the (mutable) message list."""
        return self.messages

    def get_messages_json(self) -> str:
        """Serialize the messages; image buffers render as short hex previews."""
        return json.dumps(self.messages, cls=CustomEncoder)

    def set_tool_choice(
        self, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven
    ):
        """Set the tool_choice forwarded with completion requests."""
        self.tool_choice = tool_choice

    def set_tools(self, tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN):
        """Set the tool list; an empty list is normalized to NOT_GIVEN."""
        # NOT_GIVEN is a sentinel, so compare by identity; an empty tool list
        # means "no tools", which the OpenAI API expects as NOT_GIVEN.
        if tools is not NOT_GIVEN and not tools:
            tools = NOT_GIVEN
        self.tools = tools
@dataclass
class OpenAILLMContextFrame(Frame):
    """Like an LLMMessagesFrame, but with extra context specific to the OpenAI
    API. The context in this message is also mutable, and will be changed by the
    OpenAIContextAggregator frame processor.
    """
    # NOTE(review): the `dataclass` import at the top of the file plus this bare
    # field annotation indicate the @dataclass decorator was dropped; without it
    # `context` is only an annotation and never becomes a constructor field.
    # Confirm that Frame is a dataclass base in pipecat.
    context: OpenAILLMContext