import litellm
import random
from typing import Dict, List, Optional


class LiteLLMConfig:
    """Settings that ``LiteLLM`` forwards to ``litellm.completion``.

    Every constructor argument is stored unchanged on the instance under the
    same name, except ``seed`` (see below).
    """

    def __init__(
        self,
        api_key: str,
        base_url: str,
        frequency_penalty: float,
        max_tokens: int,
        model: str,
        presence_penalty: float,
        prompt: str,
        provider: str,
        temperature: float,
        top_p: float,
        seed: Optional[int] = None,
    ) -> None:
        """Store the given settings.

        Args:
            api_key: Sent as the ``api_key`` keyword of the completion call.
            base_url: Sent as the ``base_url`` keyword.
            frequency_penalty: Sent as the ``frequency_penalty`` keyword.
            max_tokens: Sent as the ``max_tokens`` keyword.
            model: Sent as the ``model`` keyword.
            presence_penalty: Sent as the ``presence_penalty`` keyword.
            prompt: Content of the system message placed before the
                caller-supplied messages.
            provider: Sent as the ``custom_llm_provider`` keyword.
            temperature: Sent as the ``temperature`` keyword.
            top_p: Sent as the ``top_p`` keyword.
            seed: Sent as the ``seed`` keyword. When ``None``, a random
                integer in ``[0, 10000]`` is drawn once, at construction.
        """
        self.api_key = api_key
        self.base_url = base_url
        self.frequency_penalty = frequency_penalty
        self.max_tokens = max_tokens
        self.model = model
        self.presence_penalty = presence_penalty
        self.prompt = prompt
        self.provider = provider
        # Only an explicit None triggers the random fallback; 0 is a valid seed.
        self.seed = seed if seed is not None else random.randint(0, 10000)
        self.temperature = temperature
        self.top_p = top_p

    @classmethod
    def default_config(cls) -> "LiteLLMConfig":
        """Return a config populated with the built-in default values.

        ``api_key``, ``base_url`` and ``provider`` are empty strings, and the
        seed is a fresh random integer in ``[0, 10000]`` on every call.
        """
        return cls(
            api_key="",
            base_url="",
            max_tokens=512,
            model="gpt-4o-mini",
            frequency_penalty=0.9,
            presence_penalty=0.9,
            # NOTE(review): the source as received had a physical line break
            # between "...being vague. " and "When giving..."; it is kept here
            # as the single space that preceded it. Confirm that no newline
            # character was intended at that position.
            prompt=(
                "You are a voice assistant who talks in a conversational way and "
                "can chat with me like my friends. "
                "I will speak to you in English or Chinese, and you will answer in "
                "the corrected and improved version of my text with the language I use. "
                "Don’t talk like a robot, instead I would like you to talk like a "
                "real human with emotions. "
                "I will use your answer for text-to-speech, so don’t return me any "
                "meaningless characters. "
                "I want you to be helpful, when I’m asking you for advice, give me "
                "precise, practical and useful advice instead of being vague. "
                "When giving me a list of options, express the options in a "
                "narrative way instead of bullet points."
            ),
            provider="",
            seed=random.randint(0, 10000),
            temperature=0.1,
            top_p=1.0,
        )


class LiteLLM:
    """Thin wrapper that issues chat completions using a ``LiteLLMConfig``."""

    def __init__(self, config: LiteLLMConfig) -> None:
        """Keep a reference to ``config``; it is read on every request."""
        self.config = config

    def get_chat_completions_stream(self, messages: List[Dict[str, str]]):
        """Request a chat completion with ``stream=True``.

        The configured prompt is sent as a leading ``system`` message,
        followed by ``messages`` in their given order.

        Args:
            messages: Conversation messages appended after the system prompt.

        Returns:
            Whatever ``litellm.completion`` returns for the request.

        Raises:
            Exception: If the ``litellm.completion`` call itself raises. The
                original error is attached as ``__cause__``.
        """
        kwargs = {
            "api_key": self.config.api_key,
            "base_url": self.config.base_url,
            "custom_llm_provider": self.config.provider,
            "frequency_penalty": self.config.frequency_penalty,
            "max_tokens": self.config.max_tokens,
            "messages": [
                {
                    "role": "system",
                    "content": self.config.prompt,
                },
                *messages,
            ],
            "model": self.config.model,
            "presence_penalty": self.config.presence_penalty,
            "seed": self.config.seed,
            "stream": True,
            "temperature": self.config.temperature,
            "top_p": self.config.top_p,
        }

        try:
            return litellm.completion(**kwargs)
        except Exception as e:
            # Chain explicitly so the underlying error stays reachable through
            # __cause__ while the raised type and message remain unchanged.
            raise Exception(f"get_chat_completions_stream failed, err: {e}") from e