| """ | |
| Translates from OpenAI's `/v1/audio/transcriptions` to Deepgram's `/v1/listen` | |
| """ | |
| import io | |
| from typing import List, Optional, Union | |
| from httpx import Headers, Response | |
| from litellm.llms.base_llm.chat.transformation import BaseLLMException | |
| from litellm.secret_managers.main import get_secret_str | |
| from litellm.types.llms.openai import ( | |
| AllMessageValues, | |
| OpenAIAudioTranscriptionOptionalParams, | |
| ) | |
| from litellm.types.utils import FileTypes, TranscriptionResponse | |
| from ...base_llm.audio_transcription.transformation import ( | |
| BaseAudioTranscriptionConfig, | |
| LiteLLMLoggingObj, | |
| ) | |
| from ..common_utils import DeepgramException | |
| class DeepgramAudioTranscriptionConfig(BaseAudioTranscriptionConfig): | |
| def get_supported_openai_params( | |
| self, model: str | |
| ) -> List[OpenAIAudioTranscriptionOptionalParams]: | |
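        """
        OpenAI transcription params that have a Deepgram `/v1/listen` equivalent.
        """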
| return ["language"] | |
| def map_openai_params( | |
| self, | |
| non_default_params: dict, | |
| optional_params: dict, | |
| model: str, | |
| drop_params: bool, | |
| ) -> dict: | |
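        """
        Copies the supported OpenAI params into `optional_params`; params outside the
        supported list are ignored here.
        """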
        supported_params = self.get_supported_openai_params(model)
        for k, v in non_default_params.items():
            if k in supported_params:
                optional_params[k] = v
        return optional_params

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
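        """
        Wraps provider errors in `DeepgramException` (a `BaseLLMException` subclass).
        """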
        return DeepgramException(
            message=error_message, status_code=status_code, headers=headers
        )

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> Union[dict, bytes]:
        """
        Processes the audio file input based on its type and returns the binary data.

        Args:
            audio_file: A file path (str), a tuple of (filename, file_content), binary
                data (bytes), or a file-like object (io.BufferedReader / io.BytesIO).

        Returns:
            The binary data of the audio file.
        """
        binary_data: bytes  # Explicitly declare the type

        # Handle the audio file based on type
        if isinstance(audio_file, str):
            # If it's a file path
            with open(audio_file, "rb") as f:
                binary_data = f.read()  # `f.read()` always returns `bytes`
        elif isinstance(audio_file, tuple):
            # Handle tuple case
            _, file_content = audio_file[:2]
            if isinstance(file_content, str):
                with open(file_content, "rb") as f:
                    binary_data = f.read()  # `f.read()` always returns `bytes`
            elif isinstance(file_content, bytes):
                binary_data = file_content
            else:
                raise TypeError(
                    f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
                )
        elif isinstance(audio_file, bytes):
            # Assume it's already binary data
            binary_data = audio_file
        elif isinstance(audio_file, (io.BufferedReader, io.BytesIO)):
            # Handle file-like objects
            binary_data = audio_file.read()
        else:
            raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")

        return binary_data

    def transform_audio_transcription_response(
        self,
        model: str,
        raw_response: Response,
        model_response: TranscriptionResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
    ) -> TranscriptionResponse:
        """
        Transforms the raw response from Deepgram to the TranscriptionResponse format
        """
        try:
            response_json = raw_response.json()

            # Get the first alternative from the first channel
            first_channel = response_json["results"]["channels"][0]
            first_alternative = first_channel["alternatives"][0]

            # Extract the full transcript
            text = first_alternative["transcript"]

            # Create TranscriptionResponse object
            response = TranscriptionResponse(text=text)

            # Add additional metadata matching OpenAI format
            response["task"] = "transcribe"
            response[
                "language"
            ] = "english"  # Deepgram auto-detects but doesn't return language
            response["duration"] = response_json["metadata"]["duration"]

            # Transform words to match OpenAI format
            if "words" in first_alternative:
                response["words"] = [
                    {"word": word["word"], "start": word["start"], "end": word["end"]}
                    for word in first_alternative["words"]
                ]

            # Store full response in hidden params
            response._hidden_params = response_json

            return response
        except Exception as e:
            raise ValueError(
                f"Error transforming Deepgram response: {str(e)}\nResponse: {raw_response.text}"
            )

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
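        """
        Builds the Deepgram `/v1/listen` URL with the model passed as a query parameter.

        Uses `api_base` when provided, otherwise falls back to the `DEEPGRAM_API_BASE`
        environment variable, then to the public `https://api.deepgram.com/v1` endpoint.
        """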
        if api_base is None:
            api_base = (
                get_secret_str("DEEPGRAM_API_BASE") or "https://api.deepgram.com/v1"
            )
        api_base = api_base.rstrip("/")  # Remove trailing slash if present
        return f"{api_base}/listen?model={model}"

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
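        """
        Returns the Deepgram authorization header, using the explicit `api_key` if
        provided and falling back to the `DEEPGRAM_API_KEY` environment variable.
        """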
        api_key = api_key or get_secret_str("DEEPGRAM_API_KEY")
        return {
            "Authorization": f"Token {api_key}",
        }
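

# Minimal usage sketch (illustrative assumptions: the `deepgram/` model prefix, the
# `nova-2` model name, and a local `sample.wav`; the API key is read from
# `DEEPGRAM_API_KEY` by `validate_environment` above):
#
#     import litellm
#
#     with open("sample.wav", "rb") as audio:
#         result = litellm.transcription(
#             model="deepgram/nova-2",
#             file=audio,
#             language="en",
#         )
#     print(result.text)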