# # Copyright (c) 2024–2025, Daily # # SPDX-License-Identifier: BSD 2-Clause License # import argparse import asyncio import os import sys from loguru import logger from call_connection_manager import CallConfigManager, SessionManager from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import ( AudioRawFrame, EndTaskFrame, Frame, LLMMessagesFrame, TranscriptionFrame, ) from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.services.daily import DailyParams, DailyTransport logger.remove(0) logger.add(sys.stderr, level="DEBUG") class VoicemailDetectionProcessor(FrameProcessor): def __init__(self, session_manager, call_config_manager, task): super().__init__() self.session_manager = session_manager self.call_config_manager = call_config_manager self.task = task async def process_frame(self, frame: Frame, direction: FrameDirection): await super().process_frame(frame, direction) if direction == FrameDirection.DOWNSTREAM: if isinstance(frame, TranscriptionFrame): logger.debug(f"Transcription: {frame.text}") if not self.session_manager.call_flow_state.voicemail_detected: if "voicemail" in frame.text.lower() or "leave a message" in frame.text.lower(): logger.info("Voicemail detected") self.session_manager.call_flow_state.set_voicemail_detected() content = "Voicemail detected, leaving a message." message = self.call_config_manager.create_system_message(content) await self.task.queue_frames([LLMMessagesFrame([message])]) else: logger.info("Human detected") self.session_manager.call_flow_state.set_human_detected() await self.push_frame(frame, direction) async def main(room_url: str, token: str, body: dict): call_config_manager = CallConfigManager.from_json_string(body) if body else CallConfigManager() dialout_settings = call_config_manager.get_dialout_settings() test_mode = call_config_manager.is_test_mode() session_manager = SessionManager() # ------------ TRANSPORT SETUP ------------ transport_params = DailyParams( api_url=os.environ.get("DAILY_API_URL", "https://api.daily.co/v1"), api_key=os.environ.get("HF_DAILY_API_KEY", ""), audio_in_enabled=True, audio_out_enabled=True, video_out_enabled=False, vad_analyzer=SileroVADAnalyzer(), transcription_enabled=False, # Deepgram will handle transcription ) transport = DailyTransport(room_url, token, "Voicemail Detection Bot", transport_params) tts = CartesiaTTSService( api_key=os.environ.get("HF_CARTESIA_API_KEY", ""), voice_id="b7d50908-b17c-442d-ad8d-810c63997ed9", ) stt = DeepgramSTTService( api_key=os.environ.get("HF_DEEPGRAM_API_KEY", ""), model="nova-2", ) llm = OpenAILLMService(api_key=os.environ.get("HF_OPENAI_API_KEY")) # ------------ LLM AND CONTEXT SETUP ------------ system_instruction = """You are a friendly, helpful robot. If a human answers, greet them and ask how you can assist. If a voicemail is detected, leave a brief message: 'Hello, this is a test call from Pipecat. Please call us back at your convenience.'""" messages = [call_config_manager.create_system_message(system_instruction)] context = OpenAILLMContext(messages) context_aggregator = llm.create_context_aggregator(context) # ------------ PIPELINE SETUP ------------ voicemail_detector = VoicemailDetectionProcessor(session_manager, call_config_manager, task) pipeline = Pipeline([ transport.input(), stt, voicemail_detector, context_aggregator.user(), llm, tts, transport.output(), context_aggregator.assistant(), ]) task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True)) @transport.event_handler("on_first_participant_joined") async def on_first_participant_joined(transport, participant): await task.queue_frames([context_aggregator.user().get_context_frame()]) @transport.event_handler("on_participant_left") async def on_participant_left(transport, participant, reason): logger.debug(f"Participant left: {participant}, reason: {reason}") await task.cancel() # ------------ DIALOUT ------------ if not test_mode: await call_config_manager.start_dialout(transport, dialout_settings) # ------------ RUN PIPELINE ------------ runner = PipelineRunner() await runner.run(task) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Pipecat Voicemail Detection Bot") parser.add_argument("-u", "--url", type=str, help="Room URL") parser.add_argument("-t", "--token", type=str, help="Room Token") parser.add_argument("-b", "--body", type=str, help="JSON configuration string") args = parser.parse_args() logger.info(f"Room URL: {args.url}") logger.info(f"Token: {args.token}") logger.info(f"Body provided: {bool(args.body)}") asyncio.run(main(args.url, args.token, args.body))