Spaces:

mamogasr
/

llm_engineering

Sleeping

File size: 6,310 Bytes

5fdb69e

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a60e0f78-4637-4318-9ab6-309c3f7f2799",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "\n",
    "# Pull variables from a local .env file into the process environment.\n",
    "load_dotenv()\n",
    "\n",
    "# Report whether a key is present without echoing its value.\n",
    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
    "print(\"API Key set\" if openai_api_key else \"OpenAI API Key not set\")\n",
    "\n",
    "# Model name constant and the OpenAI client (the client is used by the TTS cell).\n",
    "MODEL = \"gpt-4o-mini\"\n",
    "openai = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67026ef0-23be-4101-9371-b11f96f505bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TTS\n",
    "\n",
    "from pydub import AudioSegment\n",
    "import os\n",
    "import subprocess\n",
    "from io import BytesIO\n",
    "import tempfile\n",
    "\n",
    "# Scratch directory for audio files. Set NOVA_TEMP_DIR to relocate it;\n",
    "# the fallback is the original machine-specific location.\n",
    "custom_temp_dir = os.environ.get(\"NOVA_TEMP_DIR\", r\"D:\\projects\\llm_engineering-main\\temp\")\n",
    "os.makedirs(custom_temp_dir, exist_ok=True)\n",
    "\n",
    "# FFmpeg binaries handed to pydub. FFMPEG_PATH / FFPROBE_PATH override the\n",
    "# machine-specific fallbacks below.\n",
    "AudioSegment.converter = os.environ.get(\"FFMPEG_PATH\", r\"D:\\Anaconda3\\envs\\llms\\Library\\bin\\ffmpeg.exe\")\n",
    "AudioSegment.ffprobe = os.environ.get(\"FFPROBE_PATH\", r\"D:\\Anaconda3\\envs\\llms\\Library\\bin\\ffprobe.exe\")\n",
    "\n",
    "def play_audio_with_ffplay(audio_segment, temp_dir):\n",
    "    # Explicitly create and manage a temporary file\n",
    "    temp_file_path = os.path.join(temp_dir, \"temp_output.wav\")\n",
    "    \n",
    "    # Export the audio to the temporary file\n",
    "    audio_segment.export(temp_file_path, format=\"wav\")\n",
    "    \n",
    "    try:\n",
    "        # Play the audio using ffplay\n",
    "        subprocess.call([\"ffplay\", \"-nodisp\", \"-autoexit\", temp_file_path])\n",
    "    finally:\n",
    "        # Clean up the temporary file after playback\n",
    "        if os.path.exists(temp_file_path):\n",
    "            os.remove(temp_file_path)\n",
    "\n",
    "def talker(message):\n",
    "    # Mocked OpenAI response for testing\n",
    "    response = openai.audio.speech.create(\n",
    "        model=\"tts-1\",\n",
    "        voice=\"nova\",\n",
    "        input=message\n",
    "    )\n",
    "    \n",
    "    # Handle audio stream\n",
    "    audio_stream = BytesIO(response.content)\n",
    "    audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
    "    \n",
    "    # Play the audio\n",
    "    play_audio_with_ffplay(audio, custom_temp_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12c66b44-293a-4bf9-b81e-0f6905fbf607",
   "metadata": {},
   "outputs": [],
   "source": [
    "# STT Whisper\n",
    "\n",
    "import whisper\n",
    "import sounddevice as sd\n",
    "import numpy as np\n",
    "from scipy.io.wavfile import write\n",
    "\n",
    "def record_audio(temp_dir, duration=5, samplerate=16000, device_id=2):\n",
    "    \"\"\"Capture single-channel audio and save it as ``mic_input.wav``.\n",
    "\n",
    "    Args:\n",
    "        temp_dir: Directory that receives the WAV file.\n",
    "        duration: Recording length in seconds.\n",
    "        samplerate: Sampling rate passed to the recorder and the WAV writer.\n",
    "        device_id: Input device index assigned to ``sd.default.device``.\n",
    "\n",
    "    Returns:\n",
    "        Path of the written WAV file.\n",
    "    \"\"\"\n",
    "    # Select the input device; the output slot of the pair is left as None.\n",
    "    sd.default.device = (device_id, None)\n",
    "\n",
    "    frame_count = int(duration * samplerate)\n",
    "    samples = sd.rec(frame_count, samplerate=samplerate, channels=1, dtype=\"int16\")\n",
    "    sd.wait()  # hold here until the recording has finished\n",
    "\n",
    "    wav_path = os.path.join(temp_dir, \"mic_input.wav\")\n",
    "    write(wav_path, samplerate, samples)\n",
    "    return wav_path\n",
    "\n",
    "\n",
    "# Load the Whisper \"base\" model once so every transcription call reuses it.\n",
    "whisper_model = whisper.load_model(\"base\")\n",
    "\n",
    "\n",
    "def transcribe_audio(audio_path):\n",
    "    \"\"\"Return the text Whisper produces for the audio file at ``audio_path``.\n",
    "\n",
    "    The transcription is requested with the language fixed to English.\n",
    "    \"\"\"\n",
    "    transcript = whisper_model.transcribe(audio_path, language=\"en\")\n",
    "    return transcript[\"text\"]\n",
    "\n",
    "def mic_to_text():\n",
    "    \"\"\"Record ten seconds of audio into the temp directory and return its transcription.\"\"\"\n",
    "    return transcribe_audio(record_audio(custom_temp_dir, duration=10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0156c106-1844-444a-9a22-88c3475805d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chat Functions\n",
    "\n",
    "import requests\n",
    "history = [{\"role\": \"system\", \"content\": \"You are Nova the friendly robot. Reply within couple of sentences.\"}]\n",
    "\n",
    "def run_chat():\n",
    "    running = True\n",
    "    while running:\n",
    "        input_text = input(\"press Enter to talk\")        \n",
    "        user_input = input_text if input_text.strip() else mic_to_text()\n",
    "        running = False if input_text == \"bye\" or user_input.strip() == \"bye\" else True\n",
    "        print(f\"\\nYou: {user_input}\\n\\n\")\n",
    "        history.append({\"role\": \"user\", \"content\": user_input})    \n",
    "        api_run = requests.post(\n",
    "            \"http://localhost:11434/api/chat\", \n",
    "            json={\n",
    "                \"model\": \"llama3.2\",\n",
    "                \"messages\": history,\n",
    "                \"stream\": False\n",
    "            }, \n",
    "            headers={\"Content-Type\": \"application/json\"}\n",
    "        )\n",
    "        output_message = api_run.json()['message']['content']\n",
    "        print(f\"Nova: {output_message}\\n\\n\")        \n",
    "        talker(output_message)\n",
    "        history.append({\"role\": \"assistant\", \"content\": output_message})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de61b54e-387e-4480-a592-c78e3245ddde",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_chat()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce16bee7-6ea6-46d5-a407-385e6ae31db8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}