Spaces:

palbha
/

airline_faq_rag_agent

Sleeping

File size: 8,186 Bytes

4460c42

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyNAxD9Hy7SaN4kD/p7d0PC5",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/palbha/airline-faq-rag/blob/main/faq_data_generator.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Install necessary libraries if required - This code ran on Google Colab & the libraires where supported by default- please rephrase this\n",
        "\n"
      ],
      "metadata": {
        "id": "-Ruq0mXsA9do"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import csv\n",
        "import json\n",
        "import os\n",
        "import openai\n",
        "import csv\n",
        "from openai import OpenAI\n",
        "from google.colab import userdata\n",
        "\n",
        "#Based on airlines FAQ I identified potential topics which can be shared with our agent to create FAQ's\n",
        "FAQ_TOPICS = [\n",
        "    \"Airport Services\",\n",
        "    \"Animal Transportation\",\n",
        "    \"Beyond Business\",\n",
        "    \"Booking and managing a reservation\",\n",
        "    \"Carbon Offsetting\",\n",
        "    \"AirlineX Compliance\",\n",
        "    \"Hotels, cars and travel insurance\",\n",
        "    \"AirlineX Offers\",\n",
        "    \"On-board experience\",\n",
        "    \"Operational Updates\",\n",
        "    \"Payments\",\n",
        "    \"Privilege Club : Qatar Airways' loyalty programme\",\n",
        "    \"ArlineX Airways Affiliate Program\",\n",
        "    \"ArlineX Airways Packages\",\n",
        "    \"ATravel - ArlineX Loyalty Program\",\n",
        "    \"ATravel - ArlineX Loyalty Program - Account Cancellation\",\n",
        "    \"ATravel - ArlineX Loyalty Program - Account Management\",\n",
        "    \"ATravel - ArlineX Loyalty Program - Booking Terms and Conditions\",\n",
        "    \"Travel Baggage\",\n",
        "    \"Baggage\",\n",
        "    \"BAGTAG\",\n",
        "    \"Hand baggage\",\n",
        "    \"Liquids\",\n",
        "    \"Mishandled baggage\",\n",
        "    \"Travel voucher\",\n",
        "    \"Voucher redemption\",\n",
        "    \"TripAdd\",\n",
        "    \"eSIM - TripAdd\",\n",
        "    \"Lounge - TripAdd\",\n",
        "    \"Meet and Greet - TripAdd\",\n",
        "    \"Young Travellers\",\n",
        "    \"Travelling with children\",\n",
        "    \"Unaccompanied minors\",\n",
        "]"
      ],
      "metadata": {
        "id": "aAvEjDrrRNJE"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "openai = OpenAI(\n",
        "   base_url=\"https://generativelanguage.googleapis.com/v1beta/\",\n",
        "  api_key=userdata.get('gemini_api'),\n",
        ")"
      ],
      "metadata": {
        "id": "SWnjOAg7BeO_"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def get_faq_ques_for_topic(topic):\n",
        "    messages = [\n",
        "        {\n",
        "            \"role\": \"system\",\n",
        "            \"content\": (\n",
        "                \"\"\"You are an assistant that generates FAQ-style questions  for an airline named Airline X, which operates international and domestic flights in Canada.\n",
        "\n",
        "For each topic, Generate  realistic and informative user-style questions for the FAQ topic. Do no include answers\n",
        "\"\"\"\n",
        "            )\n",
        "        },\n",
        "        {\n",
        "            \"role\": \"user\",\n",
        "            \"content\": f\"Generate FAQ 5-10 questions  about the topic: '{topic}'.\"\n",
        "        },\n",
        "        {\n",
        "            \"role\": \"system\",\n",
        "            \"content\": \"Return ONLY a valid JSON array of question objects. Do not include answers\"\n",
        "        }\n",
        "    ]\n",
        "    response = openai.chat.completions.create(\n",
        "        model=\"gemini-1.5-flash\",\n",
        "        messages=messages,\n",
        "        temperature=0.7,\n",
        "        max_tokens=700,\n",
        "    )\n",
        "    # The model response should be a JSON array of objects like: [{\"question\": \"...\", \"answer\": \"...\"}, ...]\n",
        "    content = response.choices[0].message.content.strip()\n",
        "\n",
        "\n",
        "    return content\n",
        "\n"
      ],
      "metadata": {
        "id": "64J-8B5hRSMf"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def get_faq_ans_for_topic(topic,question):\n",
        "    messages = [\n",
        "        {\n",
        "            \"role\": \"system\",\n",
        "            \"content\": (\n",
        "                f\"You are an assistant that generates FAQ-style answers for an airline named Airline X, \"\n",
        "        f\"which operates international and domestic flights in Canada.\\n\\n\"\n",
        "        f\"The FAQ topic is: '{topic}'.\\n\"\n",
        "\n",
        "            )\n",
        "        },\n",
        "        {\n",
        "            \"role\": \"user\",\n",
        "            \"content\": f\"Generate FAQ answers for the question: '{question}'.\"\n",
        "        }       ,\n",
        "        {\n",
        "            \"role\": \"system\",\n",
        "            \"content\": \"Provide a clear, self-contained, and factual-sounding answer based on Airline X's own policies. \"\n",
        "        \"Do NOT reference any website, support, or external links. Make the answer complete, realistic, \"\n",
        "        \"and independent of outside context. Return ONLY answers\"\n",
        "        }\n",
        "    ]\n",
        "    response = openai.chat.completions.create(\n",
        "        model=\"gemini-1.5-flash\",\n",
        "        messages=messages,\n",
        "        temperature=0.7,\n",
        "        max_tokens=700,\n",
        "    )\n",
        "    # The model response should be a JSON array of objects like: [{\"question\": \"...\", \"answer\": \"...\"}, ...]\n",
        "    content = response.choices[0].message.content.strip()\n",
        "\n",
        "\n",
        "    return content\n",
        "\n"
      ],
      "metadata": {
        "id": "wZhnhpHLSi7c"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "faq_data = []\n",
        "import re\n",
        "for topic in FAQ_TOPICS:\n",
        "    questions=get_faq_ques_for_topic(topic)\n",
        "    raw_text = questions.strip()\n",
        "    cleaned = re.sub(r\"^```json|```$\", \"\", raw_text, flags=re.IGNORECASE).strip(\"`\\n \")\n",
        "\n",
        "    # Now attempt to parse\n",
        "    question_data = json.loads(cleaned)\n",
        "    for key in question_data:\n",
        "      answer=get_faq_ans_for_topic(topic,key['question'])\n",
        "      faq_data.append({\n",
        "            \"topic\": topic,\n",
        "            \"question\": key['question'],\n",
        "            \"answer\": answer\n",
        "        })\n"
      ],
      "metadata": {
        "id": "bzTtCQijTlmq"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "pd.DataFrame(faq_data).to_csv(\"faq_data.csv\",index=False)"
      ],
      "metadata": {
        "id": "nKwJfsMEWstr"
      },
      "execution_count": 7,
      "outputs": []
    }
  ]
}