Spaces:

palbha
/

airline_faq_rag_agent

Sleeping

App Files Files Community

Palbha Kulkarni (Nazwale) commited on May 15

Commit

4460c42

1 Parent(s): bd044ff

Created using Colab

Browse files

Files changed (1) hide show

faq_data_generator.ipynb +232 -0

faq_data_generator.ipynb ADDED Viewed

	@@ -0,0 +1,232 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyNAxD9Hy7SaN4kD/p7d0PC5",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/palbha/airline-faq-rag/blob/main/faq_data_generator.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Install necessary libraries if required - This code ran on Google Colab & the libraires where supported by default- please rephrase this\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "-Ruq0mXsA9do"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import csv\n",
+        "import json\n",
+        "import os\n",
+        "import openai\n",
+        "import csv\n",
+        "from openai import OpenAI\n",
+        "from google.colab import userdata\n",
+        "\n",
+        "#Based on airlines FAQ I identified potential topics which can be shared with our agent to create FAQ's\n",
+        "FAQ_TOPICS = [\n",
+        "    \"Airport Services\",\n",
+        "    \"Animal Transportation\",\n",
+        "    \"Beyond Business\",\n",
+        "    \"Booking and managing a reservation\",\n",
+        "    \"Carbon Offsetting\",\n",
+        "    \"AirlineX Compliance\",\n",
+        "    \"Hotels, cars and travel insurance\",\n",
+        "    \"AirlineX Offers\",\n",
+        "    \"On-board experience\",\n",
+        "    \"Operational Updates\",\n",
+        "    \"Payments\",\n",
+        "    \"Privilege Club : Qatar Airways' loyalty programme\",\n",
+        "    \"ArlineX Airways Affiliate Program\",\n",
+        "    \"ArlineX Airways Packages\",\n",
+        "    \"ATravel - ArlineX Loyalty Program\",\n",
+        "    \"ATravel - ArlineX Loyalty Program - Account Cancellation\",\n",
+        "    \"ATravel - ArlineX Loyalty Program - Account Management\",\n",
+        "    \"ATravel - ArlineX Loyalty Program - Booking Terms and Conditions\",\n",
+        "    \"Travel Baggage\",\n",
+        "    \"Baggage\",\n",
+        "    \"BAGTAG\",\n",
+        "    \"Hand baggage\",\n",
+        "    \"Liquids\",\n",
+        "    \"Mishandled baggage\",\n",
+        "    \"Travel voucher\",\n",
+        "    \"Voucher redemption\",\n",
+        "    \"TripAdd\",\n",
+        "    \"eSIM - TripAdd\",\n",
+        "    \"Lounge - TripAdd\",\n",
+        "    \"Meet and Greet - TripAdd\",\n",
+        "    \"Young Travellers\",\n",
+        "    \"Travelling with children\",\n",
+        "    \"Unaccompanied minors\",\n",
+        "]"
+      ],
+      "metadata": {
+        "id": "aAvEjDrrRNJE"
+      },
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "openai = OpenAI(\n",
+        "   base_url=\"https://generativelanguage.googleapis.com/v1beta/\",\n",
+        "  api_key=userdata.get('gemini_api'),\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "SWnjOAg7BeO_"
+      },
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def get_faq_ques_for_topic(topic):\n",
+        "    messages = [\n",
+        "        {\n",
+        "            \"role\": \"system\",\n",
+        "            \"content\": (\n",
+        "                \"\"\"You are an assistant that generates FAQ-style questions  for an airline named Airline X, which operates international and domestic flights in Canada.\n",
+        "\n",
+        "For each topic, Generate  realistic and informative user-style questions for the FAQ topic. Do no include answers\n",
+        "\"\"\"\n",
+        "            )\n",
+        "        },\n",
+        "        {\n",
+        "            \"role\": \"user\",\n",
+        "            \"content\": f\"Generate FAQ 5-10 questions  about the topic: '{topic}'.\"\n",
+        "        },\n",
+        "        {\n",
+        "            \"role\": \"system\",\n",
+        "            \"content\": \"Return ONLY a valid JSON array of question objects. Do not include answers\"\n",
+        "        }\n",
+        "    ]\n",
+        "    response = openai.chat.completions.create(\n",
+        "        model=\"gemini-1.5-flash\",\n",
+        "        messages=messages,\n",
+        "        temperature=0.7,\n",
+        "        max_tokens=700,\n",
+        "    )\n",
+        "    # The model response should be a JSON array of objects like: [{\"question\": \"...\", \"answer\": \"...\"}, ...]\n",
+        "    content = response.choices[0].message.content.strip()\n",
+        "\n",
+        "\n",
+        "    return content\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "64J-8B5hRSMf"
+      },
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def get_faq_ans_for_topic(topic,question):\n",
+        "    messages = [\n",
+        "        {\n",
+        "            \"role\": \"system\",\n",
+        "            \"content\": (\n",
+        "                f\"You are an assistant that generates FAQ-style answers for an airline named Airline X, \"\n",
+        "        f\"which operates international and domestic flights in Canada.\\n\\n\"\n",
+        "        f\"The FAQ topic is: '{topic}'.\\n\"\n",
+        "\n",
+        "            )\n",
+        "        },\n",
+        "        {\n",
+        "            \"role\": \"user\",\n",
+        "            \"content\": f\"Generate FAQ answers for the question: '{question}'.\"\n",
+        "        }       ,\n",
+        "        {\n",
+        "            \"role\": \"system\",\n",
+        "            \"content\": \"Provide a clear, self-contained, and factual-sounding answer based on Airline X's own policies. \"\n",
+        "        \"Do NOT reference any website, support, or external links. Make the answer complete, realistic, \"\n",
+        "        \"and independent of outside context. Return ONLY answers\"\n",
+        "        }\n",
+        "    ]\n",
+        "    response = openai.chat.completions.create(\n",
+        "        model=\"gemini-1.5-flash\",\n",
+        "        messages=messages,\n",
+        "        temperature=0.7,\n",
+        "        max_tokens=700,\n",
+        "    )\n",
+        "    # The model response should be a JSON array of objects like: [{\"question\": \"...\", \"answer\": \"...\"}, ...]\n",
+        "    content = response.choices[0].message.content.strip()\n",
+        "\n",
+        "\n",
+        "    return content\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "wZhnhpHLSi7c"
+      },
+      "execution_count": 5,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "faq_data = []\n",
+        "import re\n",
+        "for topic in FAQ_TOPICS:\n",
+        "    questions=get_faq_ques_for_topic(topic)\n",
+        "    raw_text = questions.strip()\n",
+        "    cleaned = re.sub(r\"^```json|```$\", \"\", raw_text, flags=re.IGNORECASE).strip(\"`\\n \")\n",
+        "\n",
+        "    # Now attempt to parse\n",
+        "    question_data = json.loads(cleaned)\n",
+        "    for key in question_data:\n",
+        "      answer=get_faq_ans_for_topic(topic,key['question'])\n",
+        "      faq_data.append({\n",
+        "            \"topic\": topic,\n",
+        "            \"question\": key['question'],\n",
+        "            \"answer\": answer\n",
+        "        })\n"
+      ],
+      "metadata": {
+        "id": "bzTtCQijTlmq"
+      },
+      "execution_count": 6,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "pd.DataFrame(faq_data).to_csv(\"faq_data.csv\",index=False)"
+      ],
+      "metadata": {
+        "id": "nKwJfsMEWstr"
+      },
+      "execution_count": 7,
+      "outputs": []
+    }
+  ]
+}