Palbha Kulkarni (Nazwale) commited on
Commit
4460c42
·
1 Parent(s): bd044ff

Created using Colab

Browse files
Files changed (1) hide show
  1. faq_data_generator.ipynb +232 -0
faq_data_generator.ipynb ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "authorship_tag": "ABX9TyNAxD9Hy7SaN4kD/p7d0PC5",
8
+ "include_colab_link": true
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ }
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "metadata": {
22
+ "id": "view-in-github",
23
+ "colab_type": "text"
24
+ },
25
+ "source": [
26
+ "<a href=\"https://colab.research.google.com/github/palbha/airline-faq-rag/blob/main/faq_data_generator.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "markdown",
31
+ "source": [
32
+ "# Install necessary libraries if required. This notebook was run on Google Colab, where the required libraries are available by default.\n",
33
+ "\n"
34
+ ],
35
+ "metadata": {
36
+ "id": "-Ruq0mXsA9do"
37
+ }
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "source": [
42
+ "import csv\n",
43
+ "import json\n",
44
+ "import os\n",
45
+ "import openai\n",
46
+ "import csv\n",
47
+ "from openai import OpenAI\n",
48
+ "from google.colab import userdata\n",
49
+ "\n",
50
+ "# Based on airline FAQs, I identified potential topics that can be given to our agent to generate FAQs.\n",
51
+ "FAQ_TOPICS = [\n",
52
+ " \"Airport Services\",\n",
53
+ " \"Animal Transportation\",\n",
54
+ " \"Beyond Business\",\n",
55
+ " \"Booking and managing a reservation\",\n",
56
+ " \"Carbon Offsetting\",\n",
57
+ " \"AirlineX Compliance\",\n",
58
+ " \"Hotels, cars and travel insurance\",\n",
59
+ " \"AirlineX Offers\",\n",
60
+ " \"On-board experience\",\n",
61
+ " \"Operational Updates\",\n",
62
+ " \"Payments\",\n",
63
+ " \"Privilege Club : Qatar Airways' loyalty programme\",\n",
64
+ " \"ArlineX Airways Affiliate Program\",\n",
65
+ " \"ArlineX Airways Packages\",\n",
66
+ " \"ATravel - ArlineX Loyalty Program\",\n",
67
+ " \"ATravel - ArlineX Loyalty Program - Account Cancellation\",\n",
68
+ " \"ATravel - ArlineX Loyalty Program - Account Management\",\n",
69
+ " \"ATravel - ArlineX Loyalty Program - Booking Terms and Conditions\",\n",
70
+ " \"Travel Baggage\",\n",
71
+ " \"Baggage\",\n",
72
+ " \"BAGTAG\",\n",
73
+ " \"Hand baggage\",\n",
74
+ " \"Liquids\",\n",
75
+ " \"Mishandled baggage\",\n",
76
+ " \"Travel voucher\",\n",
77
+ " \"Voucher redemption\",\n",
78
+ " \"TripAdd\",\n",
79
+ " \"eSIM - TripAdd\",\n",
80
+ " \"Lounge - TripAdd\",\n",
81
+ " \"Meet and Greet - TripAdd\",\n",
82
+ " \"Young Travellers\",\n",
83
+ " \"Travelling with children\",\n",
84
+ " \"Unaccompanied minors\",\n",
85
+ "]"
86
+ ],
87
+ "metadata": {
88
+ "id": "aAvEjDrrRNJE"
89
+ },
90
+ "execution_count": 2,
91
+ "outputs": []
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "source": [
96
+ "openai = OpenAI(\n",
97
+ " base_url=\"https://generativelanguage.googleapis.com/v1beta/\",\n",
98
+ " api_key=userdata.get('gemini_api'),\n",
99
+ ")"
100
+ ],
101
+ "metadata": {
102
+ "id": "SWnjOAg7BeO_"
103
+ },
104
+ "execution_count": 3,
105
+ "outputs": []
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "source": [
110
+ "def get_faq_ques_for_topic(topic):\n",
111
+ " messages = [\n",
112
+ " {\n",
113
+ " \"role\": \"system\",\n",
114
+ " \"content\": (\n",
115
+ " \"\"\"You are an assistant that generates FAQ-style questions for an airline named Airline X, which operates international and domestic flights in Canada.\n",
116
+ "\n",
117
+ "For each topic, Generate realistic and informative user-style questions for the FAQ topic. Do no include answers\n",
118
+ "\"\"\"\n",
119
+ " )\n",
120
+ " },\n",
121
+ " {\n",
122
+ " \"role\": \"user\",\n",
123
+ " \"content\": f\"Generate FAQ 5-10 questions about the topic: '{topic}'.\"\n",
124
+ " },\n",
125
+ " {\n",
126
+ " \"role\": \"system\",\n",
127
+ " \"content\": \"Return ONLY a valid JSON array of question objects. Do not include answers\"\n",
128
+ " }\n",
129
+ " ]\n",
130
+ " response = openai.chat.completions.create(\n",
131
+ " model=\"gemini-1.5-flash\",\n",
132
+ " messages=messages,\n",
133
+ " temperature=0.7,\n",
134
+ " max_tokens=700,\n",
135
+ " )\n",
136
+ " # The model response should be a JSON array of question-only objects like: [{\"question\": \"...\"}, ...]\n",
137
+ " content = response.choices[0].message.content.strip()\n",
138
+ "\n",
139
+ "\n",
140
+ " return content\n",
141
+ "\n"
142
+ ],
143
+ "metadata": {
144
+ "id": "64J-8B5hRSMf"
145
+ },
146
+ "execution_count": 4,
147
+ "outputs": []
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "source": [
152
+ "def get_faq_ans_for_topic(topic,question):\n",
153
+ " messages = [\n",
154
+ " {\n",
155
+ " \"role\": \"system\",\n",
156
+ " \"content\": (\n",
157
+ " f\"You are an assistant that generates FAQ-style answers for an airline named Airline X, \"\n",
158
+ " f\"which operates international and domestic flights in Canada.\\n\\n\"\n",
159
+ " f\"The FAQ topic is: '{topic}'.\\n\"\n",
160
+ "\n",
161
+ " )\n",
162
+ " },\n",
163
+ " {\n",
164
+ " \"role\": \"user\",\n",
165
+ " \"content\": f\"Generate FAQ answers for the question: '{question}'.\"\n",
166
+ " } ,\n",
167
+ " {\n",
168
+ " \"role\": \"system\",\n",
169
+ " \"content\": \"Provide a clear, self-contained, and factual-sounding answer based on Airline X's own policies. \"\n",
170
+ " \"Do NOT reference any website, support, or external links. Make the answer complete, realistic, \"\n",
171
+ " \"and independent of outside context. Return ONLY answers\"\n",
172
+ " }\n",
173
+ " ]\n",
174
+ " response = openai.chat.completions.create(\n",
175
+ " model=\"gemini-1.5-flash\",\n",
176
+ " messages=messages,\n",
177
+ " temperature=0.7,\n",
178
+ " max_tokens=700,\n",
179
+ " )\n",
180
+ " # The model response is expected to be the plain-text answer only (not a JSON array); it is returned as-is.\n",
181
+ " content = response.choices[0].message.content.strip()\n",
182
+ "\n",
183
+ "\n",
184
+ " return content\n",
185
+ "\n"
186
+ ],
187
+ "metadata": {
188
+ "id": "wZhnhpHLSi7c"
189
+ },
190
+ "execution_count": 5,
191
+ "outputs": []
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "source": [
196
+ "faq_data = []\n",
197
+ "import re\n",
198
+ "for topic in FAQ_TOPICS:\n",
199
+ " questions=get_faq_ques_for_topic(topic)\n",
200
+ " raw_text = questions.strip()\n",
201
+ " cleaned = re.sub(r\"^```json|```$\", \"\", raw_text, flags=re.IGNORECASE).strip(\"`\\n \")\n",
202
+ "\n",
203
+ " # Now attempt to parse\n",
204
+ " question_data = json.loads(cleaned)\n",
205
+ " for key in question_data:\n",
206
+ " answer=get_faq_ans_for_topic(topic,key['question'])\n",
207
+ " faq_data.append({\n",
208
+ " \"topic\": topic,\n",
209
+ " \"question\": key['question'],\n",
210
+ " \"answer\": answer\n",
211
+ " })\n"
212
+ ],
213
+ "metadata": {
214
+ "id": "bzTtCQijTlmq"
215
+ },
216
+ "execution_count": 6,
217
+ "outputs": []
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "source": [
222
+ "import pandas as pd\n",
223
+ "pd.DataFrame(faq_data).to_csv(\"faq_data.csv\",index=False)"
224
+ ],
225
+ "metadata": {
226
+ "id": "nKwJfsMEWstr"
227
+ },
228
+ "execution_count": 7,
229
+ "outputs": []
230
+ }
231
+ ]
232
+ }