theshresthshukla committed on
Commit
d74d03f
·
verified ·
1 Parent(s): 6355672

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +150 -0
  2. readme.txt +34 -0
app.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_groq import ChatGroq
2
+ import os
3
+ import gradio as gr
4
+ from langchain.chains import ConversationChain
5
+ from langchain.memory import ConversationBufferMemory
6
+ from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
7
+ from langchain.document_loaders import PyPDFLoader
8
+ from langchain_experimental.text_splitter import SemanticChunker
9
+ from langchain.vectorstores import FAISS
10
+ from gtts import gTTS
11
+ import tempfile
12
+
13
# Read the Groq API key from the environment (for example a Hugging Face
# Spaces secret). DO NOT hardcode your API key here.
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')

# The model id can be overridden with the GROQ_MODEL environment variable so
# it can be swapped without a code change. An unset or empty variable keeps
# the id this app was written against.
# NOTE(review): confirm the default model id is still served by Groq.
GROQ_MODEL = os.environ.get('GROQ_MODEL') or "llama3-70b-8192"

# Initialize Groq LLM
llm = ChatGroq(
    model_name=GROQ_MODEL,
    temperature=0.7,
    api_key=GROQ_API_KEY,
)

# Initialize memory
# NOTE(review): the memory object is created once at module level, so every
# request served by this process goes through the same conversation chain
# and the same history buffer -- confirm that sharing is intended.
memory = ConversationBufferMemory()
conversation = ConversationChain(llm=llm, memory=memory)
27
+
28
+ # Load PDF and create embeddings
29
def initialize_rag(
    pdf_path="TourismChatbot.pdf",
    embedding_model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
):
    """Build the FAISS vector store that backs retrieval.

    Args:
        pdf_path: Path of the PDF to index. Defaults to the file the app
            ships with.
        embedding_model_name: FastEmbed model used both for semantic
            chunking and for the vector store.

    Returns:
        A ``(vectorstore, embed_model)`` tuple, or ``(None, None)`` when the
        PDF is missing, contains no text, or any step of the pipeline fails.
        Failures are reported on stdout and never raised.
    """
    # Check for the file first so the most likely failure gets a precise
    # message instead of a loader traceback.
    if not os.path.isfile(pdf_path):
        print(f"Error initializing RAG: PDF not found: {pdf_path}")
        return None, None

    try:
        # Load the PDF document
        loader = PyPDFLoader(pdf_path)
        pages = loader.load_and_split()

        # Blank pages carry no information; embedding them only adds noise.
        usable_pages = [page for page in pages if page.page_content.strip()]
        if not usable_pages:
            print(f"Error initializing RAG: no text found in {pdf_path}")
            return None, None

        # Create embeddings
        embed_model = FastEmbedEmbeddings(model_name=embedding_model_name)

        # Create semantic chunks, forwarding each page's metadata so every
        # chunk keeps the metadata of the page it came from.
        semantic_chunker = SemanticChunker(
            embed_model, breakpoint_threshold_type="percentile"
        )
        semantic_chunks = semantic_chunker.create_documents(
            [page.page_content for page in usable_pages],
            metadatas=[page.metadata for page in usable_pages],
        )

        # Create vector store
        store = FAISS.from_documents(
            documents=semantic_chunks, embedding=embed_model
        )
        return store, embed_model
    except Exception as e:
        # Startup boundary: the app keeps running without a vector store
        # rather than failing to start.
        print(f"Error initializing RAG: {e}")
        return None, None


# Initialize RAG components once at import time; both are None on failure.
vectorstore, embed_model = initialize_rag()
53
+
54
+ # Function to retrieve relevant information from the vector store
55
# Generic context handed to the LLM whenever retrieval cannot supply any.
_FALLBACK_CHUNKS = (
    "Rajasthan is a state in India known for its forts, palaces, and desert landscapes.",
)


def retrieve_relevant_chunks(query, top_k=3):
    """Return the text of the chunks most similar to ``query``.

    Args:
        query: Text to search the vector store with.
        top_k: Maximum number of chunks to return; values below 1 are
            treated as 1.

    Returns:
        A new list of chunk strings. The generic fallback sentence is
        returned instead when the vector store is unavailable, the search
        raises, or the search yields no non-blank text.
    """
    try:
        if vectorstore is None:
            return list(_FALLBACK_CHUNKS)
        documents = vectorstore.similarity_search(query, k=max(1, top_k))
        chunks = [
            doc.page_content for doc in documents if doc.page_content.strip()
        ]
    except Exception as e:
        # Retrieval is best-effort: report the problem and answer from the
        # fallback context instead of failing the whole request.
        print(f"Error retrieving chunks: {e}")
        return list(_FALLBACK_CHUNKS)

    # An empty context would leave the prompt with nothing to answer from.
    return chunks or list(_FALLBACK_CHUNKS)
66
+
67
def generate_rag_response(query, language="English"):
    """Answer ``query`` from retrieved context, written in ``language``.

    Args:
        query: The user's question.
        language: Name of the language the model is asked to answer in.

    Returns:
        The model's answer with surrounding whitespace removed. A blank or
        missing query returns a fixed hint instead, without running
        retrieval or calling the model.
    """
    question = (query or "").strip()
    if not question:
        # Nothing to look up: skip retrieval and the LLM call entirely.
        return "Please enter a question about Rajasthan tourism."

    retrieved_chunks = retrieve_relevant_chunks(question)
    context = "\n".join(retrieved_chunks)

    # Assemble the prompt line by line so the indentation of this source
    # file can never leak into the text sent to the model.
    prompt = "\n".join(
        [
            f"Please provide the answer in **{language}**.",
            "",
            "You are a helpful AI assistant providing tourism information about Rajasthan.",
            'Answer based on the following context. If information is unavailable, say "I don\'t know."',
            "",
            f"Context: {context}",
            f"Question: {question}",
            "",
            "Answer:",
        ]
    )

    # NOTE(review): `conversation` is the module-level chain with memory, so
    # presumably the full prompt (context included) is recorded in the
    # shared history on every call -- confirm that is intended.
    response = conversation.run(prompt)
    return response.strip()
83
+
84
def generate_speech(text, language):
    """Synthesize ``text`` to an MP3 file with gTTS.

    Args:
        text: The text to speak.
        language: Display name of the language; names without a known
            gTTS code fall back to English.

    Returns:
        Path of the generated ``.mp3`` file, or ``None`` when ``text`` is
        empty or blank (there is nothing to speak).

    Raises:
        Whatever gTTS raises while synthesizing or saving; the temporary
        file is removed before the error propagates.
    """
    if not text or not text.strip():
        return None

    lang_map = {"English": "en", "Hindi": "hi", "Spanish": "es", "French": "fr", "German": "de", "Tamil": "ta"}
    lang_code = lang_map.get(language, "en")
    tts = gTTS(text, lang=lang_code)

    # Reserve a path and close the handle right away: leaving it open leaks
    # a file descriptor per call and blocks the write on platforms that
    # forbid opening the same file twice.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        temp_audio_path = tmp.name

    try:
        tts.save(temp_audio_path)
    except Exception:
        # Do not leave an orphaned temp file behind on failure.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass
        raise
    return temp_audio_path
91
+
92
def chatbot_interface(query, language, chat_history):
    """Handle one chat turn: answer, synthesize audio, extend the history.

    Args:
        query: The user's question.
        language: Language selected in the UI.
        chat_history: Existing list of ``(question, answer)`` pairs, or
            ``None`` when the chat is still empty.

    Returns:
        A ``(history, audio_path, "")`` tuple: the updated history as a new
        list, the path of the spoken answer (``None`` if no audio is
        available), and an empty string that clears the query textbox.
        A blank query returns the history unchanged with no audio.
    """
    # Work on a copy so the caller's list is never mutated, and tolerate a
    # history that has not been created yet.
    history = list(chat_history or [])

    question = (query or "").strip()
    if not question:
        return history, None, ""

    response = generate_rag_response(question, language)

    # Audio is a convenience: if synthesis fails the text answer must still
    # reach the user.
    try:
        speech_path = generate_speech(response, language)
    except Exception as e:
        print(f"Error generating speech: {e}")
        speech_path = None

    if speech_path:
        # Point the user at the separate audio player below the chat.
        shown_response = f"{response} 🔊 (Click play below)"
    else:
        shown_response = response
    history.append((question, shown_response))

    return history, speech_path, ""  # Audio path feeds a separate gr.Audio
101
+
102
def handle_menu_click(topic, language, chat_history):
    """Run a quick-access menu topic through the normal chat flow.

    The topic is turned into a full question and handed to
    ``chatbot_interface``, whose return value is passed back unchanged.
    """
    return chatbot_interface(
        f"Give me information about {topic} in Rajasthan.",
        language,
        chat_history,
    )
105
+
106
+ # Define language and menu options
107
# Define language and menu options
language_options = ['English', 'Hindi', 'Spanish', 'French', 'German', 'Tamil']
menu_options = ["Places to Visit", "Best Time to Visit", "Festivals", "Cuisine", "Travel Tips"]

# Create the Gradio interface
with gr.Blocks(css="""
body {background-color: #FFF2E1; font-family: Arial, sans-serif;}
.gradio-container {max-width: 800px; margin: auto; padding: 20px; background: #FFF2E1;
    border-radius: 15px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);}
.gradio-title {color: #462f22; text-align: center; font-size: 24px; font-weight: bold;
    padding-bottom: 10px;}
.gradio-chat {border: 1px solid #e1c7a6; border-radius: 10px; padding: 10px; background: #fff;
    min-height: 250px; color:#462f22}
.gr-button {background-color:#FFFCF5; color: #ec8d12; font-size: 14px; border-radius: 8px;
    padding: 8px; border: 2px solid #e6ac55; cursor: pointer;}
.gr-button:hover {background-color: #ec8d12;color:#fff}
.clear-chat {float: right; background: #fff3e0; border: 1px solid #ed5722; color: #ed5722;
    font-weight: bold; border-radius: 6px; padding: 5px 10px; cursor: pointer;}
.chat-input {width: 100%; padding: 10px; border-radius: 8px; border: 1px solid #e1c7a6;}
""") as demo:

    gr.Markdown("<h2 class='gradio-title'>🐪 Rajasthan Tourism Chatbot</h2>")

    language_selector = gr.Dropdown(language_options, value="English", label="Select Language")
    chatbot = gr.Chatbot(label="Rajasthan Tourism Assistant", elem_classes="gradio-chat")

    # Lay out the quick-access buttons here, but attach their handlers only
    # after the shared output components below have been created.
    menu_buttons = []
    with gr.Row():
        for topic in menu_options:
            menu_buttons.append((topic, gr.Button(topic, elem_classes="gr-button")))

    query_input = gr.Textbox(placeholder="Ask about Rajasthan...", label="Enter your query", elem_classes="chat-input")
    audio_output = gr.Audio(label="🔊 Audio Response", type="filepath", visible=True)

    # Every button writes to the same chat, audio player and query box.
    # Creating fresh gr.Audio()/gr.Textbox() components per button would
    # render extra widgets in the button row and leave the shared player
    # untouched. The topic travels as a non-visual gr.State value.
    for topic, btn in menu_buttons:
        btn.click(
            handle_menu_click,
            inputs=[gr.State(topic), language_selector, chatbot],
            outputs=[chatbot, audio_output, query_input],
        )

    query_input.submit(
        chatbot_interface,
        inputs=[query_input, language_selector, chatbot],
        outputs=[chatbot, audio_output, query_input],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
readme.txt ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Rajasthan Tourism Chatbot
2
+
3
+ This is a RAG-powered chatbot that provides information about tourism in Rajasthan, India. The application uses Groq LLM for text generation and supports multiple languages.
4
+
5
+ ## Features
6
+
7
+ - Question answering about Rajasthan tourism
8
+ - Multi-language support (English, Hindi, Spanish, French, German, Tamil)
9
+ - Text-to-speech output in the selected language
10
+ - Quick access buttons for common tourism queries
11
+ - RAG (Retrieval Augmented Generation) integration using FastEmbed and FAISS
12
+
13
+ ## Technical Details
14
+
15
+ - Built with LangChain and Groq API
16
+ - Uses FastEmbed for embedding generation
17
+ - Semantic chunking for better text segmentation
18
+ - FAISS vector database for efficient similarity search
19
+ - Gradio for the user interface
20
+
21
+ ## Required API Key
22
+
23
+ This application requires a Groq API key to function. The key should be added as a secret in Hugging Face Spaces.
24
+
25
+ ## Usage
26
+
27
+ 1. Select your preferred language from the dropdown
28
+ 2. Use the quick access buttons for common queries
29
+ 3. Or type your own question in the text field
30
+ 4. Listen to the audio response by clicking the play button
31
+
32
+ ## Data Sources
33
+
34
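+ The chatbot answers from tourism information about Rajasthan stored in the TourismChatbot.pdf file. The file is indexed for retrieval at startup; the language model itself is not trained or fine-tuned on it.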