esra2001 committed on
Commit
2c63753
·
verified ·
1 Parent(s): 18de99f

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +226 -0
  2. requirement.txt +16 -0
main.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import logging
import os
import re
import smtplib
import uuid
from datetime import datetime, timezone
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import chromadb
import gradio as gr
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI
from sentence_transformers import CrossEncoder
from transformers import AutoModelForSequenceClassification, pipeline
19
+
20
# Load settings from a local .env file into os.environ before reading them.
load_dotenv()

# Enable LangChain tracing against the LangSmith endpoint.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# Both API keys must be supplied by the environment (or the .env file).
# Report every missing key at once instead of failing with a bare KeyError
# on the first one.
_missing_settings = [
    name
    for name in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY")
    if name not in os.environ
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Embedding model handed to the Chroma vector store.
embeddings_model = HuggingFaceEmbeddings(model_name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5")

# Zero-shot classifier shared by detect_intent() and university_related().
# The pipeline loads "facebook/bart-large-mnli" by name, so the checkpoint is
# not loaded a second time into a separate, unused model object.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
33
+
34
def detect_intent(text):
    """Classify *text* into one of the chatbot's coarse intents.

    The module-level zero-shot ``classifier`` is asked to choose between
    "question", "greeting", "small talk", "feedback" and "thanks".

    Returns:
        The first entry of the classifier's ``labels`` list, lower-cased.
    """
    intent_labels = ["question", "greeting", "small talk", "feedback", "thanks"]
    prediction = classifier(text, candidate_labels=intent_labels)
    best_label = prediction["labels"][0]
    return best_label.lower()
38
+
39
# Single source of truth for the on-disk Chroma database and the collection
# the chatbot queries.
chroma_db_path = "./chroma_db"
CHROMA_COLLECTION_NAME = "my_dataaaa"

chroma_client = chromadb.PersistentClient(path=chroma_db_path)

# Looking the collection up here surfaces a missing/unbuilt index at start-up
# rather than as empty retrieval results later.
data = chroma_client.get_collection(name=CHROMA_COLLECTION_NAME)

# Reuse the client opened above instead of letting the wrapper open a second
# client on the same directory.
vectorstore = Chroma(
    client=chroma_client,
    collection_name=CHROMA_COLLECTION_NAME,
    embedding_function=embeddings_model,
)

# Create a retriever from the Chroma data store (MMR search, 6 results).
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={'k': 6, 'lambda_mult': 0.25},
)

# Cross-encoder used by rerank_docs() to re-score retrieved passages.
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
56
+
57
def rerank_docs(query, docs, top_k=50):
    """Order *docs* by cross-encoder relevance to *query*.

    Args:
        query: The user's question.
        docs: Retrieved documents; each must expose ``page_content``.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` documents, highest score first. An empty ``docs``
        yields an empty list without calling the reranker.
    """
    # Nothing retrieved: skip the model call entirely.
    if not docs:
        return []

    pairs = [(query, doc.page_content) for doc in docs]
    scores = reranker.predict(pairs)
    ranked = sorted(zip(docs, scores), key=lambda pair: pair[1], reverse=True)
    return [doc for doc, _ in ranked[:top_k]]
64
# Prompt text for the RAG chain: the model is told to answer only from the
# supplied context and to reply with a fixed French sentence when the context
# does not contain the answer (uncertain() looks for that sentence).
_PROMPT_TEXT = """
You are a helpful assistant answering student questions based ONLY on the provided context.
You must read the entire context carefully and include all relevant information in your answer.
If multiple documents or requirements are mentioned, list them all clearly and completely.
If the answer is not found in the context, respond with: "je ne trouve pas la réponse."
Do not use your own knowledge for university-related questions. Only use what is in the context.
Structure the answer clearly and completely. Do not make any assumptions if the context does not have the answer.

Context:
{context}

Question:
{question}

Answer:
"""

custom_prompt = PromptTemplate.from_template(_PROMPT_TEXT)

# Chat model that generates the final answer.
llm = ChatOpenAI(model="gpt-3.5-turbo")
82
+
83
def format_docs(docs):
    """Concatenate the ``page_content`` of every document in *docs*.

    Documents are separated by one blank line; an empty iterable gives "".
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)
85
+
86
def _build_context(question):
    """Retrieve, rerank and format the passages relevant to *question*.

    The reranker must score passages against the actual question, so the
    question is threaded through explicitly rather than being replaced by a
    placeholder string.
    """
    candidates = retriever.invoke(question)
    return format_docs(rerank_docs(query=question, docs=candidates))


# RAG pipeline: the incoming question is used both to build the context and,
# unchanged, as the "question" prompt variable; the prompt is then sent to the
# chat model and the reply is reduced to a plain string.
rag_chain = (
    {
        "context": RunnableLambda(_build_context),
        "question": RunnablePassthrough(),
    }
    | custom_prompt
    | llm
    | StrOutputParser()
)
100
+
101
+
102
# JSON file (relative to the working directory) holding unanswered questions.
PENDING_QUESTIONS_FILE = "pending_questions.json"


def store_pending_question(user_email, question):
    """Append an unanswered question to the pending-questions JSON file.

    Args:
        user_email: Address of the user who asked the question.
        question: The question text.

    Returns:
        The freshly generated UUID string identifying the stored entry.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON. The
            file is deliberately left untouched in that case so that earlier
            entries are not overwritten.
    """
    q_id = str(uuid.uuid4())
    entry = {
        "id": q_id,
        # Timezone-aware UTC timestamp in ISO-8601 form.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "user_email": user_email,
        "question": question,
    }

    # Open directly and treat "no file yet" as an empty list, instead of a
    # separate existence check followed by open().
    try:
        with open(PENDING_QUESTIONS_FILE, "r", encoding="utf-8") as f:
            pending_questions = json.load(f)
    except FileNotFoundError:
        pending_questions = []

    pending_questions.append(entry)

    # Explicit UTF-8 and ensure_ascii=False keep accented French text readable
    # in the file regardless of the platform's default encoding.
    with open(PENDING_QUESTIONS_FILE, "w", encoding="utf-8") as f:
        json.dump(pending_questions, f, indent=4, ensure_ascii=False)
    return q_id
122
+
123
+
124
+
125
def send_question_to_admin(user_email, user_question, question_id):
    """E-mail an unanswered question to the administrator over SMTP.

    Args:
        user_email: Address of the asking user. It is accepted for interface
            compatibility but is not included in the message; the question is
            identified by ``question_id`` only.
        user_question: The question text placed in the mail body.
        question_id: Identifier written to the subject and the body.

    Returns:
        True when the message was handed to the SMTP server, False when the
        password is not configured or sending failed.
    """
    # NOTE(review): the addresses below look like redacted placeholders in
    # this view of the file - confirm the real values before deploying.
    admin_email = "[email protected]"
    smtp_server = "smtp.gmail.com"
    smtp_port = 587
    sender_email = "[email protected]"
    sender_password = os.getenv("BOT_EMAIL_PASSWORD")
    log = logging.getLogger(__name__)

    # Without credentials the login can only fail; report it without opening
    # a network connection first.
    if not sender_password:
        log.error(
            "BOT_EMAIL_PASSWORD is not set; question %s was not e-mailed",
            question_id,
        )
        return False

    subject = f"Nouvelle question [{question_id}] "
    body = (
        f"Question ID: {question_id}\n"
        f"Question posée :\n\n{user_question}"
    )

    message = MIMEMultipart()
    message["From"] = sender_email
    message["To"] = admin_email
    message["Reply-To"] = "[email protected]"
    message["Subject"] = subject
    message.attach(MIMEText(body, "plain"))

    try:
        # The timeout keeps an unreachable mail server from blocking the chat
        # handler indefinitely.
        with smtplib.SMTP(smtp_server, smtp_port, timeout=30) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.sendmail(sender_email, admin_email, message.as_string())
    except Exception:
        # Best-effort boundary: the caller turns False into a user-facing
        # message, so log the full traceback and do not propagate.
        log.exception("Error sending question %s to the administrator", question_id)
        return False
    return True
155
+
156
+
157
def university_related(question):
    """Tell whether *question* is classified as university-related.

    The module-level zero-shot ``classifier`` chooses between the labels
    "university" and "general knowledge".

    Returns:
        True when the first label returned by the classifier is "university".
    """
    prediction = classifier(
        question,
        candidate_labels=["university", "general knowledge"],
    )
    return prediction["labels"][0].lower() == "university"
162
+
163
def uncertain(answer):
    """Return True when *answer* signals that no usable answer was produced.

    An answer is uncertain when it is None, empty or whitespace-only, or when
    it contains (case-insensitively) one of the fallback sentences the
    assistant uses when it cannot answer.
    """
    # A missing or blank answer is treated like an explicit "not found".
    if answer is None or not answer.strip():
        return True

    uncertain_phrases = (
        "je ne trouve pas la réponse",
        "désolé, je ne peux pas vous aider",
    )
    # Lower-case once rather than once per phrase.
    lowered = answer.lower()
    return any(phrase in lowered for phrase in uncertain_phrases)
169
+
170
def handle_user_query(question, user_email=None):
    """Answer one user message, escalating to the administrator if needed.

    Args:
        question: The user's message.
        user_email: Address used for escalation; may be None or empty.

    Returns:
        The reply to show in the chat: a canned French message for
        greetings/small talk or non-university topics, the RAG answer, or a
        message describing the outcome of the escalation.
    """
    # Filter out chit-chat first, using the classifier on the lower-cased text.
    detected = detect_intent(question.lower())
    if detected in ("greeting", "small talk"):
        return "Salut 👋 ! Posez-moi une question précise sur les procédures universitaires 😊."

    if not university_related(question):
        return "Merci de poser une question sur les procédures universitaires 😊"

    # Run the RAG pipeline; a confident answer is returned as-is.
    answer = rag_chain.invoke(question)
    if not uncertain(answer):
        return answer

    # No usable answer: an e-mail address is required before escalating.
    if not user_email:
        return (
            "Je ne trouve pas la réponse à cette question. "
            "Veuillez me fournir votre adresse e-mail et la question en français pour que je puisse la transmettre à un administrateur.")

    # Record the question, then notify the administrator.
    q_id = store_pending_question(user_email, question)
    if send_question_to_admin(user_email, question, q_id):
        return "Votre question a été transmise à l'administration. Vous recevrez une réponse par e-mail dès que possible."
    return "Une erreur est survenue lors de l'envoi de votre question. Veuillez réessayer plus tard."
197
+
198
+
199
# Accepted addresses: a non-empty local part followed by one of the two
# allowed domains, not immediately continued by more domain characters.
_EMAIL_PATTERN = re.compile(
    r"[A-Za-z0-9._%+-]+@(?:gmail\.com|fsm\.rnu\.tn)(?![\w-]|\.\w)",
    re.IGNORECASE,
)


def _extract_email(text):
    """Return the first accepted e-mail address in *text*, or "" if none."""
    if not isinstance(text, str):
        return ""
    found = _EMAIL_PATTERN.search(text)
    return found.group(0) if found else ""


def _message_text(content):
    """Flatten a history entry's content (str, dict or list) to plain text."""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return _message_text(content.get("text"))
    if isinstance(content, (list, tuple)):
        return " ".join(_message_text(part) for part in content)
    return ""


def _previous_user_messages(history):
    """Yield the user's earlier messages from either Gradio history format."""
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages" format: {"role": ..., "content": ...}
            if turn.get("role") == "user":
                yield _message_text(turn.get("content"))
        elif isinstance(turn, (list, tuple)) and turn:
            # "tuples" format: [user_message, bot_message]
            yield _message_text(turn[0])


def chatbot_fn(message, history):
    """Gradio chat callback: collect the user's e-mail, then answer questions.

    The e-mail address is recovered from the conversation's own history
    rather than from a module-level variable, so every chat session keeps its
    own address and concurrent users never see or reuse each other's. Only
    the address itself is kept, not the whole message that contained it.

    Args:
        message: The user's latest message.
        history: Earlier turns of this conversation, as passed by Gradio.

    Returns:
        A welcome prompt until an address has been given, a confirmation for
        the message that supplies it, and handle_user_query()'s reply after.
    """
    session_email = ""
    for earlier in _previous_user_messages(history):
        session_email = _extract_email(earlier)
        if session_email:
            break

    if session_email:
        return handle_user_query(message, session_email)

    if _extract_email(message):
        return "Merci ! Maintenant, posez-moi votre question 😊"
    return "Bienvenue 👋 Veuillez entrer votre adresse e-mail pour commencer."
211
+
212
# Build the chat UI: a ChatInterface driven by chatbot_fn plus a footer.
with gr.Blocks() as chat:
    gr.ChatInterface(
        fn=chatbot_fn,
        title="Chatbot Universitaire 🤖 🧠",
        description="Commencez par entrer votre adresse e-mail. Ensuite, posez toutes vos questions sur les procédures universitaires !",
        examples=[
            ["Comment faire une demande de réinscription ?"],
            ["Quels sont les délais pour la soutenance ?"],
        ],
        submit_btn="Envoyer",
    )
    gr.Markdown("© 2025 Esra Belhassen. All rights reserved")

# Start the web server only when this file is run as a script, so importing
# the module (tests, tooling) does not launch a publicly shared app.
if __name__ == "__main__":
    chat.launch(share=True)
226
+
requirement.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-openai
3
+ langchain-huggingface
4
+ langchain-community
5
+ langchain-core
6
+ sentence-transformers
7
+ transformers
8
+ chromadb
9
+ bs4
10
+ matplotlib
11
+ seaborn
12
+ scikit-learn
13
+ python-dotenv
14
+ gradio
15
+ ollama
16
+ uuid