Spaces:

abdull4h
/

vision-2030-virtual-assistant

Running

App Files Files Community

vision-2030-virtual-assistant / app.py

abdull4h

Update app.py

f4c0f01 verified 3 months ago

raw

history blame

27.2 kB

	# Vision 2030 Virtual Assistant with Arabic (ALLaM-7B) and English (Mistral-7B-Instruct) + RAG + Evaluation Framework
	"""
	Enhanced implementation of the Vision 2030 Virtual Assistant that meets all project requirements:
	1. Implements proper NLP task structure (bilingual QA system)
	2. Adds comprehensive evaluation framework for quantitative and qualitative assessment
	3. Improves RAG implementation with better retrieval and document processing
	4. Adds user feedback collection for continuous improvement
	5. Includes structured logging and performance monitoring
	"""

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	from langdetect import detect
	from sentence_transformers import SentenceTransformer
	import faiss
	import numpy as np
	import json
	import time
	import logging
	import os
	import re
	from datetime import datetime
	from sklearn.metrics import precision_recall_fscore_support, accuracy_score
	import pandas as pd
	import matplotlib.pyplot as plt
	import PyPDF2
	import io

	# Configure logging
	logging.basicConfig(
	level=logging.INFO,
	format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	handlers=[
	logging.FileHandler("vision2030_assistant.log"),
	logging.StreamHandler()
	]
	)
	logger = logging.getLogger('vision2030_assistant')

	class Vision2030Assistant:
	def __init__(self, pdf_path="vision2030.pdf", eval_data_path="evaluation_data.json"):
	"""
	Initialize the Vision 2030 Assistant with models, knowledge base, and evaluation framework

	Args:
	pdf_path: Path to the Vision 2030 PDF document
	eval_data_path: Path to evaluation dataset
	"""
	logger.info("Initializing Vision 2030 Assistant...")
	self.load_models()
	self.load_and_process_documents(pdf_path)
	self.setup_evaluation_framework(eval_data_path)
	self.response_history = []
	logger.info("Vision 2030 Assistant initialized successfully")

	def load_models(self):
	"""Load language models and embedding models for both Arabic and English"""
	logger.info("Loading language and embedding models...")

	# Load Arabic Model (ALLaM-7B)
	try:
	self.arabic_model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
	self.arabic_tokenizer = AutoTokenizer.from_pretrained(self.arabic_model_id)
	self.arabic_model = AutoModelForCausalLM.from_pretrained(self.arabic_model_id, device_map="auto")
	self.arabic_pipe = pipeline("text-generation", model=self.arabic_model, tokenizer=self.arabic_tokenizer)
	logger.info("Arabic model loaded successfully")
	except Exception as e:
	logger.error(f"Error loading Arabic model: {str(e)}")
	raise

	# Load English Model (Mistral-7B-Instruct)
	try:
	self.english_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
	self.english_tokenizer = AutoTokenizer.from_pretrained(self.english_model_id)
	self.english_model = AutoModelForCausalLM.from_pretrained(self.english_model_id, device_map="auto")
	self.english_pipe = pipeline("text-generation", model=self.english_model, tokenizer=self.english_tokenizer)
	logger.info("English model loaded successfully")
	except Exception as e:
	logger.error(f"Error loading English model: {str(e)}")
	raise

	# Load Embedding Models for Retrieval
	try:
	self.arabic_embedder = SentenceTransformer('CAMeL-Lab/bert-base-arabic-camelbert-ca')
	self.english_embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
	logger.info("Embedding models loaded successfully")
	except Exception as e:
	logger.error(f"Error loading embedding models: {str(e)}")
	raise

	def load_and_process_documents(self, pdf_path):
	"""Load and process the Vision 2030 document from PDF"""
	logger.info(f"Processing Vision 2030 document from {pdf_path}")

	# Initialize empty document lists
	self.english_texts = []
	self.arabic_texts = []

	try:
	# Check if PDF exists
	if os.path.exists(pdf_path):
	# Extract text from PDF
	with open(pdf_path, 'rb') as file:
	reader = PyPDF2.PdfReader(file)
	full_text = ""
	for page_num in range(len(reader.pages)):
	page = reader.pages[page_num]
	full_text += page.extract_text() + "\n"

	# Split into chunks (simple approach - could be improved with better text segmentation)
	chunks = [chunk.strip() for chunk in re.split(r'\n\s*\n', full_text) if chunk.strip()]

	# Detect language and add to appropriate list
	for chunk in chunks:
	try:
	lang = detect(chunk)
	if lang == "ar":
	self.arabic_texts.append(chunk)
	else: # Default to English for other languages
	self.english_texts.append(chunk)
	except:
	# If language detection fails, assume English
	self.english_texts.append(chunk)

	logger.info(f"Processed {len(self.arabic_texts)} Arabic and {len(self.english_texts)} English chunks")
	else:
	logger.warning(f"PDF file not found at {pdf_path}. Using fallback sample data.")
	self._create_sample_data()
	except Exception as e:
	logger.error(f"Error processing PDF: {str(e)}")
	logger.info("Using fallback sample data")
	self._create_sample_data()

	# Create FAISS indices
	self._create_indices()

	def _create_sample_data(self):
	"""Create sample Vision 2030 data if PDF processing fails"""
	logger.info("Creating sample Vision 2030 data")

	# English sample texts
	self.english_texts = [
	"Vision 2030 is Saudi Arabia's strategic framework to reduce dependence on oil, diversify the economy, and develop public sectors.",
	"The key pillars of Vision 2030 are a vibrant society, a thriving economy, and an ambitious nation.",
	"The Saudi Public Investment Fund (PIF) plays a crucial role in Vision 2030 by investing in strategic sectors.",
	"NEOM is a planned cross-border smart city in the Tabuk Province of northwestern Saudi Arabia, a key project of Vision 2030.",
	"Vision 2030 aims to increase women's participation in the workforce from 22% to 30%.",
	"The Red Sea Project is a Vision 2030 initiative to develop luxury tourism destinations across 50 islands off Saudi Arabia's Red Sea coast.",
	"Qiddiya is a entertainment mega-project being built in Riyadh as part of Vision 2030.",
	"Vision 2030 targets increasing the private sector's contribution to GDP from 40% to 65%.",
	"One goal of Vision 2030 is to increase foreign direct investment from 3.8% to 5.7% of GDP.",
	"Vision 2030 includes plans to develop the digital infrastructure and support for tech startups in Saudi Arabia."
	]

	# Arabic sample texts (same content as English)
	self.arabic_texts = [
	"رؤية 2030 هي الإطار الاستراتيجي للمملكة العربية السعودية للحد من الاعتماد على النفط وتنويع الاقتصاد وتطوير القطاعات العامة.",
	"الركائز الرئيسية لرؤية 2030 هي مجتمع حيوي، واقتصاد مزدهر، ووطن طموح.",
	"يلعب صندوق الاستثمارات العامة السعودي دورًا محوريًا في رؤية 2030 من خلال الاستثمار في القطاعات الاستراتيجية.",
	"نيوم هي مدينة ذكية مخططة عبر الحدود في مقاطعة تبوك شمال غرب المملكة العربية السعودية، وهي مشروع رئيسي من رؤية 2030.",
	"تهدف رؤية 2030 إلى زيادة مشاركة المرأة في القوى العاملة من 22٪ إلى 30٪.",
	"مشروع البحر الأحمر هو مبادرة رؤية 2030 لتطوير وجهات سياحية فاخرة عبر 50 جزيرة قبالة ساحل البحر الأحمر السعودي.",
	"القدية هي مشروع ترفيهي ضخم يتم بناؤه في الرياض كجزء من رؤية 2030.",
	"تستهدف رؤية 2030 زيادة مساهمة القطاع الخاص في الناتج المحلي الإجمالي من 40٪ إلى 65٪.",
	"أحد أهداف رؤية 2030 هو زيادة الاستثمار الأجنبي المباشر من 3.8٪ إلى 5.7٪ من الناتج المحلي الإجمالي.",
	"تتضمن رؤية 2030 خططًا لتطوير البنية التحتية الرقمية والدعم للشركات الناشئة التكنولوجية في المملكة العربية السعودية."
	]

	def _create_indices(self):
	"""Create FAISS indices for fast text retrieval"""
	logger.info("Creating FAISS indices for text retrieval")

	try:
	# Process and embed English texts
	self.english_vectors = []
	for text in self.english_texts:
	vec = self.english_embedder.encode(text)
	self.english_vectors.append(vec)

	# Create English index
	if self.english_vectors:
	self.english_index = faiss.IndexFlatL2(len(self.english_vectors[0]))
	self.english_index.add(np.array(self.english_vectors))
	logger.info(f"Created English index with {len(self.english_vectors)} vectors")
	else:
	logger.warning("No English texts to index")

	# Process and embed Arabic texts
	self.arabic_vectors = []
	for text in self.arabic_texts:
	vec = self.arabic_embedder.encode(text)
	self.arabic_vectors.append(vec)

	# Create Arabic index
	if self.arabic_vectors:
	self.arabic_index = faiss.IndexFlatL2(len(self.arabic_vectors[0]))
	self.arabic_index.add(np.array(self.arabic_vectors))
	logger.info(f"Created Arabic index with {len(self.arabic_vectors)} vectors")
	else:
	logger.warning("No Arabic texts to index")

	except Exception as e:
	logger.error(f"Error creating FAISS indices: {str(e)}")
	raise

	def setup_evaluation_framework(self, eval_data_path):
	"""Set up the evaluation framework with test data and metrics"""
	logger.info("Setting up evaluation framework")

	# Initialize metrics trackers
	self.metrics = {
	"response_times": [],
	"user_ratings": [],
	"retrieval_precision": [],
	"factual_accuracy": []
	}

	# Load evaluation data if exists, otherwise create sample
	try:
	if os.path.exists(eval_data_path):
	with open(eval_data_path, 'r', encoding='utf-8') as f:
	self.eval_data = json.load(f)
	logger.info(f"Loaded {len(self.eval_data)} evaluation examples from {eval_data_path}")
	else:
	logger.warning(f"Evaluation data not found at {eval_data_path}. Creating sample evaluation data.")
	self._create_sample_eval_data()
	except Exception as e:
	logger.error(f"Error loading evaluation data: {str(e)}")
	self._create_sample_eval_data()

	def _create_sample_eval_data(self):
	"""Create sample evaluation data with ground truth"""
	self.eval_data = [
	{
	"question": "What are the key pillars of Vision 2030?",
	"lang": "en",
	"reference_answer": "The key pillars of Vision 2030 are a vibrant society, a thriving economy, and an ambitious nation."
	},
	{
	"question": "ما هي الركائز الرئيسية لرؤية 2030؟",
	"lang": "ar",
	"reference_answer": "الركائز الرئيسية لرؤية 2030 هي مجتمع حيوي، واقتصاد مزدهر، ووطن طموح."
	},
	{
	"question": "What is NEOM?",
	"lang": "en",
	"reference_answer": "NEOM is a planned cross-border smart city in the Tabuk Province of northwestern Saudi Arabia, a key project of Vision 2030."
	},
	{
	"question": "ما هو مشروع البحر الأحمر؟",
	"lang": "ar",
	"reference_answer": "مشروع البحر الأحمر هو مبادرة رؤية 2030 لتطوير وجهات سياحية فاخرة عبر 50 جزيرة قبالة ساحل البحر الأحمر السعودي."
	}
	]
	logger.info(f"Created {len(self.eval_data)} sample evaluation examples")

	def retrieve_context(self, query, lang):
	"""Retrieve relevant context for a query based on language"""
	start_time = time.time()

	try:
	if lang == "ar":
	query_vec = self.arabic_embedder.encode(query)
	D, I = self.arabic_index.search(np.array([query_vec]), k=2) # Get top 2 most relevant chunks
	context = "\n".join([self.arabic_texts[i] for i in I[0] if i < len(self.arabic_texts) and i >= 0])
	else:
	query_vec = self.english_embedder.encode(query)
	D, I = self.english_index.search(np.array([query_vec]), k=2) # Get top 2 most relevant chunks
	context = "\n".join([self.english_texts[i] for i in I[0] if i < len(self.english_texts) and i >= 0])

	retrieval_time = time.time() - start_time
	logger.info(f"Retrieved context in {retrieval_time:.2f}s")

	return context
	except Exception as e:
	logger.error(f"Error retrieving context: {str(e)}")
	return ""

	def generate_response(self, user_input):
	"""Generate a response to user input using the appropriate model and retrieval system"""
	start_time = time.time()

	# Default response in case of failure
	default_response = {
	"en": "I apologize, but I couldn't process your request properly. Please try again.",
	"ar": "أعتذر، لم أتمكن من معالجة طلبك بشكل صحيح. الرجاء المحاولة مرة أخرى."
	}

	try:
	# Detect language
	try:
	lang = detect(user_input)
	if lang != "ar": # Simplify to just Arabic vs non-Arabic
	lang = "en"
	except:
	lang = "en" # Default fallback

	logger.info(f"Detected language: {lang}")

	# Retrieve relevant context
	context = self.retrieve_context(user_input, lang)

	if lang == "ar":
	# Improved Arabic Prompt
	input_text = (
	f"أنت خبير في رؤية السعودية 2030.\n"
	f"إليك بعض المعلومات المهمة:\n{context}\n\n"
	f"مثال:\n"
	f"السؤال: ما هي ركائز رؤية 2030؟\n"
	f"الإجابة: ركائز رؤية 2030 هي مجتمع حيوي، اقتصاد مزدهر، ووطن طموح.\n\n"
	f"أجب عن سؤال المستخدم بشكل واضح ودقيق، مستندًا إلى المعلومات المقدمة. إذا لم تكن المعلومات متوفرة، أوضح ذلك.\n"
	f"السؤال: {user_input}\n"
	f"الإجابة:"
	)

	response = self.arabic_pipe(input_text, max_new_tokens=256, do_sample=True, temperature=0.7)
	full_text = response[0]['generated_text']

	# Extract the answer part
	answer_pattern = r"الإجابة:(.*?)(?:$)"
	match = re.search(answer_pattern, full_text, re.DOTALL)
	if match:
	reply = match.group(1).strip()
	else:
	reply = full_text
	else:
	# Improved English Prompt
	input_text = (
	f"You are an expert on Saudi Arabia's Vision 2030.\n"
	f"Here is some relevant information:\n{context}\n\n"
	f"Example:\n"
	f"Question: What are the key pillars of Vision 2030?\n"
	f"Answer: The key pillars are a vibrant society, a thriving economy, and an ambitious nation.\n\n"
	f"Answer the user's question clearly and accurately based on the provided information. If information is not available, make that clear.\n"
	f"Question: {user_input}\n"
	f"Answer:"
	)

	response = self.english_pipe(input_text, max_new_tokens=256, do_sample=True, temperature=0.7)
	full_text = response[0]['generated_text']

	# Extract the answer part
	answer_pattern = r"Answer:(.*?)(?:$)"
	match = re.search(answer_pattern, full_text, re.DOTALL)
	if match:
	reply = match.group(1).strip()
	else:
	reply = full_text

	except Exception as e:
	logger.error(f"Error generating response: {str(e)}")
	reply = default_response.get(lang, default_response["en"])

	# Record response time
	response_time = time.time() - start_time
	self.metrics["response_times"].append(response_time)

	logger.info(f"Generated response in {response_time:.2f}s")

	# Store the interaction for later evaluation
	interaction = {
	"timestamp": datetime.now().isoformat(),
	"user_input": user_input,
	"response": reply,
	"language": lang,
	"response_time": response_time
	}
	self.response_history.append(interaction)

	return reply

	def evaluate_factual_accuracy(self, response, reference):
	"""Simple evaluation of factual accuracy by keyword matching"""
	# This is a simplified approach - in production, use more sophisticated methods
	keywords_reference = set(re.findall(r'\b\w+\b', reference.lower()))
	keywords_response = set(re.findall(r'\b\w+\b', response.lower()))

	common_keywords = keywords_reference.intersection(keywords_response)

	if len(keywords_reference) > 0:
	accuracy = len(common_keywords) / len(keywords_reference)
	else:
	accuracy = 0

	return accuracy

	def evaluate_on_test_set(self):
	"""Evaluate the assistant on the test set"""
	logger.info("Running evaluation on test set")

	eval_results = []

	for example in self.eval_data:
	# Generate response
	response = self.generate_response(example["question"])

	# Calculate factual accuracy
	accuracy = self.evaluate_factual_accuracy(response, example["reference_answer"])

	eval_results.append({
	"question": example["question"],
	"reference": example["reference_answer"],
	"response": response,
	"factual_accuracy": accuracy
	})

	self.metrics["factual_accuracy"].append(accuracy)

	# Calculate average factual accuracy
	avg_accuracy = sum(self.metrics["factual_accuracy"]) / len(self.metrics["factual_accuracy"]) if self.metrics["factual_accuracy"] else 0
	avg_response_time = sum(self.metrics["response_times"]) / len(self.metrics["response_times"]) if self.metrics["response_times"] else 0

	results = {
	"average_factual_accuracy": avg_accuracy,
	"average_response_time": avg_response_time,
	"detailed_results": eval_results
	}

	logger.info(f"Evaluation results: Factual accuracy = {avg_accuracy:.2f}, Avg response time = {avg_response_time:.2f}s")

	return results

	def record_user_feedback(self, user_input, response, rating, feedback_text=""):
	"""Record user feedback for a response"""
	feedback = {
	"timestamp": datetime.now().isoformat(),
	"user_input": user_input,
	"response": response,
	"rating": rating,
	"feedback_text": feedback_text
	}

	self.metrics["user_ratings"].append(rating)

	# In a production system, store this in a database
	logger.info(f"Recorded user feedback: rating={rating}")

	return True

	def save_evaluation_metrics(self, output_path="evaluation_metrics.json"):
	"""Save evaluation metrics to a file"""
	try:
	with open(output_path, 'w', encoding='utf-8') as f:
	json.dump({
	"response_times": self.metrics["response_times"],
	"user_ratings": self.metrics["user_ratings"],
	"factual_accuracy": self.metrics["factual_accuracy"],
	"average_factual_accuracy": sum(self.metrics["factual_accuracy"]) / len(self.metrics["factual_accuracy"]) if self.metrics["factual_accuracy"] else 0,
	"average_response_time": sum(self.metrics["response_times"]) / len(self.metrics["response_times"]) if self.metrics["response_times"] else 0,
	"average_user_rating": sum(self.metrics["user_ratings"]) / len(self.metrics["user_ratings"]) if self.metrics["user_ratings"] else 0,
	"timestamp": datetime.now().isoformat()
	}, f, indent=2)

	logger.info(f"Saved evaluation metrics to {output_path}")
	return True
	except Exception as e:
	logger.error(f"Error saving evaluation metrics: {str(e)}")
	return False

	# --- Gradio UI --- #
	def create_gradio_interface():
	# Initialize the assistant
	assistant = Vision2030Assistant()

	# Track conversation history
	conversation_history = []

	def chat(message, history):
	if not message:
	return history, ""

	# Generate response
	reply = assistant.generate_response(message)

	# Update history
	history.append((message, reply))

	return history, ""

	def provide_feedback(message, rating, feedback_text):
	# Find the most recent interaction
	if conversation_history:
	last_interaction = conversation_history[-1]
	assistant.record_user_feedback(last_interaction[0], last_interaction[1], rating, feedback_text)
	return f"Thank you for your feedback! (Rating: {rating}/5)"
	return "No conversation found to rate."

	def clear_history():
	conversation_history.clear()
	return []

	def download_metrics():
	assistant.save_evaluation_metrics()
	return "evaluation_metrics.json"

	def run_evaluation():
	results = assistant.evaluate_on_test_set()
	return f"Evaluation Results:\nFactual Accuracy: {results['average_factual_accuracy']:.2f}\nAverage Response Time: {results['average_response_time']:.2f}s"

	# Create Gradio interface
	with gr.Blocks() as demo:
	gr.Markdown("# Vision 2030 Virtual Assistant 🌍\n\nAsk questions about Saudi Vision 2030 in Arabic or English")

	with gr.Tab("Chat"):
	chatbot = gr.Chatbot(show_label=False)
	msg = gr.Textbox(label="Ask me anything about Vision 2030", placeholder="Type your question here...")
	clear = gr.Button("Clear Conversation")

	with gr.Row():
	with gr.Column(scale=4):
	feedback_text = gr.Textbox(label="Provide additional feedback (optional)")
	with gr.Column(scale=1):
	rating = gr.Slider(label="Rate Response (1-5)", minimum=1, maximum=5, step=1, value=3)

	submit_feedback = gr.Button("Submit Feedback")
	feedback_result = gr.Textbox(label="Feedback Status")

	# Set up event handlers
	msg.submit(chat, [msg, chatbot], [chatbot, msg])
	clear.click(clear_history, None, chatbot)
	submit_feedback.click(provide_feedback, [msg, rating, feedback_text], feedback_result)

	with gr.Tab("Evaluation"):
	eval_button = gr.Button("Run Evaluation on Test Set")
	eval_results = gr.Textbox(label="Evaluation Results")
	download_button = gr.Button("Download Metrics")
	download_file = gr.File(label="Download evaluation metrics as JSON")

	# Set up evaluation handlers
	eval_button.click(run_evaluation, None, eval_results)
	download_button.click(download_metrics, None, download_file)

	with gr.Tab("About"):
	gr.Markdown("""
	## About Vision 2030 Virtual Assistant

	This assistant uses a combination of state-of-the-art language models to answer questions about Saudi Arabia's Vision 2030 strategic framework in both Arabic and English.

	### Features:
	- Bilingual support (Arabic and English)
	- Retrieval-Augmented Generation (RAG) for factual accuracy
	- Evaluation framework for measuring performance
	- User feedback collection for continuous improvement

	### Models Used:
	- Arabic: ALLaM-7B-Instruct-preview
	- English: Mistral-7B-Instruct-v0.2
	- Embeddings: CAMeL-Lab/bert-base-arabic-camelbert-ca and sentence-transformers/all-MiniLM-L6-v2

	This project demonstrates the application of advanced NLP techniques for multilingual question answering, particularly for Arabic language support.
	""")

	return demo

	# Launch the application
	if __name__ == "__main__":
	demo = create_gradio_interface()
	demo.launch()