MBAL_chatbot

Sleeping

App Files Files Community

MBAL_chatbot / app_abhi.py

ngcanh

Rename app.py to app_abhi.py

49055b0 verified 22 days ago

raw

history blame contribute delete

11.8 kB

	import os
	import streamlit as st
	import PyPDF2
	import subprocess
	import openai
	from openai import OpenAI
	from langchain_openai import ChatOpenAI
	from io import BytesIO
	from typing import List, Dict
	from dotenv import load_dotenv
	# Load environment variables. A local .env file is optional; by default
	# load_dotenv() does not override variables that are already set.
	load_dotenv()
	OPENAI_API_KEY = os.getenv("OPENAI_API")
	TOKEN = os.getenv("HF_TOKEN")
	# Log in to the Hugging Face Hub only when a token is configured:
	# subprocess.run() raises TypeError when an argument in the list is None,
	# which would otherwise abort the whole script at import time.
	if TOKEN:
	    subprocess.run(
	        ["huggingface-cli", "login", "--token", TOKEN, "--add-to-git-credential"],
	        check=False,
	    )
	st.sidebar.title("Welcome to MBAL Chatbot")
	class PDFChatbot:
	    """Answer questions about an uploaded PDF through the OpenAI chat API.

	    Instance state:
	      * ``azure_client`` - the ``openai.OpenAI`` client used for completions.
	      * ``conversation_history`` - list of ``{"role", "content"}`` dicts, one
	        per past user question and assistant reply.
	      * ``pdf_content`` - text of the currently loaded document.
	    """

	    # Settings sent with every completion request.
	    MODEL_NAME = "gpt-3.5-turbo-0125"
	    MAX_TOKENS = 1000
	    TEMPERATURE = 0.7
	    # How many of the most recent history messages are replayed to the model.
	    HISTORY_WINDOW = 6

	    def __init__(self):
	        """Create the API client and start with no history and no document."""
	        # The attribute keeps the name ``azure_client`` (earlier revisions
	        # configured an Azure OpenAI client here) so existing callers work.
	        # The key read from the OPENAI_API variable is passed explicitly;
	        # when it is None the client falls back to OPENAI_API_KEY itself.
	        self.azure_client = openai.OpenAI(api_key=OPENAI_API_KEY)
	        self.conversation_history = []
	        self.pdf_content = ""

	    def extract_text_from_pdf(self, pdf_file):
	        """Return the text of every page of *pdf_file*, or ``None`` on failure.

	        Page texts are joined with newlines and the result is stripped. Any
	        exception raised while reading is reported with ``st.error`` and
	        turned into a ``None`` return value.
	        """
	        try:
	            reader = PyPDF2.PdfReader(pdf_file)
	            # A page that yields no text contributes an empty string
	            # instead of making the whole document fail.
	            page_texts = [page.extract_text() or "" for page in reader.pages]
	            return "\n".join(page_texts).strip()
	        except Exception as e:
	            st.error(f"Error reading PDF: {str(e)}")
	            return None

	    def chunk_text(self, text: str, chunk_size: int = 3000) -> List[str]:
	        """Split *text* into chunks of at most *chunk_size* characters.

	        Words (whitespace-separated) are never split and are re-joined with
	        single spaces, so one word longer than *chunk_size* becomes a chunk
	        of its own. Empty or whitespace-only text yields ``[]``.
	        """
	        chunks = []
	        current_words = []
	        current_length = 0
	        for word in text.split():
	            # The joining space only costs a character when the chunk
	            # already holds a word; counting it for the first word too made
	            # the first chunk one character smaller than the others.
	            separator = 1 if current_words else 0
	            too_long = current_length + separator + len(word) > chunk_size
	            if current_words and too_long:
	                chunks.append(" ".join(current_words))
	                current_words = [word]
	                current_length = len(word)
	            else:
	                current_words.append(word)
	                current_length += separator + len(word)
	        if current_words:
	            chunks.append(" ".join(current_words))
	        return chunks

	    def get_relevant_context(self, query: str, chunks: List[str], max_chunks: int = 3) -> str:
	        """Return the *max_chunks* chunks that share the most words with *query*.

	        The score of a chunk is the number of distinct lower-cased,
	        whitespace-separated words it has in common with the query. The
	        selected chunks are joined with a blank line, best match first.
	        Chunks with a score of zero are still eligible, so some context is
	        returned whenever *chunks* is non-empty.
	        """
	        query_words = set(query.lower().split())

	        def overlap(chunk):
	            return len(query_words.intersection(chunk.lower().split()))

	        # sorted() is stable, also with reverse=True, so equally scored
	        # chunks keep their document order.
	        ranked = sorted(chunks, key=overlap, reverse=True)
	        return "\n\n".join(ranked[:max_chunks])

	    def _build_messages(self, user_question: str, relevant_context: str) -> List[Dict[str, str]]:
	        """Return the request messages: system prompt, recent history, new question."""
	        messages = [
	            {
	                "role": "system",
	                "content": """You are an experienced insurance agent assistant who helps customers understand their insurance policies and coverage details. Follow these guidelines:
	1. Only provide information based on the PDF content provided
	2. If the answer is not in the PDF, clearly state that the information is not available in the document
	3. Provide clear, concise, and helpful responses in a professional manner
	4. Always respond in Vietnamese using proper grammar and formatting
	5. When possible, reference specific sections or clauses from the policy
	6. Use insurance terminology appropriately but explain complex terms when necessary
	7. Be empathetic and patient, as insurance can be confusing for customers
	8. If asked about claims, coverage limits, deductibles, or policy terms, provide accurate information from the document
	9. Always prioritize customer understanding and satisfaction
	10. If multiple interpretations are possible, explain the different scenarios clearly
	Remember: You are here to help customers understand their insurance coverage better."""
	            }
	        ]
	        # Earlier turns go BEFORE the new question. Appending them after it
	        # made the request end on an old assistant reply, so the model was
	        # not answering the question that was just asked.
	        messages.extend(self.conversation_history[-self.HISTORY_WINDOW:])
	        messages.append(
	            {
	                "role": "user",
	                "content": f"""Insurance Document Content:
	{relevant_context}
	Customer Question: {user_question}
	Please provide a helpful response based on the insurance document content above."""
	            }
	        )
	        return messages

	    def chat_with_pdf(self, user_question: str, pdf_content: str) -> str:
	        """Answer *user_question* from *pdf_content* and record the exchange.

	        The document is chunked, the best-matching chunks are sent as
	        context together with the recent conversation, and the question and
	        reply are appended to ``conversation_history``.

	        Returns the model's reply. If any step raises, returns a string
	        starting with ``"Error generating response:"`` instead; nothing is
	        added to the history in that case.
	        """
	        try:
	            chunks = self.chunk_text(pdf_content)
	            relevant_context = self.get_relevant_context(user_question, chunks)
	            messages = self._build_messages(user_question, relevant_context)
	            response = self.azure_client.chat.completions.create(
	                model=self.MODEL_NAME,
	                messages=messages,
	                max_tokens=self.MAX_TOKENS,
	                temperature=self.TEMPERATURE
	            )
	            bot_response = response.choices[0].message.content
	            # Only the bare question is stored; the document context is
	            # rebuilt for every request.
	            self.conversation_history.append({"role": "user", "content": user_question})
	            self.conversation_history.append({"role": "assistant", "content": bot_response})
	            return bot_response
	        except Exception as e:
	            # The caller renders whatever string comes back, so failures
	            # are reported as text rather than raised.
	            return f"Error generating response: {str(e)}"
	def main():
	    """Render the Streamlit page: upload controls in the sidebar, chat in the body.

	    Until a PDF has been processed the body shows usage hints; afterwards it
	    shows the stored question/answer pairs and a chat input.

	    NOTE(review): the nesting below was reconstructed from a copy of this
	    file whose indentation had been lost - confirm it against the deployed
	    version (in particular which statements sit inside the sidebar, the
	    spinners and the final ``else`` branch).
	    """
	    # st.set_page_config(page_title="Insurance PDF Chatbot", page_icon="🛡️", layout="wide")
	    state = st.session_state

	    st.title("🛡️ Insurance Policy Assistant")
	    st.markdown("Upload your insurance policy PDF and ask questions about your coverage, claims, deductibles, and more!")

	    # First run of a session: create the bot and the UI bookkeeping.
	    if 'chatbot' not in state:
	        state.chatbot = PDFChatbot()
	        state.pdf_processed = False
	        state.chat_history = []
	    bot = state.chatbot

	    with st.sidebar:
	        st.header("📁 Upload Insurance Document")
	        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
	        # The button is only rendered once a file has been chosen.
	        if uploaded_file is not None and st.button("Process PDF"):
	            with st.spinner("Processing your insurance document..."):
	                text_content = bot.extract_text_from_pdf(uploaded_file)
	                if not text_content:
	                    st.error("Failed to process PDF")
	                else:
	                    bot.pdf_content = text_content
	                    state.pdf_processed = True
	                    st.success("Insurance document processed successfully!")
	                    st.subheader("Document Preview")
	                    if len(text_content) > 500:
	                        preview = text_content[:500] + "..."
	                    else:
	                        preview = text_content
	                    st.text_area(
	                        "First 500 characters:",
	                        preview,
	                        height=100
	                    )
	        # Wipe both the model-side history and the rendered transcript.
	        if st.button("Clear Conversation"):
	            bot.conversation_history = []
	            state.chat_history = []
	            st.rerun()

	    # Nothing to chat about yet: show guidance and stop.
	    if not state.pdf_processed:
	        st.info("👆 Please upload and process an insurance PDF document to start chatting!")
	        st.subheader("Example questions you can ask:")
	        st.markdown("""
	- What is my coverage limit for property damage?
	- What is my deductible amount?
	- What types of incidents are covered under this policy?
	- What is excluded from my coverage?
	- How do I file a claim?
	- What is the process for claim settlement?
	- What are my premium payment options?
	- When does my policy expire?
	- Is flood damage covered?
	- What documentation do I need for a claim?
	""")
	        st.subheader("💡 Insurance Tips")
	        st.markdown("""
	- Review your policy regularly to understand your coverage
	- Keep your policy documents in a safe place
	- Update your coverage when your circumstances change
	- Document any incidents immediately
	- Contact your insurance agent if you have questions
	""")
	        return

	    st.header("💬 Ask About Your Insurance Policy")
	    # Replay the transcript accumulated in earlier reruns.
	    for question, answer in state.chat_history:
	        with st.container():
	            st.markdown(f"You: {question}")
	            st.markdown(f"Insurance Assistant: {answer}")
	            st.divider()

	    user_question = st.chat_input("Ask about your insurance coverage, claims, deductibles, or any policy details...")
	    if not user_question:
	        return
	    with st.spinner("Analyzing your policy..."):
	        response = bot.chat_with_pdf(user_question, bot.pdf_content)
	        state.chat_history.append((user_question, response))
	        # The transcript loop above already ran, so show the new turn here.
	        st.markdown(f"You: {user_question}")
	        st.markdown(f"Insurance Assistant: {response}")
	# Run the app only when this file is executed as a script.
	if __name__ == "__main__":
	    main()