import json
import os
import uuid
from datetime import datetime
from typing import Dict

import pandas as pd
import streamlit as st
from datasets import load_dataset
from dotenv import load_dotenv

from langgraph_agent import DataAnalystAgent

# Load environment variables
load_dotenv()

# Set up page config
st.set_page_config(
    page_title="🤖 LangGraph Data Analyst Agent",
    layout="wide",
    page_icon="🤖",
    initial_sidebar_state="expanded",
)

# Custom CSS for styling
st.markdown(
    """ """,
    unsafe_allow_html=True,
)


# API configuration
def get_api_configuration():
    """Get API configuration from environment variables."""
    api_key = os.environ.get("NEBIUS_API_KEY") or os.environ.get("OPENAI_API_KEY")

    if not api_key:
        st.markdown(
            """
**🔑 API Key Configuration Required**

**For Local Development:**

1. Create a `.env` file in your project directory
2. Add your API key: `NEBIUS_API_KEY=your_api_key_here`
3. Or use OpenAI: `OPENAI_API_KEY=your_api_key_here`
4. Restart the application

**For Deployment:**

1. Set environment variable `NEBIUS_API_KEY` or `OPENAI_API_KEY`
2. Restart your application
""",
            unsafe_allow_html=True,
        )
        st.stop()

    return api_key


# Initialize the agent
@st.cache_resource
def get_agent(api_key: str) -> DataAnalystAgent:
    """Initialize and cache the LangGraph agent."""
    return DataAnalystAgent(api_key=api_key)


# Load dataset
@st.cache_data
def load_bitext_dataset():
    """Load and cache the Bitext dataset."""
    try:
        dataset = load_dataset(
            "bitext/Bitext-customer-support-llm-chatbot-training-dataset"
        )
        df = pd.DataFrame(dataset["train"])
        return df
    except Exception as e:
        st.error(f"Error loading dataset: {e}")
        return None


# Session management functions
def initialize_session():
    """Initialize session state variables."""
    if "session_id" not in st.session_state:
        st.session_state.session_id = str(uuid.uuid4())
    if "conversation_history" not in st.session_state:
        st.session_state.conversation_history = []
    if "user_profile" not in st.session_state:
        st.session_state.user_profile = {}
    if "current_thread_id" not in st.session_state:
        st.session_state.current_thread_id = st.session_state.session_id


def create_new_session():
    """Create a new session with a new thread ID."""
    st.session_state.session_id = str(uuid.uuid4())
    st.session_state.current_thread_id = st.session_state.session_id
    st.session_state.conversation_history = []
    st.session_state.user_profile = {}


def format_conversation_message(role: str, content: str, timestamp: str = None):
    """Format a conversation message for display."""
    if timestamp is None:
        timestamp = datetime.now().strftime("%H:%M:%S")

    if role == "human":
        return f"""
👤 You ({timestamp}):
{content}
""" else: return f"""
🤖 Agent ({timestamp}):
{content}
""" def display_user_profile(profile: Dict): """Display user profile information.""" if not profile: return with st.expander("🧠 What I Remember About You", expanded=False): col1, col2 = st.columns(2) with col1: st.markdown("**Your Interests:**") interests = profile.get("interests", []) if interests: for interest in interests: st.write(f"• {interest}") else: st.write("_No interests recorded yet_") st.markdown("**Expertise Level:**") expertise = profile.get("expertise_level", "beginner") st.write(f"• {expertise.title()}") with col2: st.markdown("**Your Preferences:**") preferences = profile.get("preferences", {}) if preferences: for key, value in preferences.items(): st.write(f"• {key}: {value}") else: st.write("_No preferences recorded yet_") st.markdown("**Recent Query Topics:**") query_history = profile.get("query_history", []) if query_history: for query in query_history[-3:]: # Show last 3 st.write(f"• {query[:50]}...") else: st.write("_No query history yet_") def main(): # Custom header st.markdown( """

🤖 LangGraph Data Analyst Agent

Intelligent Analysis with Memory & Recommendations

""", unsafe_allow_html=True, ) # Initialize session initialize_session() # Get API configuration api_key = get_api_configuration() # Initialize agent agent = get_agent(api_key) # Load dataset with st.spinner("🔄 Loading dataset..."): df = load_bitext_dataset() if df is None: st.markdown( """

❌ Dataset Loading Failed

Failed to load dataset. Please check your connection and try again.

""", unsafe_allow_html=True, ) return # Success message st.markdown( f"""

✅ System Ready

Dataset loaded with {len(df):,} records. LangGraph agent initialized with memory.

""", unsafe_allow_html=True, ) # Sidebar configuration with st.sidebar: st.markdown("## ⚙️ Session Management") # Session ID management st.markdown("### 🆔 Session Control") col1, col2 = st.columns(2) with col1: if st.button("🆕 New Session", use_container_width=True): create_new_session() st.rerun() with col2: if st.button("🔄 Refresh", use_container_width=True): st.rerun() # Display session info st.markdown( f"""
Current Session:
{st.session_state.current_thread_id[:8]}...
Messages: {len(st.session_state.conversation_history)}
""", unsafe_allow_html=True, ) # Custom session ID input st.markdown("### 🔗 Join Existing Session") custom_thread_id = st.text_input( "Enter Session ID:", placeholder="Enter full session ID to join...", help="Use this to resume a previous conversation", ) if st.button("🔗 Join Session") and custom_thread_id: st.session_state.current_thread_id = custom_thread_id # Load conversation history for this thread history = agent.get_conversation_history(custom_thread_id) st.session_state.conversation_history = history # Load user profile for this thread profile = agent.get_user_profile(custom_thread_id) st.session_state.user_profile = profile st.success(f"Joined session: {custom_thread_id[:8]}...") st.rerun() st.markdown("---") # Dataset info st.markdown("### 📊 Dataset Info") col1, col2 = st.columns(2) with col1: st.metric("📝 Records", f"{len(df):,}") with col2: st.metric("📂 Categories", len(df["category"].unique())) st.metric("🎯 Intents", len(df["intent"].unique())) # Quick examples st.markdown("### 💡 Try These Queries") example_queries = [ "What are the most common categories?", "Show me examples of billing issues", "Summarize the refund category", "What should I query next?", "What do you remember about me?", ] for query in example_queries: if st.button(f"💬 {query}", key=f"example_{hash(query)}"): st.session_state.pending_query = query st.rerun() # Main content area # Display user profile if st.session_state.user_profile: display_user_profile(st.session_state.user_profile) # Dataset information in expandable section with st.expander("📊 Dataset Information", expanded=False): st.markdown("### Dataset Details") metrics_col1, metrics_col2, metrics_col3, metrics_col4 = st.columns(4) with metrics_col1: st.metric("Total Records", f"{len(df):,}") with metrics_col2: st.metric("Columns", len(df.columns)) with metrics_col3: st.metric("Categories", len(df["category"].unique())) with metrics_col4: st.metric("Intents", len(df["intent"].unique())) st.markdown("### Sample Data") st.dataframe(df.head(), use_container_width=True) st.markdown("### Category Distribution") st.bar_chart(df["category"].value_counts()) # User input section st.markdown("## 💬 Chat with the Agent") # Handle pending query from sidebar has_pending_query = hasattr(st.session_state, "pending_query") if has_pending_query: user_question = st.session_state.pending_query delattr(st.session_state, "pending_query") else: user_question = st.text_input( "Ask your question:", placeholder="e.g., What are the most common customer issues?", key="user_input", help="Ask about statistics, examples, insights, or request recommendations", ) # Submit button col1, col2, col3 = st.columns([1, 2, 1]) with col2: submit_clicked = st.button("🚀 Send Message", use_container_width=True) # Process query if (submit_clicked or has_pending_query) and user_question: # Add user message to local history timestamp = datetime.now().strftime("%H:%M:%S") st.session_state.conversation_history.append( {"role": "human", "content": user_question, "timestamp": timestamp} ) # Show thinking indicator thinking_placeholder = st.empty() thinking_placeholder.markdown( """
⚙️ Agent is thinking... Processing your query through the LangGraph workflow.
""", unsafe_allow_html=True, ) try: # Invoke the agent result = agent.invoke(user_question, st.session_state.current_thread_id) # Get the last assistant message assistant_response = None for msg in reversed(result["messages"]): if ( hasattr(msg, "content") and msg.content and not isinstance(msg, type(user_question)) ): # Check if this is an AI message (not human or tool message) if not hasattr(msg, "tool_calls") or not msg.tool_calls: if "human" not in str(type(msg)).lower(): content = msg.content # Clean up Qwen model thinking tags if "" in content and "" in content: # Extract only the part after parts = content.split("") if len(parts) > 1: content = parts[1].strip() assistant_response = content break if not assistant_response: assistant_response = "I processed your query but couldn't generate a response. Please try again." # Add assistant response to local history st.session_state.conversation_history.append( { "role": "assistant", "content": assistant_response, "timestamp": datetime.now().strftime("%H:%M:%S"), } ) # Update user profile from agent state if result.get("user_profile"): st.session_state.user_profile = result["user_profile"] except Exception as e: error_msg = f"Sorry, I encountered an error: {str(e)}" st.session_state.conversation_history.append( { "role": "assistant", "content": error_msg, "timestamp": datetime.now().strftime("%H:%M:%S"), } ) finally: thinking_placeholder.empty() # Clear the input and rerun to show new messages st.rerun() # Display conversation if st.session_state.conversation_history: st.markdown("## 💭 Conversation") # Display messages for i, message in enumerate(st.session_state.conversation_history): message_html = format_conversation_message( message["role"], message["content"], message.get("timestamp", "") ) st.markdown(message_html, unsafe_allow_html=True) # Add separator except for last message if i < len(st.session_state.conversation_history) - 1: st.markdown("---") # Action buttons col1, col2, col3 = st.columns(3) with col1: if st.button("🗑️ Clear Chat"): st.session_state.conversation_history = [] st.rerun() with col2: if st.button("💾 Export Chat"): chat_data = { "session_id": st.session_state.current_thread_id, "timestamp": datetime.now().isoformat(), "conversation": st.session_state.conversation_history, "user_profile": st.session_state.user_profile, } st.download_button( label="📥 Download JSON", data=json.dumps(chat_data, indent=2), file_name=f"chat_export_{st.session_state.current_thread_id[:8]}.json", mime="application/json", ) with col3: if st.button("🤖 Get Recommendations"): st.session_state.pending_query = "What should I query next?" st.rerun() # Instructions with st.expander("📋 How to Use This Agent", expanded=False): st.markdown( """ ### 🎯 Query Types Supported: **Structured Queries (Quantitative):** - "How many records are in each category?" - "Show me 5 examples of billing issues" - "What are the most common intents?" **Unstructured Queries (Qualitative):** - "Summarize the refund category" - "What patterns do you see in payment issues?" - "Analyze customer sentiment in billing conversations" **Memory & Recommendations:** - "What do you remember about me?" - "What should I query next?" 
- "Advise me what to explore" ### 🧠 Memory Features: - **Session Persistence:** Your conversations are saved across page reloads - **User Profile:** The agent learns about your interests and preferences - **Query History:** Past queries influence future recommendations - **Cross-Session:** Use session IDs to resume conversations later ### 🔧 Advanced Features: - **Multi-Agent Architecture:** Separate agents for different query types - **Tool Usage:** Dynamic tool selection based on your needs - **Interactive Recommendations:** Collaborative query refinement """ ) if __name__ == "__main__": main()