"""Streamlit app: upload a tabular file and ask questions about it.

The uploaded file is loaded into a module-level pandas DataFrame named ``df``.
Questions are passed to a LangChain conversational agent whose single tool,
``safe_execute_query``, evaluates restricted pandas expressions against ``df``.
"""

import ast
import os
import re

import anthropic
import matplotlib.pyplot as plt
import openai
import pandas as pd
import streamlit as st
import torch
from dotenv import load_dotenv
from langchain.agents import AgentType, initialize_agent
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.tools import Tool
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load environment variables
load_dotenv()
for _key_name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY"):
    _key_value = os.getenv(_key_name)
    # os.environ only accepts str values; assigning None (key not configured)
    # would raise TypeError at import time, so skip keys that are absent.
    if _key_value is not None:
        os.environ[_key_name] = _key_value

# UI Styling
st.markdown(
    """ """,
    unsafe_allow_html=True,
)

st.title("Excel Q&A Chatbot 📊")

# Characters that are NOT allowed in an expression handed to eval(). Besides
# identifiers, brackets and quotes, the whitelist includes comparison, boolean
# and arithmetic operators so ordinary filters such as df[df['a'] > 5] survive
# sanitising instead of being mangled into a syntax error.
_QUERY_DISALLOWED_CHARS = re.compile(
    r"""[^a-zA-Z0-9_().,'"\[\] <>=!&|~+\-*/%:]"""
)

# The only builtins visible to evaluated expressions.
_SAFE_BUILTINS = {
    "abs": abs,
    "bool": bool,
    "dict": dict,
    "float": float,
    "int": int,
    "len": len,
    "list": list,
    "max": max,
    "min": min,
    "range": range,
    "round": round,
    "set": set,
    "sorted": sorted,
    "str": str,
    "sum": sum,
    "tuple": tuple,
}

_UNSUPPORTED_QUERY_MESSAGE = "Unsupported query type. Please refine your question."


# Initialize LangChain Agent with Multi-step Reasoning and Memory
def safe_execute_query(query):
    """Evaluate a restricted pandas expression against the uploaded ``df``.

    Args:
        query: Expression text (typically produced by the agent). It must
            contain ``df.query(`` or ``df[`` to be evaluated.

    Returns:
        The value of the evaluated expression, or a human-readable message
        string when the expression is unsupported or raises.

    SECURITY: this function still calls ``eval`` on text that originates from
    the user / LLM. The character whitelist, the dunder check and the reduced
    builtins are defence in depth only -- ``df`` and ``pd`` themselves expose
    file I/O (e.g. ``to_csv``, ``read_csv``). Do not expose this app to
    untrusted users without a real sandbox or an AST-based allow-list.
    """
    try:
        # Drop every character outside the whitelist.
        parsed_query = _QUERY_DISALLOWED_CHARS.sub("", query.strip())

        # Dunder attribute access is the usual route out of a restricted eval.
        if "__" in parsed_query:
            return _UNSUPPORTED_QUERY_MESSAGE

        if "df.query(" in parsed_query or "df[" in parsed_query:
            # `df` is the module-level DataFrame created after a file upload;
            # if nothing is uploaded yet the NameError is reported below.
            return eval(  # noqa: S307 - see SECURITY note in the docstring
                parsed_query,
                {"__builtins__": _SAFE_BUILTINS, "df": df, "pd": pd},
            )
        return _UNSUPPORTED_QUERY_MESSAGE
    except Exception as e:
        # Tool boundary: arbitrary expressions can raise anything, and the
        # agent expects text back rather than an exception.
        return f"Error executing query: {str(e)}"


def execute_query(query):
    """Run ``query`` through a conversational ReAct agent and return its answer.

    The conversation memory is stored in ``st.session_state`` so it survives
    Streamlit reruns; creating it inside this function on every call would
    discard the chat history each time.

    Args:
        query: The user's natural-language question.

    Returns:
        The agent's final answer as returned by ``agent.run``.
    """
    if "agent_memory" not in st.session_state:
        st.session_state["agent_memory"] = ConversationBufferMemory(
            memory_key="chat_history", return_messages=True
        )
    memory = st.session_state["agent_memory"]

    tool = Tool(
        name="Pandas Query Executor",
        func=safe_execute_query,
        description="Executes Pandas-based queries on uploaded data",
    )
    agent = initialize_agent(
        tools=[tool],
        llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
        agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        memory=memory,
        verbose=True,
    )
    return agent.run(query)


# Model Selection
# NOTE(review): model_choice is not consulted anywhere in the visible code;
# execute_query always uses gpt-3.5-turbo. Confirm whether that is intended.
model_choice = st.selectbox(
    "Select LLM Model", ["OpenAI GPT-3.5", "Claude 3 Haiku", "Mistral-7B"]
)

# File Upload with validation
uploaded_file = st.file_uploader(
    "Upload a file", type=["csv", "xlsx", "xls", "json", "tsv"]
)

if uploaded_file is not None:
    file_extension = uploaded_file.name.split(".")[-1].lower()

    try:
        if file_extension == "csv":
            df = pd.read_csv(uploaded_file)
        elif file_extension in ["xlsx", "xls"]:
            # openpyxl cannot read legacy .xls workbooks; for those let pandas
            # select the engine itself.
            excel_engine = "openpyxl" if file_extension == "xlsx" else None
            df = pd.read_excel(uploaded_file, engine=excel_engine)
        elif file_extension == "json":
            df = pd.read_json(uploaded_file)
        elif file_extension == "tsv":
            df = pd.read_csv(uploaded_file, sep="\t")
        else:
            st.error("Unsupported file format. Please upload a CSV, Excel, JSON, or TSV file.")
            st.stop()

        st.write("### Preview of Data:")
        st.write(df.head())

        # Extract metadata
        column_names = df.columns.tolist()
        data_types = df.dtypes.apply(lambda x: x.name).to_dict()
        missing_values = df.isnull().sum().to_dict()

        # Display metadata
        # Built from positional lists rather than the dicts above: duplicate
        # column names collapse in a dict and would make the lengths differ.
        st.write("### Column Details:")
        st.write(
            pd.DataFrame(
                {
                    "Column": column_names,
                    "Type": [dtype.name for dtype in df.dtypes],
                    "Missing Values": df.isnull().sum().tolist(),
                }
            )
        )

    except Exception as e:
        st.error(f"Error loading file: {str(e)}")
        st.stop()

    # User Query
    query = st.text_input("Ask a question about this data:")

    if st.button("Submit Query"):
        if query:
            try:
                exec_result = execute_query(query)
                st.write("### Result:")
                st.write(exec_result)
            except Exception as e:
                st.error(f"Error executing query: {str(e)}")

            # Memory for context retention
            if "query_history" not in st.session_state:
                st.session_state.query_history = []
            st.session_state.query_history.append(query)