# Source: Hugging Face Spaces app (status when captured: Running)
import os | |
import streamlit as st | |
import pandas as pd | |
import openai | |
import torch | |
import matplotlib.pyplot as plt | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
from dotenv import load_dotenv | |
import anthropic | |
import ast | |
import re | |
from langchain.agents import AgentType, initialize_agent | |
from langchain.tools import Tool | |
from langchain.chat_models import ChatOpenAI | |
from langchain.memory import ConversationBufferMemory | |
# Load environment variables from a local .env file, if one exists.
load_dotenv()

# os.environ only accepts str values, so assigning os.getenv(...) directly
# raises TypeError whenever a key is missing. Re-export only the keys that
# are actually set and let the client libraries report a missing key later.
for _key_name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY"):
    _key_value = os.getenv(_key_name)
    if _key_value is not None:
        os.environ[_key_name] = _key_value
# UI styling: custom CSS for buttons, text inputs, the file uploader and the
# response box, injected as raw HTML.
_CUSTOM_CSS = """
<style>
.stButton button {
background-color: #1F6FEB;
color: white;
border-radius: 8px;
border: none;
padding: 10px 20px;
font-weight: bold;
}
.stButton button:hover {
background-color: #1A4FC5;
}
.stTextInput > div > input {
border: 1px solid #30363D;
background-color: #161B22;
color: #C9D1D9;
border-radius: 6px;
padding: 10px;
}
.stFileUploader > div {
border: 2px dashed #30363D;
background-color: #161B22;
color: #C9D1D9;
border-radius: 6px;
padding: 10px;
}
.response-box {
background-color: #161B22;
padding: 10px;
border-radius: 6px;
margin-bottom: 10px;
color: #FFFFFF;
}
</style>
"""

# unsafe_allow_html is needed so the <style> tag is passed through as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# NOTE(review): the trailing "π" looks like a mis-decoded emoji — confirm the
# intended character against the original file before changing it.
st.title("Excel Q&A Chatbot π")
# LangChain agent (multi-step reasoning with memory) and the pandas tool it calls
# Names a query expression may reference: the uploaded DataFrame, the pandas
# module, and a handful of side-effect-free builtins.
_SAFE_QUERY_BUILTINS = {
    "len": len,
    "min": min,
    "max": max,
    "sum": sum,
    "abs": abs,
    "round": round,
    "sorted": sorted,
}
_ALLOWED_QUERY_NAMES = frozenset({"df", "pd"}) | frozenset(_SAFE_QUERY_BUILTINS)


def _is_allowed_query_ast(tree):
    """Return True if *tree* only uses whitelisted names and public attributes."""
    for node in ast.walk(tree):
        if isinstance(node, ast.Name) and node.id not in _ALLOWED_QUERY_NAMES:
            return False
        # Underscore-prefixed attributes (__class__, __globals__, ...) are the
        # usual route out of a restricted eval, so refuse them outright.
        if isinstance(node, ast.Attribute) and node.attr.startswith("_"):
            return False
    return True


def safe_execute_query(query):
    """Evaluate a restricted pandas expression against the uploaded DataFrame.

    The expression must contain ``df.query(`` or ``df[``. It is parsed to an
    AST and rejected if it references any name other than ``df``, ``pd`` or a
    small set of harmless builtins, or if it touches an underscore-prefixed
    attribute. Accepted expressions are evaluated with builtins disabled.

    Args:
        query: A Python expression string such as ``"df[df['a'] > 1]"``.

    Returns:
        The value of the expression on success; otherwise a human-readable
        message string (unsupported query, or the error that was raised).
        This function does not raise for ordinary failures.
    """
    unsupported = "Unsupported query type. Please refine your question."
    try:
        expression = query.strip()
        if "df.query(" not in expression and "df[" not in expression:
            return unsupported

        tree = ast.parse(expression, mode="eval")
        if not _is_allowed_query_ast(tree):
            return unsupported

        # SECURITY: this still uses eval on model/user supplied text. The AST
        # whitelist and empty __builtins__ block the obvious escapes, but
        # pandas itself exposes file I/O and its own expression evaluator
        # (df.query / df.eval), so this is NOT a complete sandbox.
        namespace = {"__builtins__": {}, "df": df, "pd": pd}
        namespace.update(_SAFE_QUERY_BUILTINS)
        return eval(compile(tree, "<query>", "eval"), namespace)
    except Exception as e:
        # Covers syntax errors, a missing `df` (no file uploaded yet) and any
        # pandas error; the agent receives the message as the tool output.
        return f"Error executing query: {str(e)}"
def execute_query(query):
    """Answer a natural-language question using a LangChain agent.

    The agent is built with a single tool, ``safe_execute_query``, and a
    gpt-3.5-turbo chat model at temperature 0.

    Args:
        query: The user's question as plain text.

    Returns:
        The value returned by ``agent.run(query)``.
    """
    # A memory object created inside this function starts empty on every
    # call, so earlier questions were never visible to the agent. Keep one
    # instance per browser session in Streamlit's session state instead.
    if "agent_memory" not in st.session_state:
        st.session_state.agent_memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )

    pandas_tool = Tool(
        name="Pandas Query Executor",
        func=safe_execute_query,
        description="Executes Pandas-based queries on uploaded data",
    )
    # NOTE(review): return_messages=True is normally paired with the CHAT_
    # variant of this agent type — confirm the prompt renders history as
    # intended before changing either setting.
    agent = initialize_agent(
        tools=[pandas_tool],
        llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
        agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
        memory=st.session_state.agent_memory,
        verbose=True,
    )
    return agent.run(query)
# Model selection.
# NOTE(review): model_choice is not read anywhere in the visible code —
# execute_query always uses gpt-3.5-turbo. Wire it in or remove the selector.
model_choice = st.selectbox("Select LLM Model", ["OpenAI GPT-3.5", "Claude 3 Haiku", "Mistral-7B"])

# File upload; the widget restricts which extensions can be selected.
uploaded_file = st.file_uploader("Upload a file", type=["csv", "xlsx", "xls", "json", "tsv"])

if uploaded_file is not None:
    file_extension = uploaded_file.name.split(".")[-1].lower()
    try:
        if file_extension == "csv":
            df = pd.read_csv(uploaded_file)
        elif file_extension in ["xlsx", "xls"]:
            # Do not force engine="openpyxl": it cannot read legacy .xls
            # files. With no engine given, pandas picks one from the file
            # format (xlrd for .xls, openpyxl for .xlsx).
            df = pd.read_excel(uploaded_file)
        elif file_extension == "json":
            df = pd.read_json(uploaded_file)
        elif file_extension == "tsv":
            df = pd.read_csv(uploaded_file, sep="\t")
        else:
            st.error("Unsupported file format. Please upload a CSV, Excel, JSON, or TSV file.")
            st.stop()

        st.write("### Preview of Data:")
        st.write(df.head())

        # Extract per-column metadata.
        column_names = df.columns.tolist()
        data_types = df.dtypes.apply(lambda x: x.name).to_dict()
        missing_values = df.isnull().sum().to_dict()

        # Display metadata; dict views are materialised so every column of
        # the summary frame is a plain list.
        st.write("### Column Details:")
        st.write(
            pd.DataFrame(
                {
                    "Column": column_names,
                    "Type": list(data_types.values()),
                    "Missing Values": list(missing_values.values()),
                }
            )
        )
    except Exception as e:
        st.error(f"Error loading file: {str(e)}")
        st.stop()

    # User query. Shown only once a file is loaded, because the query tool
    # reads the module-level `df` assigned above.
    query = st.text_input("Ask a question about this data:")
    if st.button("Submit Query"):
        if query.strip():
            # Record the question only when one is actually submitted;
            # appending unconditionally added an entry (often empty) on
            # every rerun of the script.
            if "query_history" not in st.session_state:
                st.session_state.query_history = []
            st.session_state.query_history.append(query)

            try:
                exec_result = execute_query(query)
                st.write("### Result:")
                st.write(exec_result)
            except Exception as e:
                st.error(f"Error executing query: {str(e)}")