|  | import streamlit as st | 
					
						
						|  | import pandas as pd | 
					
						
						|  | import sqlite3 | 
					
						
						|  | import os | 
					
						
						|  | import json | 
					
						
						|  | from pathlib import Path | 
					
						
						|  | import plotly.express as px | 
					
						
						|  | from datetime import datetime, timezone | 
					
						
						|  | from crewai import Agent, Crew, Process, Task | 
					
						
						|  | from crewai.tools import tool | 
					
						
						|  | from langchain_groq import ChatGroq | 
					
						
						|  | from langchain_openai import ChatOpenAI | 
					
						
						|  | from langchain.schema.output import LLMResult | 
					
						
						|  | from langchain_community.tools.sql_database.tool import ( | 
					
						
						|  | InfoSQLDatabaseTool, | 
					
						
						|  | ListSQLDatabaseTool, | 
					
						
						|  | QuerySQLCheckerTool, | 
					
						
						|  | QuerySQLDataBaseTool, | 
					
						
						|  | ) | 
					
						
						|  | from langchain_community.utilities.sql_database import SQLDatabase | 
					
						
						|  | from datasets import load_dataset | 
					
						
						|  | import tempfile | 
					
						
						|  |  | 
					
						
						|  | st.title("SQL-RAG Using CrewAI π") | 
					
						
						|  | st.write("Analyze datasets using natural language queries powered by SQL and CrewAI.") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | llm = None | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | groq_api_key = os.getenv("GROQ_API_KEY") | 
					
						
						|  | openai_api_key = os.getenv("OPENAI_API_KEY") | 
					
						
						|  |  | 
					
						
						|  | if model_choice == "llama-3.3-70b": | 
					
						
						|  | if not groq_api_key: | 
					
						
						|  | st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.") | 
					
						
						|  | llm = None | 
					
						
						|  | else: | 
					
						
						|  | llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile") | 
					
						
						|  | elif model_choice == "GPT-4o": | 
					
						
						|  | if not openai_api_key: | 
					
						
						|  | st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.") | 
					
						
						|  | llm = None | 
					
						
						|  | else: | 
					
						
						|  | llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if "df" not in st.session_state: | 
					
						
						|  | st.session_state.df = None | 
					
						
						|  | if "show_preview" not in st.session_state: | 
					
						
						|  | st.session_state.show_preview = False | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | input_option = st.radio("Select Dataset Input:", ["Use Hugging Face Dataset", "Upload CSV File"]) | 
					
						
						|  |  | 
					
						
						|  | if input_option == "Use Hugging Face Dataset": | 
					
						
						|  | dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="Einstellung/demo-salaries") | 
					
						
						|  | if st.button("Load Dataset"): | 
					
						
						|  | try: | 
					
						
						|  | with st.spinner("Loading dataset..."): | 
					
						
						|  | dataset = load_dataset(dataset_name, split="train") | 
					
						
						|  | st.session_state.df = pd.DataFrame(dataset) | 
					
						
						|  | st.session_state.show_preview = True | 
					
						
						|  | st.success(f"Dataset '{dataset_name}' loaded successfully!") | 
					
						
						|  | except Exception as e: | 
					
						
						|  | st.error(f"Error: {e}") | 
					
						
						|  |  | 
					
						
						|  | elif input_option == "Upload CSV File": | 
					
						
						|  | uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"]) | 
					
						
						|  | if uploaded_file: | 
					
						
						|  | try: | 
					
						
						|  | st.session_state.df = pd.read_csv(uploaded_file) | 
					
						
						|  | st.session_state.show_preview = True | 
					
						
						|  | st.success("File uploaded successfully!") | 
					
						
						|  | except Exception as e: | 
					
						
						|  | st.error(f"Error loading file: {e}") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if st.session_state.df is not None and st.session_state.show_preview: | 
					
						
						|  | st.subheader("π Dataset Preview") | 
					
						
						|  | st.dataframe(st.session_state.df.head()) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if st.session_state.df is not None: | 
					
						
						|  | temp_dir = tempfile.TemporaryDirectory() | 
					
						
						|  | db_path = os.path.join(temp_dir.name, "data.db") | 
					
						
						|  | connection = sqlite3.connect(db_path) | 
					
						
						|  | st.session_state.df.to_sql("salaries", connection, if_exists="replace", index=False) | 
					
						
						|  | db = SQLDatabase.from_uri(f"sqlite:///{db_path}") | 
					
						
						|  |  | 
					
						
						|  | @tool("list_tables") | 
					
						
						|  | def list_tables() -> str: | 
					
						
						|  | """List all tables in the database.""" | 
					
						
						|  | return ListSQLDatabaseTool(db=db).invoke("") | 
					
						
						|  |  | 
					
						
						|  | @tool("tables_schema") | 
					
						
						|  | def tables_schema(tables: str) -> str: | 
					
						
						|  | """Get the schema and sample rows for the specified tables.""" | 
					
						
						|  | return InfoSQLDatabaseTool(db=db).invoke(tables) | 
					
						
						|  |  | 
					
						
						|  | @tool("execute_sql") | 
					
						
						|  | def execute_sql(sql_query: str) -> str: | 
					
						
						|  | """Execute a SQL query against the database and return the results.""" | 
					
						
						|  | return QuerySQLDataBaseTool(db=db).invoke(sql_query) | 
					
						
						|  |  | 
					
						
						|  | @tool("check_sql") | 
					
						
						|  | def check_sql(sql_query: str) -> str: | 
					
						
						|  | """Validate the SQL query syntax and structure before execution.""" | 
					
						
						|  | return QuerySQLCheckerTool(db=db, llm=llm).invoke({"query": sql_query}) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | sql_dev = Agent( | 
					
						
						|  | role="Senior Database Developer", | 
					
						
						|  | goal="Extract data using optimized SQL queries.", | 
					
						
						|  | backstory="An expert in writing optimized SQL queries for complex databases.", | 
					
						
						|  | llm=llm, | 
					
						
						|  | tools=[list_tables, tables_schema, execute_sql, check_sql], | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | data_analyst = Agent( | 
					
						
						|  | role="Senior Data Analyst", | 
					
						
						|  | goal="Analyze the data and produce insights.", | 
					
						
						|  | backstory="A seasoned analyst who identifies trends and patterns in datasets.", | 
					
						
						|  | llm=llm, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | report_writer = Agent( | 
					
						
						|  | role="Technical Report Writer", | 
					
						
						|  | goal="Write a structured report with Introduction, Key Insights, and Analysis. DO NOT include any Conclusion or Summary.", | 
					
						
						|  | backstory="Specializes in detailed analytical reports without conclusions.", | 
					
						
						|  | llm=llm, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | conclusion_writer = Agent( | 
					
						
						|  | role="Conclusion Specialist", | 
					
						
						|  | goal="Summarize findings into a clear and concise 3-5 line Conclusion highlighting only the most important insights.", | 
					
						
						|  | backstory="An expert in crafting impactful and clear conclusions.", | 
					
						
						|  | llm=llm, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | extract_data = Task( | 
					
						
						|  | description="Extract data based on the query: {query}.", | 
					
						
						|  | expected_output="Database results matching the query.", | 
					
						
						|  | agent=sql_dev, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | analyze_data = Task( | 
					
						
						|  | description="Analyze the extracted data for query: {query}.", | 
					
						
						|  | expected_output="Key Insights and Analysis without any Introduction or Conclusion.", | 
					
						
						|  | agent=data_analyst, | 
					
						
						|  | context=[extract_data], | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | write_report = Task( | 
					
						
						|  | description="Write the analysis report with  Introduction, Key Insights, and Analysis. DO NOT include any Conclusion or Summary.", | 
					
						
						|  | expected_output="Markdown-formatted report excluding Conclusion.", | 
					
						
						|  | agent=report_writer, | 
					
						
						|  | context=[analyze_data], | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | write_conclusion = Task( | 
					
						
						|  | description="Write a brief and impactful 3-5 line Conclusion summarizing only the most important insights/findings. Include the max, min, and average salary" | 
					
						
						|  | "and highlight the most impactful insights.", | 
					
						
						|  | expected_output="Markdown-formatted Conclusion/Summary section with key insights and statistics.", | 
					
						
						|  | agent=conclusion_writer, | 
					
						
						|  | context=[analyze_data], | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | crew_report = Crew( | 
					
						
						|  | agents=[sql_dev, data_analyst, report_writer], | 
					
						
						|  | tasks=[extract_data, analyze_data, write_report], | 
					
						
						|  | process=Process.sequential, | 
					
						
						|  | verbose=True, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | crew_conclusion = Crew( | 
					
						
						|  | agents=[data_analyst, conclusion_writer], | 
					
						
						|  | tasks=[write_conclusion], | 
					
						
						|  | process=Process.sequential, | 
					
						
						|  | verbose=True, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | tab1, tab2 = st.tabs(["π Query Insights + Viz", "π Full Data Viz"]) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with tab1: | 
					
						
						|  | query = st.text_area("Enter Query:", value="Provide insights into the salary of a Principal Data Scientist.") | 
					
						
						|  | if st.button("Submit Query"): | 
					
						
						|  | with st.spinner("Processing query..."): | 
					
						
						|  |  | 
					
						
						|  | report_inputs = {"query": query + " Provide detailed analysis but DO NOT include Conclusion."} | 
					
						
						|  | report_result = crew_report.kickoff(inputs=report_inputs) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | conclusion_inputs = {"query": query + " Provide ONLY the most important insights in 3-5 concise lines."} | 
					
						
						|  | conclusion_result = crew_conclusion.kickoff(inputs=conclusion_inputs) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown(report_result if report_result else "β οΈ No Report Generated.") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | visualizations = [] | 
					
						
						|  |  | 
					
						
						|  | fig_salary = px.box(st.session_state.df, x="job_title", y="salary_in_usd", | 
					
						
						|  | title="Salary Distribution by Job Title") | 
					
						
						|  | visualizations.append(fig_salary) | 
					
						
						|  |  | 
					
						
						|  | fig_experience = px.bar( | 
					
						
						|  | st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(), | 
					
						
						|  | x="experience_level", y="salary_in_usd", | 
					
						
						|  | title="Average Salary by Experience Level" | 
					
						
						|  | ) | 
					
						
						|  | visualizations.append(fig_experience) | 
					
						
						|  |  | 
					
						
						|  | fig_employment = px.box(st.session_state.df, x="employment_type", y="salary_in_usd", | 
					
						
						|  | title="Salary Distribution by Employment Type") | 
					
						
						|  | visualizations.append(fig_employment) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown("#### 5. Visual Insights") | 
					
						
						|  | for fig in visualizations: | 
					
						
						|  | st.plotly_chart(fig, use_container_width=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.markdown(conclusion_result if conclusion_result else "β οΈ No Conclusion Generated.") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with tab2: | 
					
						
						|  | st.subheader("π Comprehensive Data Visualizations") | 
					
						
						|  |  | 
					
						
						|  | fig1 = px.histogram(st.session_state.df, x="job_title", title="Job Title Frequency") | 
					
						
						|  | st.plotly_chart(fig1) | 
					
						
						|  |  | 
					
						
						|  | fig2 = px.bar( | 
					
						
						|  | st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(), | 
					
						
						|  | x="experience_level", y="salary_in_usd", | 
					
						
						|  | title="Average Salary by Experience Level" | 
					
						
						|  | ) | 
					
						
						|  | st.plotly_chart(fig2) | 
					
						
						|  |  | 
					
						
						|  | fig3 = px.box(st.session_state.df, x="employment_type", y="salary_in_usd", | 
					
						
						|  | title="Salary Distribution by Employment Type") | 
					
						
						|  | st.plotly_chart(fig3) | 
					
						
						|  |  | 
					
						
						|  | temp_dir.cleanup() | 
					
						
						|  | else: | 
					
						
						|  | st.info("Please load a dataset to proceed.") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with st.sidebar: | 
					
						
						|  | st.header("π Reference:") | 
					
						
						|  | st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)") | 
					
						
						|  |  | 
					
						
						|  |  |