|
from dotenv import load_dotenv |
|
import os |
|
|
|
|
|
from threading import Thread |
|
import queue |
|
from openai import AssistantEventHandler |
|
from typing_extensions import override |
|
|
|
load_dotenv() |
|
|
|
import openai |
|
import time |
|
import gradio as gr |
|
from autogen import UserProxyAgent, config_list_from_json |
|
from datetime import timedelta, datetime |
|
import pandas as pd |
|
import numpy as np |
|
from gradio_datetimerange import DateTimeRange |
|
import os |
|
from time import sleep |
|
from gradio_pdf import PDF |
|
|
|
from pandasai.llm.openai import OpenAI |
|
from pandasai import Agent |
|
import matplotlib.pyplot as plt |
|
import io |
|
from pandasai import SmartDataframe |
|
from collections import Counter |
|
|
|
|
|
|
|
import requests |
|
|
|
|
|
|
|
# Legacy OpenAI function-calling schema (the pre-"tools" API shape) for the
# update_weather function defined later in this module.
# NOTE(review): this duplicates the schema inside `tools` below, and nothing
# in this file's visible code passes `functions` to an API call — looks like
# leftover configuration; confirm before relying on it.
functions = [
    {
        "name": "update_weather",
        "description": "Fetches and returns the current weather information for a specified location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The name of the city or location to get weather information for."
                }
            },
            "required": ["location"]
        }
    }
]
|
|
|
# Tool schema in the current OpenAI "tools" API format, describing the
# update_weather function defined later in this module.
# NOTE(review): duplicates `functions` above, and this list is not passed to
# any API call in this file's visible code — presumably the assistant
# (ASSISTANT_ID) was configured with it out of band; verify.
tools=[
    {
        "type": "function",
        "function": {
            "name": "update_weather",
            "description": "Fetches and returns the current weather information for a specified location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The name of the city or location to get weather information for."
                    }
                },
                "required": ["location"]
            }
        }
    }
]
|
|
|
|
|
def get_weather(location: str) -> dict:
    """
    Fetch the current weather for a given location from OpenWeatherMap.

    Parameters:
    - location: the search term (city or place name) to find current weather
      information for.

    Returns:
        The raw OpenWeatherMap JSON response as a dictionary. On an API-level
        error the dictionary carries a "message" key instead of weather data.

    Raises:
        KeyError: if OPENWEATHERMAP_API_KEY is not set in the environment.
        requests.RequestException: if the HTTP request fails or times out.
    """
    # BUG FIX: the return annotation previously said `str`, but the function
    # returns the parsed JSON dict (as the docstring already stated).
    api_key = os.environ["OPENWEATHERMAP_API_KEY"]
    base_url = "http://api.openweathermap.org/data/2.5/weather"
    params = {"q": location, "appid": api_key, "units": "imperial"}
    # Bounded timeout so a hung network call cannot block the caller forever.
    response = requests.get(base_url, params=params, timeout=10)
    weather_data = response.json()
    return weather_data
|
|
|
|
|
# JSON schema (stored as a raw string) describing a "get_weather" tool.
# NOTE(review): not referenced anywhere in this file's visible code, and the
# function name differs from the "update_weather" schemas above — appears to
# be unused leftover configuration; confirm before deleting.
get_weather_schema = """
{
    "name": "get_weather",
    "description": "Fetches the weather for a location based on a search term.",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "Name of the city"
            }
        },
        "required": [
            "location"
        ]
    }
}
"""
|
|
|
|
|
|
|
def generate_date_range(start_date, end_date, freq="D"):
    """Build the sequence of timestamps covering [start_date, end_date].

    Thin wrapper around ``pd.date_range`` kept for readability at call sites;
    ``freq`` defaults to daily samples.
    """
    date_index = pd.date_range(start=start_date, end=end_date, freq=freq)
    return date_index
|
|
|
|
|
|
|
def generate_synthetic_data(dates):
    """Simulate one water-heater sensor reading per timestamp.

    A fixed RNG seed keeps the dataset reproducible, and a burst of
    abnormally hot ``Upper_Thermostat`` readings is planted around the middle
    of the date range so anomaly-detection demos have something to find.

    Parameters:
    - dates: a sequence of timestamps (used as the DataFrame index).

    Returns:
        A DataFrame indexed by ``dates`` with ten synthetic sensor columns.
    """
    np.random.seed(0)
    n = len(dates)

    # NOTE: with a fixed seed, the order of the draws below is significant —
    # reordering them would change every generated value.
    data = {
        "Temperature_Pressure_Relief_Valve": np.random.choice([0, 1], size=n),
        "Outlet_Nipple_Assembly": np.random.normal(loc=80, scale=10, size=n),
        "Inlet_Nipple": np.random.normal(loc=50, scale=5, size=n),
        "Upper_Element": np.random.normal(loc=150, scale=20, size=n),
        "Lower_Element": np.random.normal(loc=150, scale=20, size=n),
        "Anode_Rod": np.random.normal(loc=7, scale=1.5, size=n),
        "Drain_Valve": np.random.choice([0, 1], size=n),
        "Upper_Thermostat": np.random.normal(loc=120, scale=10, size=n),
        "Lower_Thermostat": np.random.normal(loc=120, scale=10, size=n),
        "Operating_Time": np.random.randint(1, 25, size=n),
    }

    # Overwrite a 10-sample window centred on the midpoint with ~200-degree
    # readings: the planted anomaly.
    mid = n // 2
    lo, hi = mid - 5, mid + 5
    data["Upper_Thermostat"][lo:hi] = np.random.normal(
        loc=200, scale=5, size=hi - lo
    )

    return pd.DataFrame(data, index=dates)
|
|
|
|
|
|
|
# Build one year of daily synthetic sensor data (Oct 2023 - Oct 2024).
start_date = datetime(2023, 10, 1)
end_date = datetime(2024, 10, 1)
dates = generate_date_range(start_date, end_date)

synthetic_dataset = generate_synthetic_data(dates)

now = datetime.now()

# Attach a "time" column counting backwards from now in 5-hour steps,
# one entry per row.
# NOTE(review): this gives each row two different notions of time (the daily
# DatetimeIndex from `dates` and this 5-hourly column); `synthetic_dataset`
# is not referenced elsewhere in this file's visible code — confirm which
# timeline any downstream consumer actually uses.
synthetic_dataset["time"] = [
    now - timedelta(hours=5 * i) for i in range(synthetic_dataset.shape[0])
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Identifiers of pre-provisioned OpenAI resources; both must be present in
# the environment (loaded by load_dotenv above) or this raises KeyError at
# import time.
# NOTE(review): VECTOR_STORE_ID is read but not used elsewhere in this
# file's visible code — presumably the assistant references it server-side.
VECTOR_STORE_ID = os.environ["VECTOR_STORE_ID"]
ASSISTANT_ID = os.environ["ASSISTANT_ID"]

# OpenAI client; picks up OPENAI_API_KEY from the environment.
client = openai.OpenAI()

# A single conversation thread is created per process start and shared by
# every chat() call for the lifetime of the app.
thread = client.beta.threads.create()
thread_id = thread.id
|
|
|
|
|
|
|
class EventHandler(AssistantEventHandler):
    """Streams assistant text fragments into a queue for the Gradio UI.

    The OpenAI streaming run invokes the ``on_text_*`` callbacks from a
    worker thread; each text delta is pushed onto ``response_queue`` so the
    generator in ``chat`` can yield the reply incrementally.
    """

    def __init__(self, response_queue):
        super().__init__()
        # Thread-safe handoff channel between the stream worker and chat().
        self.response_queue = response_queue

    @override
    def on_text_created(self, text) -> None:
        """Ignore the start-of-text event; only deltas carry content."""
        pass

    @override
    def on_text_delta(self, delta, snapshot):
        """Forward each raw text fragment to the consumer queue."""
        self.response_queue.put(delta.value)
|
|
|
|
|
def chat(usr_message, history):
    """Gradio ChatInterface handler: stream an assistant reply.

    Appends the user's message to the module-level OpenAI assistant thread,
    starts a streaming run on a worker thread, and yields the accumulated
    response text so the chat UI updates incrementally.

    Parameters:
    - usr_message: the user's chat message.
    - history: prior chat history supplied by gr.ChatInterface (unused here;
      the OpenAI thread object already holds the conversation state).

    Yields:
        The assistant response accumulated so far (grows with each chunk).
    """
    global thread_id

    user_input = usr_message

    if not thread_id:
        # BUG FIX: this branch previously did `return json.dumps(...), 400`
        # — a Flask-style response inside a generator function, and `json`
        # was never imported, so it raised NameError. Surface the error to
        # the UI instead and stop the generator.
        print("Error: Missing thread_id")
        yield "Error: Missing thread_id"
        return

    print(
        f"Received message: {user_input} for thread ID: {thread_id}"
    )

    # Append the user's message to the persistent assistant thread.
    client.beta.threads.messages.create(
        thread_id=thread_id, role="user", content=user_input
    )

    # Queue bridging the streaming worker thread and this generator.
    response_queue = queue.Queue()
    event_handler = EventHandler(response_queue)

    def run_stream():
        # Runs on a worker thread; the event handler pushes text deltas onto
        # response_queue as they arrive from the API.
        with client.beta.threads.runs.stream(
            thread_id=thread_id,
            assistant_id=ASSISTANT_ID,
            # NOTE(review): "required" forces the assistant to call a tool on
            # every turn — confirm this is intended for plain Q&A messages.
            tool_choice="required",
            event_handler=event_handler,
        ) as stream:
            stream.until_done()

    stream_thread = Thread(target=run_stream)
    stream_thread.start()

    assistant_response = ""
    while True:
        try:
            # Poll with a short timeout so we also notice, between chunks,
            # when the worker thread has finished.
            chunk = response_queue.get(timeout=0.1)
            assistant_response += chunk
            yield assistant_response
        except queue.Empty:
            if not stream_thread.is_alive():
                break

    stream_thread.join()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def update_weather(location):
    """Fetch current weather for a location and format it as Markdown.

    Parameters:
    - location: city or place name understood by OpenWeatherMap.

    Returns:
        A Markdown string with the current conditions, or an error message
        string when the request or API call fails.
    """
    api_key = os.environ["OPENWEATHERMAP_API_KEY"]
    base_url = "http://api.openweathermap.org/data/2.5/weather"
    params = {"q": location, "appid": api_key, "units": "imperial"}
    # Bounded timeout + explicit error handling so a network failure or a
    # non-JSON error body cannot hang or crash the Gradio callback.
    try:
        response = requests.get(base_url, params=params, timeout=10)
        weather_data = response.json()
    except (requests.RequestException, ValueError) as err:
        return f"Error fetching weather data: {err}"

    if response.status_code != 200:
        return f"Error fetching weather data: {weather_data.get('message', 'Unknown error')}"

    lon = weather_data["coord"]["lon"]
    lat = weather_data["coord"]["lat"]
    feels_like = weather_data["main"]["feels_like"]
    temp_min = weather_data["main"]["temp_min"]
    temp_max = weather_data["main"]["temp_max"]
    pressure = weather_data["main"]["pressure"]
    visibility = weather_data["visibility"]
    wind_speed = weather_data["wind"]["speed"]
    wind_deg = weather_data["wind"]["deg"]
    # Sunrise/sunset arrive as Unix timestamps; rendered in server-local time.
    sunrise = datetime.fromtimestamp(weather_data["sys"]["sunrise"]).strftime('%H:%M:%S')
    sunset = datetime.fromtimestamp(weather_data["sys"]["sunset"]).strftime('%H:%M:%S')
    temp = weather_data["main"]["temp"]
    humidity = weather_data["main"]["humidity"]
    condition = weather_data["weather"][0]["description"]

    return f"""**Weather in {location}:**
- **Coordinates:** (lon: {lon}, lat: {lat})
- **Temperature:** {temp:.2f}°F (Feels like: {feels_like:.2f}°F)
- **Min Temperature:** {temp_min:.2f}°F, **Max Temperature:** {temp_max:.2f}°F
- **Humidity:** {humidity}%
- **Condition:** {condition.capitalize()}
- **Pressure:** {pressure} hPa
- **Visibility:** {visibility} meters
- **Wind Speed:** {wind_speed} m/s, **Wind Direction:** {wind_deg}°
- **Sunrise:** {sunrise}, **Sunset:** {sunset}"""
|
|
|
|
|
|
|
def update_weather_forecast(location: str) -> str:
    """Fetch the 5-day / 3-hour forecast for a location and summarize per day.

    Parameters:
    - location: the search term (city or place name) to find weather
      information for.

    Returns:
        A Markdown string with one summary section per forecast day, or an
        error message string when the request or API call fails.
    """
    api_key = os.environ["OPENWEATHERMAP_API_KEY"]
    base_url = "http://api.openweathermap.org/data/2.5/forecast"
    params = {
        "q": location,
        "appid": api_key,
        "units": "imperial",
        "cnt": 40  # 40 three-hour slots = 5 days of forecast
    }
    # Bounded timeout + explicit error handling so a network failure or a
    # non-JSON error body cannot hang or crash the Gradio callback.
    try:
        response = requests.get(base_url, params=params, timeout=10)
        weather_data = response.json()
    except (requests.RequestException, ValueError) as err:
        return f"Error fetching weather data: {err}"
    if response.status_code != 200:
        return f"Error fetching weather data: {weather_data.get('message', 'Unknown error')}"

    # Group the 3-hourly forecast entries by calendar date ("YYYY-MM-DD").
    forecast_data = {}
    for item in weather_data['list']:
        date_str, time_str = item['dt_txt'].split(' ')
        forecast_data.setdefault(date_str, []).append({
            'time': time_str,
            'temp': item['main']['temp'],
            'feels_like': item['main']['feels_like'],
            'humidity': item['main']['humidity'],
            'pressure': item['main']['pressure'],
            'wind_speed': item['wind']['speed'],
            'wind_deg': item['wind']['deg'],
            'condition': item['weather'][0]['description'],
            'visibility': item.get('visibility', 'N/A'),
        })

    # Reduce each day's entries to min / max / mean statistics plus the most
    # frequent condition description.
    daily_summaries = {}
    for date_str, forecasts in forecast_data.items():
        temps = [f['temp'] for f in forecasts]
        feels_likes = [f['feels_like'] for f in forecasts]
        humidities = [f['humidity'] for f in forecasts]
        pressures = [f['pressure'] for f in forecasts]
        wind_speeds = [f['wind_speed'] for f in forecasts]
        conditions = [f['condition'] for f in forecasts]

        condition_counts = Counter(conditions)
        most_common_condition = condition_counts.most_common(1)[0][0]

        daily_summaries[date_str] = {
            'min_temp': min(temps),
            'max_temp': max(temps),
            'avg_temp': sum(temps) / len(temps),
            'avg_feels_like': sum(feels_likes) / len(feels_likes),
            'avg_humidity': sum(humidities) / len(humidities),
            'avg_pressure': sum(pressures) / len(pressures),
            'avg_wind_speed': sum(wind_speeds) / len(wind_speeds),
            'condition': most_common_condition,
        }

    # Render chronologically; ISO dates sort correctly as plain strings.
    city_name = weather_data['city']['name']
    ret_str = f"**5-Day Weather Forecast for {city_name}:**\n"

    for date_str in sorted(daily_summaries.keys()):
        summary = daily_summaries[date_str]
        ret_str += f"\n**{date_str}:**\n"
        ret_str += f"- **Condition:** {summary['condition'].capitalize()}\n"
        ret_str += f"- **Min Temperature:** {summary['min_temp']:.2f}°F\n"
        ret_str += f"- **Max Temperature:** {summary['max_temp']:.2f}°F\n"
        ret_str += f"- **Average Temperature:** {summary['avg_temp']:.2f}°F (Feels like {summary['avg_feels_like']:.2f}°F)\n"
        ret_str += f"- **Humidity:** {summary['avg_humidity']:.0f}%\n"
        ret_str += f"- **Pressure:** {summary['avg_pressure']:.0f} hPa\n"
        ret_str += f"- **Wind Speed:** {summary['avg_wind_speed']:.2f} m/s\n"

    return ret_str
|
|
|
|
|
|
|
|
|
|
|
# LLM used by pandasai for natural-language queries over the CSV data.
llmmodel = OpenAI(api_token=os.environ["OPENAI_API_KEY"], model='gpt-4o')

# Cleaned sensor data; 'off-nominal' is cast to str so the ScatterPlots
# below treat it as a categorical color field.
dfcleaned = pd.read_csv("dfcleaned.csv")
dfcleaned['Timestamp'] = pd.to_datetime(dfcleaned['Timestamp'])
dfcleaned['off-nominal'] = dfcleaned['off-nominal'].apply(str)
# Per-feature SHAP values aligned to the same timestamps.
dfshaps = pd.read_csv("shaps.csv")
dfshaps['Timestamp'] = pd.to_datetime(dfshaps['Timestamp'])

# Conversational agent over both frames; used by process_query below.
agent = Agent([dfcleaned, dfshaps], config={"llm": llmmodel})

# NOTE(review): these SmartDataframes are not referenced elsewhere in this
# file's visible code — confirm they are still needed.
sdfshaps = SmartDataframe(dfshaps, config={"llm": llmmodel})
sdfcleaned = SmartDataframe(dfcleaned, config={"llm": llmmodel})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_query(query):
    """Run a natural-language query against the pandasai agent.

    Parameters:
    - query: free-text question about the loaded CSV data.

    Returns:
        A 6-tuple of (text, image path, dataframe, plus three gr.update
        visibility toggles) matching the output components wired up in the
        Blocks UI; exactly one output is made visible per call.
    """
    response = agent.chat(query)
    print(response)

    text_output = None
    image_output = None
    dataframe_output = None
    text_visible = False
    image_visible = False
    dataframe_visible = False

    if isinstance(response, str) and ".png" not in response:
        text_output = response
        text_visible = True
    elif isinstance(response, str) and ".png" in response:
        # pandasai returns chart answers as a path to a saved .png file.
        image_output = response
        image_visible = True
    elif isinstance(response, pd.DataFrame):
        dataframe_output = response
        dataframe_visible = True
    else:
        # BUG FIX: numeric and other non-string, non-DataFrame answers
        # previously fell through, leaving every output hidden; show them
        # as text instead.
        text_output = str(response)
        text_visible = True

    return (
        text_output,
        image_output,
        dataframe_output,
        gr.update(visible=text_visible),
        gr.update(visible=image_visible),
        gr.update(visible=dataframe_visible)
    )
|
|
|
|
|
|
|
|
|
|
|
def gradio_app():
    """Build a standalone gr.Interface wrapping process_query.

    Returns:
        The constructed (unlaunched) gr.Interface.

    NOTE(review): not invoked anywhere in this file — the Blocks UI below
    wires process_query directly; this looks like leftover scaffolding.
    """
    iface = gr.Interface(
        fn=process_query,
        inputs="text",
        outputs=[
            gr.Textbox(label="Response"),
            gr.Image(label="Plot"),
            gr.DataFrame(label="Dataframe")
        ],
        title="pandasai Query Processor",
        description="Enter your query related to the csv data files."
    )
    return iface
|
|
|
# ---------------------------------------------------------------------------
# Main Gradio Blocks UI:
#   Row 1 — current-weather and 5-day-forecast lookup panels.
#   Row 2 — assistant chat (left), anomaly scatter plots (middle),
#           pandasai data-query panel (right).
# ---------------------------------------------------------------------------
with gr.Blocks(
    theme = gr.themes.Soft(),
) as demo:
    with gr.Row():
        with gr.Column():
            # Current-weather lookup panel.
            location1 = gr.Textbox(label="Enter location for weather (e.g., Rio Rancho, New Mexico)",
                                   value="Cambridge, Massachusetts")
            weather_button = gr.Button("Get Weather")

            output1 = gr.Textbox(label="Weather Information", lines=8, max_lines=8, show_label=True, show_copy_button=True)
            weather_button.click(
                fn=update_weather,
                inputs=location1,
                outputs=output1,
                api_name="update_weather",
            )
        with gr.Column():
            # 5-day forecast panel.
            location2 = gr.Textbox(label="Enter location for weather forecast (e.g., Rio Rancho, New Mexico)",
                                   value="Cambridge, Massachusetts")
            weather_forecast_button = gr.Button("Get 5-Day Weather Forecast")

            output2 = gr.Textbox(label="Weather 5-Day Forecast Information", lines=8, max_lines=8,
                                 show_label=True, show_copy_button=True)
            weather_forecast_button.click(
                fn=update_weather_forecast,
                inputs=location2,
                outputs=output2,
                api_name="update_weather_forecast",
            )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# Building Automation Assistant")

            gr.Markdown(
                "I'm an AI assistant that can help with building maintenance and equipment questions."
            )

            gr.Markdown("---")

            # Streaming chat backed by the OpenAI assistant via chat() above.
            chat_interface = gr.ChatInterface(
                chat,
                chatbot=gr.Chatbot(height=750, show_copy_button=True, show_copy_all_button=True,
                                   avatar_images=("user_avatar.png", "assistant_avatar.png")),
                title="Ask Me Anything",
                examples_per_page= 5,
                description="Type your question about building automation here.",
                examples=[
                    "Tell me about the HouseZero dataset. Retrieve information from the publication you have access to. Use your file retrieval tool.",
                    "Describe in detail the relationshp between the columns in the two uploaded CSV files and the information you have access to regarding the HouseZero dataset. Be verbose. Use your file retrieval tool.",
                    "Tell be in great detail any advice you have to maintain a small to midsize office building, like the HouseZero data corresponds to. Be verbose. Use your file retrieval tool.",
                    "please caculate the correlation of each feature with the anomaly_score and retuen the values in descending order. return the top 10 rows.",
                    "Tell me in great detail any advice you have for the building managers of large hospitals. Be verbose. Use your file retrieval tool.",
                    "Show massachusetts electricity billing rates during the same time span as the CSV data",
                    "Use those rates and the relevant columns in the CSV files to estimate how much it costs to operate this building per month.",
                    "What is the estimated average electricity cost for operating the building using massachusetts energy rates. use your file retrieval tool. use data csv files for building data. Limit your response to 140 characters. Use your file retrieval tool.",
                    "The anomaly_score field on one of the CSVs indicates that that row is an anomaly if it has value greater than zero.. can you please list a few of the rows with the highest value for this column and using your building efficiency knowledge explain why they may represent a problem? Use your file retrieval tool.",
                    "Based on the data in these CSV files, can you assign an EnergyIQ score from 1-10 that reflects how well the building is operating? Explain the reason for your score and provide any recommendations on actions to take that can improve it in the future. Be verbose. Use your file retrieval tool.",
                    "What would be a good feature to plot as a function of time to illustrate the problems of why the EnergyIQ score is low? Use your file retreival tool.",
                ],
                fill_height=True,
            )

            gr.Markdown("---")
        with gr.Column():
            # Scatter plots of the cleaned sensor data, colored by the
            # 'off-nominal' flag (stringified earlier so it plots as a
            # categorical field).
            anomaly_plot = gr.ScatterPlot(
                dfcleaned,
                x="Timestamp",
                y="Z5_RH",
                color="off-nominal",
                title="Anomaly Score"
            )

            zone3_plot = gr.ScatterPlot(
                dfcleaned,
                x="Timestamp",
                y="Z3_RH",
                color="off-nominal",
                title="Zone 3 Relative Humidity",
            )

            zone4_plot = gr.ScatterPlot(
                dfcleaned,
                x="Timestamp",
                y="Z4_RH",
                color="off-nominal",
                title="Zone 4 Relative Humidity",
            )

            plots = [anomaly_plot, zone3_plot, zone4_plot]

            def select_region(selection: gr.SelectData):
                """
                Zoom every plot to the x-range selected on any one of them.

                Args:
                    selection (gr.SelectData): The data from the selection event.

                Returns:
                    A list of plot updates (one per plot) applying the new x_lim.
                """
                # NOTE(review): gr.Plot.update() was removed in Gradio 4.x —
                # confirm this no-selection branch works on the pinned Gradio
                # version (the other branches return gr.ScatterPlot objects).
                if selection is None or selection.index is None:
                    return [gr.Plot.update() for _ in plots]

                # NOTE(review): assumes selection.index is an (x_min, x_max)
                # pair for plot selections — verify against the Gradio docs.
                min_x, max_x = selection.index

                return [gr.ScatterPlot(x_lim=(min_x, max_x)) for _ in plots]

            def reset_region():
                """
                Reset the x-axis limits on all plots (double-click handler).

                Returns:
                    A list of plot updates clearing x_lim on every plot.
                """
                return [gr.ScatterPlot(x_lim=None) for _ in plots]

            # Wire zoom-on-select and reset-on-double-click for every plot;
            # each event updates all three plots in lockstep.
            for plot in plots:
                plot.select(
                    select_region,
                    inputs=None,
                    outputs=plots
                )
                plot.double_click(
                    reset_region,
                    inputs=None,
                    outputs=plots
                )

        with gr.Column():
            # pandasai natural-language data-query panel; the three output
            # columns stay hidden until process_query decides which kind of
            # result (text / image / dataframe) to reveal.
            query = gr.Textbox(label="Enter your question about the data",
                               value="Plot the anomaly_score as a function of time and highlight the highest 20 values")
            query_button = gr.Button("Submit Data Query")
            with gr.Row():
                with gr.Column(visible=False) as output_col1:
                    out1 = gr.Textbox(label="Response")
                with gr.Column(visible=False) as output_col2:
                    out2 = gr.Image(label="Plot")
                with gr.Column(visible=False) as output_col3:
                    out3 = gr.DataFrame(label="DataFrame")
            query_button.click(
                fn=process_query,
                inputs=query,
                outputs=[
                    out1,
                    out2,
                    out3,
                    output_col1,
                    output_col2,
                    output_col3
                ],
                api_name="process_query"
            )

# Launch locally only (no public share link).
demo.launch(share=False)