ddooling committed on
Commit f5f13d0 · verified · 1 Parent(s): 4904939

Upload folder using huggingface_hub

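The commit message refers to the `huggingface_hub` upload workflow; a minimal sketch of that call (the repo id is a placeholder, and repo_type="space" is an assumption based on the Gradio app added in this commit) might look like:

from huggingface_hub import HfApi

api = HfApi()  # authenticates via HF_TOKEN or a cached `huggingface-cli login`
api.upload_folder(
    folder_path=".",               # local folder holding app.py, the CSVs, and usedpdfs/
    repo_id="<user>/<repo-name>",  # placeholder: the target repository on the Hub
    repo_type="space",             # assumption: this app is deployed as a Gradio Space
    commit_message="Upload folder using huggingface_hub",
)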
.gitattributes CHANGED
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ dfcleaned.csv filter=lfs diff=lfs merge=lfs -text
+ usedpdfs/AEDG50-LargeHospitals-2012-20120509.pdf filter=lfs diff=lfs merge=lfs -text
+ usedpdfs/AEDGZE-SmallMedOfc-2019-20190614.pdf filter=lfs diff=lfs merge=lfs -text
+ usedpdfs/FCM-VCM-VCS[[:space:]]FLASHING[[:space:]]Suggestion[[:space:]]-[[:space:]]2024.pdf filter=lfs diff=lfs merge=lfs -text
+ usedpdfs/PNNL-20761.pdf filter=lfs diff=lfs merge=lfs -text
+ usedpdfs/gbpn-finaloct-2012.pdf filter=lfs diff=lfs merge=lfs -text
+ usedpdfs/heat-pump-guide-ap-series.pdf filter=lfs diff=lfs merge=lfs -text
+ usedpdfs/nea-smart-2-0-consumer.pdf filter=lfs diff=lfs merge=lfs -text
+ usedpdfs/s41597-024-03770-7.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,749 @@
+ from dotenv import load_dotenv
+ import os
+
+ # Threading and streaming support for the Assistants API event handler
+ from threading import Thread
+ import queue
+ from openai import AssistantEventHandler
+ from typing_extensions import override
+ import json
+
+ load_dotenv()
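+ # Expected environment variables (typically supplied via .env): OPENAI_API_KEY,
+ # TAVILY_API_KEY, OPENWEATHERMAP_API_KEY, VECTOR_STORE_ID, and ASSISTANT_ID;
+ # all of them are read below via os.environ.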
+
+ import openai
+ import time
+ import gradio as gr
+ #from autogen import UserProxyAgent, config_list_from_json
+ from datetime import timedelta, datetime
+ import pandas as pd
+ import numpy as np
+ #from gradio_datetimerange import DateTimeRange
+ from time import sleep
+ from gradio_pdf import PDF  # Ensure you have installed gradio_pdf
+
+ from pandasai.llm.openai import OpenAI
+ from pandasai import Agent
+ import matplotlib.pyplot as plt
+ import io
+ from pandasai import SmartDataframe
+ from collections import Counter
+
+ from tavily import TavilyClient  # Ensure you have installed the tavily library
+
+ # llmmodel = OpenAI(api_token=os.environ["OPENAI_API_KEY"], model='gpt-4o')
+
+ import requests
+
+
+ # Define the directory containing the PDFs
+ PDF_DIR = "usedpdfs"
+
+ # Define the default PDF file shown in the viewer
+ DEFAULT_PDF = "s41597-024-03770-7.pdf"
+
+
+ # Ensure the PDF_DIR exists
+ if not os.path.isdir(PDF_DIR):
+     raise ValueError(f"The directory '{PDF_DIR}' does not exist. Please check the path.")
+
+ # Get the list of PDF files in the directory
+ pdf_files = [f for f in os.listdir(PDF_DIR) if f.lower().endswith('.pdf')]
+
+ if DEFAULT_PDF not in pdf_files:
+     raise ValueError(f"Default PDF '{DEFAULT_PDF}' not found in '{PDF_DIR}'.")
+
+ # Check that there are PDF files in the directory
+ if not pdf_files:
+     raise ValueError(f"No PDF files found in the directory '{PDF_DIR}'.")
+
+ def display_pdf(selected_file):
+     """
+     Given the selected file name, return the full path to display in the PDF viewer.
+     """
+     file_path = os.path.join(PDF_DIR, selected_file)
+     return file_path
+
+
+ def web_search(query: str) -> str:
+     """
+     Performs a web search using the Tavily API and returns the context string.
+
+     Parameters:
+     - query (str): The search query.
+
+     Returns:
+     - str: The context string from the Tavily API or an error message.
+     """
+     try:
+         # Instantiate the TavilyClient
+         tavily_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
+
+         # Execute the search query
+         context = tavily_client.get_search_context(query=query)
+
+         # Return the context
+         return f"**Web Search Context:**\n{context}"
+     except Exception as e:
+         return f"Error performing web search: {str(e)}"
+
+
+ # Function to generate a date range
+ def generate_date_range(start_date, end_date, freq="D"):
+     return pd.date_range(start=start_date, end=end_date, freq=freq)
+
+
+ # Function to generate synthetic data for each component
+ def generate_synthetic_data(dates):
+     # Fix the random seed for reproducibility
+     np.random.seed(0)
+
+     # Generate random data for each component
+     data = {
+         "Temperature_Pressure_Relief_Valve": np.random.choice(
+             [0, 1], size=len(dates)
+         ),  # 0 = OK, 1 = Faulty
+         "Outlet_Nipple_Assembly": np.random.normal(
+             loc=80, scale=10, size=len(dates)
+         ),  # Temperature in °F
+         "Inlet_Nipple": np.random.normal(
+             loc=50, scale=5, size=len(dates)
+         ),  # Temperature in °F
+         "Upper_Element": np.random.normal(
+             loc=150, scale=20, size=len(dates)
+         ),  # Wattage (Watts)
+         "Lower_Element": np.random.normal(
+             loc=150, scale=20, size=len(dates)
+         ),  # Wattage (Watts)
+         "Anode_Rod": np.random.normal(
+             loc=7, scale=1.5, size=len(dates)
+         ),  # Length in inches
+         "Drain_Valve": np.random.choice(
+             [0, 1], size=len(dates)
+         ),  # 0 = Closed, 1 = Open
+         "Upper_Thermostat": np.random.normal(
+             loc=120, scale=10, size=len(dates)
+         ),  # Temperature in °F
+         "Lower_Thermostat": np.random.normal(
+             loc=120, scale=10, size=len(dates)
+         ),  # Temperature in °F
+         "Operating_Time": np.random.randint(
+             1, 25, size=len(dates)
+         ),  # Operating time in hours
+     }
+
+     # Inject an anomaly in the Upper Thermostat values around the midpoint
+     midpoint_index = len(dates) // 2
+     anomaly_range = (midpoint_index - 5, midpoint_index + 5)
+
+     # Create a spike in Upper Thermostat values
+     data["Upper_Thermostat"][anomaly_range[0] : anomaly_range[1]] = np.random.normal(
+         loc=200, scale=5, size=anomaly_range[1] - anomaly_range[0]
+     )
+
+     return pd.DataFrame(data, index=dates)
+
+
+ # Generate the dataset
+ start_date = datetime(2023, 10, 1)
+ end_date = datetime(2024, 10, 1)
+ dates = generate_date_range(start_date, end_date)
+
+ # Create a DataFrame with synthetic data
+ synthetic_dataset = generate_synthetic_data(dates)
+
+ now = datetime.now()
+
+ synthetic_dataset["time"] = [
+     now - timedelta(hours=5 * i) for i in range(synthetic_dataset.shape[0])
+ ]
+
+ # The original vector store stopped working, so a new assistant had to be created.
+
+ # The assistant needs system instructions along these lines:
+ # You are a helpful assistant and expert at answering building automation questions. Always carry out a file search for the desired information. You can augment that information with your general knowledge, but always carry out a file search with every query first to see if the relevant information is there, and then add to that afterwards.
+
+ # name: Building Energy and Efficiency Expert
+
+ # The instructions are also repeated when the thread / run is created.
+
+ VECTOR_STORE_ID = os.environ["VECTOR_STORE_ID"]  # must be updated whenever the vector store is rebuilt
+ ASSISTANT_ID = os.environ["ASSISTANT_ID"]
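+ # A minimal sketch of how such an assistant could be created (run once, outside this app,
+ # after `client` is initialized below). The model and instruction wording are illustrative
+ # assumptions, not the exact values of the deployed assistant; the name and the file_search
+ # requirement come from the notes above.
+ def create_assistant_sketch():
+     return client.beta.assistants.create(
+         name="Building Energy and Efficiency Expert",
+         instructions=(
+             "You are a helpful assistant and expert at answering building automation "
+             "questions. Always carry out a file search for the desired information "
+             "before adding general knowledge."
+         ),
+         model="gpt-4o",  # assumption: any Assistants-capable model works here
+         tools=[{"type": "file_search"}],
+         tool_resources={"file_search": {"vector_store_ids": [VECTOR_STORE_ID]}},
+     )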
+
+
+ # Small and medium offices are what is relevant to this dataset.
+
+ # Initialize the OpenAI client
+ client = openai.OpenAI()
+
+
+ # Create a Thread to hold the conversation
+ thread = client.beta.threads.create()
+ thread_id = thread.id
+
+
+ # Define the EventHandler class that streams responses and services tool calls
+ class EventHandler(AssistantEventHandler):
+     def __init__(self, response_queue):
+         super().__init__()
+         self.response_queue = response_queue
+
+     @override
+     def on_text_created(self, text) -> None:
+         pass
+
+     @override
+     def on_text_delta(self, delta, snapshot):
+         text = delta.value
+         self.response_queue.put(text)
+
+     @override
+     def on_event(self, event):
+         # Retrieve events that are denoted with 'requires_action',
+         # since these will have our tool_calls
+         if event.event == 'thread.run.requires_action':
+             run_id = event.data.id  # Retrieve the run ID from the event data
+             self.handle_requires_action(event.data, run_id)
+
+     def handle_requires_action(self, data, run_id):
+         tool_outputs = []
+
+         for tool in data.required_action.submit_tool_outputs.tool_calls:
+             if tool.function.name == "update_weather_forecast":
+                 print(tool.function.arguments)
+                 args = json.loads(tool.function.arguments)
+                 loc = args["location"]
+                 tool_outputs.append({"tool_call_id": tool.id, "output": update_weather_forecast(loc)})
+             elif tool.function.name == "update_weather":
+                 print(tool.function.arguments)
+                 args = json.loads(tool.function.arguments)
+                 loc = args["location"]
+                 tool_outputs.append({"tool_call_id": tool.id, "output": update_weather(loc)})
+             elif tool.function.name == "web_search":
+                 print(tool.function.arguments)
+                 args = json.loads(tool.function.arguments)
+                 query = args["query"]
+                 tool_outputs.append({"tool_call_id": tool.id, "output": web_search(query)})
+
+         # Submit all tool_outputs at the same time
+         self.submit_tool_outputs(tool_outputs, run_id)
+
+     def submit_tool_outputs(self, tool_outputs, run_id):
+         # Use the submit_tool_outputs_stream helper
+         with client.beta.threads.runs.submit_tool_outputs_stream(
+             thread_id=self.current_run.thread_id,
+             run_id=self.current_run.id,
+             tool_outputs=tool_outputs,
+             event_handler=EventHandler(self.response_queue),
+         ) as stream:
+             for text in stream.text_deltas:
+                 print(text, end="", flush=True)
+             print()
+
+ def chat(usr_message, history):
+     global thread_id
+     # start_conversation()
+     user_input = usr_message
+
+     if not thread_id:
+         print("Error: Missing thread_id")  # Debugging line
+         return json.dumps({"error": "Missing thread_id"}), 400
+
+     print(
+         f"Received message: {user_input} for thread ID: {thread_id}"
+     )  # Debugging line
+
+     # Add the user's message to the thread
+     client.beta.threads.messages.create(
+         thread_id=thread_id, role="user", content=user_input
+     )
+
+     # Create a queue to hold the assistant's response chunks
+     response_queue = queue.Queue()
+
+     # Start the streaming run in a separate thread; the event handler is
+     # instantiated with the queue so text deltas can be yielded below.
+     def run_stream():
+         with client.beta.threads.runs.stream(
+             thread_id=thread_id,
+             assistant_id=ASSISTANT_ID,
+             tool_choice="required",
+             event_handler=EventHandler(response_queue),
+         ) as stream:
+             stream.until_done()
+
+     stream_thread = Thread(target=run_stream)
+     stream_thread.start()
+
+     assistant_response = ""
+     while True:
+         try:
+             # Get response chunks from the queue
+             chunk = response_queue.get(timeout=0.1)
+             assistant_response += chunk
+             yield assistant_response
+         except queue.Empty:
+             # Check if the stream has finished
+             if not stream_thread.is_alive():
+                 break
+
+     # Wait for the stream thread to finish
+     stream_thread.join()
+
+
+ def update_weather(location):
+     api_key = os.environ["OPENWEATHERMAP_API_KEY"]
+     base_url = "http://api.openweathermap.org/data/2.5/weather"
+     params = {"q": location, "appid": api_key, "units": "imperial"}
+     response = requests.get(base_url, params=params)
+     weather_data = response.json()
+
+     if response.status_code != 200:
+         return f"Error fetching weather data: {weather_data.get('message', 'Unknown error')}"
+
+     lon = weather_data["coord"]["lon"]
+     lat = weather_data["coord"]["lat"]
+     main = weather_data["weather"][0]["main"]
+     feels_like = weather_data["main"]["feels_like"]
+     temp_min = weather_data["main"]["temp_min"]
+     temp_max = weather_data["main"]["temp_max"]
+     pressure = weather_data["main"]["pressure"]
+     visibility = weather_data["visibility"]
+     wind_speed = weather_data["wind"]["speed"]
+     wind_deg = weather_data["wind"]["deg"]
+     sunrise = datetime.fromtimestamp(weather_data["sys"]["sunrise"]).strftime('%H:%M:%S')
+     sunset = datetime.fromtimestamp(weather_data["sys"]["sunset"]).strftime('%H:%M:%S')
+     temp = weather_data["main"]["temp"]
+     humidity = weather_data["main"]["humidity"]
+     condition = weather_data["weather"][0]["description"]
+
+     # With units="imperial", OpenWeatherMap reports temperatures in °F and wind speed in mph.
+     return f"""**Weather in {location}:**
+ - **Coordinates:** (lon: {lon}, lat: {lat})
+ - **Temperature:** {temp:.2f}°F (Feels like: {feels_like:.2f}°F)
+ - **Min Temperature:** {temp_min:.2f}°F, **Max Temperature:** {temp_max:.2f}°F
+ - **Humidity:** {humidity}%
+ - **Condition:** {condition.capitalize()}
+ - **Pressure:** {pressure} hPa
+ - **Visibility:** {visibility} meters
+ - **Wind Speed:** {wind_speed} mph, **Wind Direction:** {wind_deg}°
+ - **Sunrise:** {sunrise}, **Sunset:** {sunset}"""
+
+
+ def update_weather_forecast(location: str) -> str:
+     """Fetches the 5-day weather forecast for a given location and returns a formatted string.
+
+     Parameters:
+     - location: the search term to find weather information
+
+     Returns:
+     - A formatted string containing the weather forecast data
+     """
+     api_key = os.environ["OPENWEATHERMAP_API_KEY"]
+     base_url = "http://api.openweathermap.org/data/2.5/forecast"
+     params = {
+         "q": location,
+         "appid": api_key,
+         "units": "imperial",
+         "cnt": 40  # Request 40 data points (5 days * 8 three-hour periods)
+     }
+     response = requests.get(base_url, params=params)
+     weather_data = response.json()
+     if response.status_code != 200:
+         return f"Error fetching weather data: {weather_data.get('message', 'Unknown error')}"
+
+     # Organize forecast data per date
+     forecast_data = {}
+     for item in weather_data['list']:
+         dt_txt = item['dt_txt']  # 'YYYY-MM-DD HH:MM:SS'
+         date_str = dt_txt.split(' ')[0]  # 'YYYY-MM-DD'
+         time_str = dt_txt.split(' ')[1]  # 'HH:MM:SS'
+         forecast_data.setdefault(date_str, [])
+         forecast_data[date_str].append({
+             'time': time_str,
+             'temp': item['main']['temp'],
+             'feels_like': item['main']['feels_like'],
+             'humidity': item['main']['humidity'],
+             'pressure': item['main']['pressure'],
+             'wind_speed': item['wind']['speed'],
+             'wind_deg': item['wind']['deg'],
+             'condition': item['weather'][0]['description'],
+             'visibility': item.get('visibility', 'N/A'),  # visibility may occasionally be missing
+         })
+
+     # Process data to create daily summaries
+     daily_summaries = {}
+     for date_str, forecasts in forecast_data.items():
+         temps = [f['temp'] for f in forecasts]
+         feels_likes = [f['feels_like'] for f in forecasts]
+         humidities = [f['humidity'] for f in forecasts]
+         pressures = [f['pressure'] for f in forecasts]
+         wind_speeds = [f['wind_speed'] for f in forecasts]
+         conditions = [f['condition'] for f in forecasts]
+
+         min_temp = min(temps)
+         max_temp = max(temps)
+         avg_temp = sum(temps) / len(temps)
+         avg_feels_like = sum(feels_likes) / len(feels_likes)
+         avg_humidity = sum(humidities) / len(humidities)
+         avg_pressure = sum(pressures) / len(pressures)
+         avg_wind_speed = sum(wind_speeds) / len(wind_speeds)
+
+         # Find the most common weather condition
+         condition_counts = Counter(conditions)
+         most_common_condition = condition_counts.most_common(1)[0][0]
+
+         daily_summaries[date_str] = {
+             'min_temp': min_temp,
+             'max_temp': max_temp,
+             'avg_temp': avg_temp,
+             'avg_feels_like': avg_feels_like,
+             'avg_humidity': avg_humidity,
+             'avg_pressure': avg_pressure,
+             'avg_wind_speed': avg_wind_speed,
+             'condition': most_common_condition,
+         }
+
+     # Build the formatted string
+     city_name = weather_data['city']['name']
+     ret_str = f"**5-Day Weather Forecast for {city_name}:**\n"
+
+     for date_str in sorted(daily_summaries.keys()):
+         summary = daily_summaries[date_str]
+         ret_str += f"\n**{date_str}:**\n"
+         ret_str += f"- **Condition:** {summary['condition'].capitalize()}\n"
+         ret_str += f"- **Min Temperature:** {summary['min_temp']:.2f}°F\n"
+         ret_str += f"- **Max Temperature:** {summary['max_temp']:.2f}°F\n"
+         ret_str += f"- **Average Temperature:** {summary['avg_temp']:.2f}°F (Feels like {summary['avg_feels_like']:.2f}°F)\n"
+         ret_str += f"- **Humidity:** {summary['avg_humidity']:.0f}%\n"
+         ret_str += f"- **Pressure:** {summary['avg_pressure']:.0f} hPa\n"
+         ret_str += f"- **Wind Speed:** {summary['avg_wind_speed']:.2f} mph\n"
+
+     return ret_str
+
+
+ llmmodel = OpenAI(api_token=os.environ["OPENAI_API_KEY"], model='gpt-4o')
+
+ # Load dataframes
+ dfcleaned = pd.read_csv("dfcleaned.csv")
+ dfcleaned['Timestamp'] = pd.to_datetime(dfcleaned['Timestamp'])
+ dfcleaned['off-nominal'] = dfcleaned['off-nominal'].apply(str)
+ dfshaps = pd.read_csv("shaps.csv")
+ dfshaps['Timestamp'] = pd.to_datetime(dfshaps['Timestamp'])
+
+ # Initialize the pandasai Agent over both dataframes
+ agent = Agent([dfcleaned, dfshaps], config={"llm": llmmodel})
+
+ sdfshaps = SmartDataframe(dfshaps, config={"llm": llmmodel})
+ sdfcleaned = SmartDataframe(dfcleaned, config={"llm": llmmodel})
+
+
+ def process_query(query):
+     response = agent.chat(query)  # Ask the pandasai Agent to answer the query
+     print(response)
+
+     # Initialize outputs and visibility flags
+     text_output = None
+     image_output = None
+     dataframe_output = None
+     text_visible = False
+     image_visible = False
+     dataframe_visible = False
+
+     if isinstance(response, str) and ".png" not in response:
+         text_output = response
+         text_visible = True
+     elif isinstance(response, str) and ".png" in response:
+         image_output = response  # The response is a filepath or URL to the image
+         image_visible = True
+     elif isinstance(response, pd.DataFrame):
+         dataframe_output = response
+         dataframe_visible = True
+
+     return (
+         text_output,
+         image_output,
+         dataframe_output,
+         gr.update(visible=text_visible),
+         gr.update(visible=image_visible),
+         gr.update(visible=dataframe_visible)
+     )
+
+
+ def gradio_app():
+     iface = gr.Interface(
+         fn=process_query,
+         inputs="text",
+         outputs=[
+             gr.Textbox(label="Response"),
+             gr.Image(label="Plot"),
+             gr.DataFrame(label="Dataframe")
+         ],
+         title="pandasai Query Processor",
+         description="Enter your query related to the CSV data files."
+     )
+     return iface
+
+ with gr.Blocks(
+     # theme=gr.themes.Monochrome(primary_hue="green"),
+     theme=gr.themes.Soft(),
+ ) as demo:
+     with gr.Row():  # Combine the two weather functions into a single row
+         with gr.Column():
+             location1 = gr.Textbox(label="Enter location for weather (e.g., Rio Rancho, New Mexico)",
+                                    value="Cambridge, Massachusetts")
+             weather_button = gr.Button("Get Weather")
+             # output1 = gr.Markdown(label="Weather Information")
+             output1 = gr.Textbox(label="Weather Information", lines=8, max_lines=8, show_label=True, show_copy_button=True)
+             weather_button.click(
+                 fn=update_weather,
+                 inputs=location1,
+                 outputs=output1,
+                 api_name="update_weather",
+             )
+         with gr.Column():
+             location2 = gr.Textbox(label="Enter location for weather forecast (e.g., Rio Rancho, New Mexico)",
+                                    value="Cambridge, Massachusetts")
+             weather_forecast_button = gr.Button("Get 5-Day Weather Forecast")
+             # output2 = gr.Markdown(label="Weather Forecast Information")
+             output2 = gr.Textbox(label="Weather 5-Day Forecast Information", lines=8, max_lines=8,
+                                  show_label=True, show_copy_button=True)
+             weather_forecast_button.click(
+                 fn=update_weather_forecast,
+                 inputs=location2,
+                 outputs=output2,
+                 api_name="update_weather_forecast",
+             )
+     gr.Markdown("# 📄 PDF Viewer Section")
+     gr.Markdown("Select a PDF from the dropdown below to view it.")
+
+     with gr.Accordion("Open PDF Selection", open=False):
+         with gr.Row():
+             # The dropdown takes a smaller share of the row
+             dropdown = gr.Dropdown(
+                 choices=pdf_files,
+                 label="Select a PDF",
+                 value=DEFAULT_PDF,  # Set a default value
+                 scale=1
+             )
+             # The PDF viewer takes three times the space of the dropdown
+             pdf_viewer = PDF(
+                 label="PDF Viewer",
+                 interactive=True,
+                 scale=3,
+                 value=display_pdf(DEFAULT_PDF)
+             )
+
+         # Set up the event: when the dropdown changes, update the PDF viewer
+         dropdown.change(
+             fn=display_pdf,
+             inputs=dropdown,
+             outputs=pdf_viewer
+         )
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("# Building Automation Assistant")
+
+             gr.Markdown(
+                 "I'm an AI assistant that can help with building maintenance and equipment questions."
+             )
+
+             gr.Markdown("---")
+
+             # ChatInterface wired to the streaming chat() generator
+             chat_interface = gr.ChatInterface(
+                 chat,
+                 #show_label=True,
+                 # show_copy_button=True,
+                 chatbot=gr.Chatbot(height=750, show_copy_button=True, show_copy_all_button=True,
+                                    avatar_images=("user_avatar.png", "assistant_avatar.png")),
+                 title="Ask Me Anything",
+                 examples_per_page=5,
+                 # theme="soft",  # glass
+                 description="Type your question about building automation here.",
+                 examples=[
+                     # "Give the weather forecast for Cambridge, MA",
+                     # "Give me the weather forecast for New York, NY. Express the wind speed in miles per hour.",
+                     "List the authors on the academic paper associated with the HouseZero project.",
+                     "What are some good API services that I can use to help fetch relevant data for building automation purposes? Include hyperlinks in your response.",
+                     "Show the first few rows of each of the uploaded CSV files.",
+                     # "What are the current maintenance protocols for HouseZero?",
+                     # "How do the maintenance protocols for HouseZero compare to industry best practices?",
+                     "What are the most common maintenance challenges faced by net-zero energy buildings?",
+                     "How does the Uponor Climate Control Network System contribute to building maintenance?",
+                     "What role do smart systems play in the maintenance of sustainable buildings like HouseZero?",
+                     "Can you provide data on the energy performance of HouseZero over the past year?",
+                     "Tell me about the HouseZero dataset. Retrieve information from the publication you have access to. Use your file retrieval tool.",
+                     "Describe in detail the relationship between the columns and values in the uploaded CSV files and the information you have access to regarding the HouseZero dataset. Be verbose. Use your file retrieval tool.",
+                     "Please comment on the zone relative humidity features, specifically if they indicate a problem with the building.",
+                     "Give me in great detail any advice you have to maintain a small to midsize office building, like the one the HouseZero data corresponds to. Be verbose. Use your file retrieval tool.",
+                     "Is there any information in the data files that indicates a problem with the building?",
+                     "Show Massachusetts electricity billing rates during the same time span as the CSV data.",
+                     "Use those rates and the relevant columns in the CSV files to estimate how much it costs to operate this building per month.",
+                     "What is the estimated average electricity cost for operating the building using Massachusetts energy rates? Use the CSV files for building data. Limit your response to 140 characters. Use your file retrieval tool.",
+                     "Based on the data in these CSV files, can you assign an EnergyIQ score from 1-10 that reflects how well the building is operating? Explain the reason for your score and provide any recommendations on actions to take that can improve it in the future. Be verbose. Use your file retrieval tool.",
+                     "Please summarize information concerning sensor networks that may be leading to faulty measurements.",
+                     "Tell me how to properly install the PVC skylights.",
+                     "Based on data and insights, what specific changes should be made to HouseZero's maintenance protocols?",
+                     "What recommendations do you have to mitigate against high relative humidity zone measurements in structures like the HouseZero building?"
+                 ],
+                 fill_height=True,
+                 cache_examples=True,
+             )
+
+     gr.Markdown("---")
+     with gr.Accordion("Example Plots Section", open=False):
+         with gr.Column():
+             # Define the three ScatterPlot components
+             anomaly_plot = gr.ScatterPlot(
+                 dfcleaned,
+                 x="Timestamp",
+                 y="Z5_RH",
+                 color="off-nominal",
+                 title="Anomaly Score"
+             )
+
+             zone3_plot = gr.ScatterPlot(
+                 dfcleaned,
+                 x="Timestamp",
+                 y="Z3_RH",
+                 color="off-nominal",
+                 title="Zone 3 Relative Humidity",
+             )
+
+             zone4_plot = gr.ScatterPlot(
+                 dfcleaned,
+                 x="Timestamp",
+                 y="Z4_RH",
+                 color="off-nominal",
+                 title="Zone 4 Relative Humidity",
+             )
+
+             # Group all plots into a list for easy management
+             plots = [anomaly_plot, zone3_plot, zone4_plot]
+
+             def select_region(selection: gr.SelectData):
+                 """
+                 Handles the region selection event.
+
+                 Args:
+                     selection (gr.SelectData): The data from the selection event.
+
+                 Returns:
+                     A list of ScatterPlot updates, one per plot, with the new x-axis limits.
+                 """
+                 if selection is None or selection.index is None:
+                     return [gr.ScatterPlot() for _ in plots]
+
+                 min_x, max_x = selection.index
+                 # Update the x_lim for each plot
+                 return [gr.ScatterPlot(x_lim=(min_x, max_x)) for _ in plots]
+
+             def reset_region():
+                 """
+                 Resets the x-axis limits for all plots.
+
+                 Returns:
+                     A list of ScatterPlot updates with x_lim cleared.
+                 """
+                 return [gr.ScatterPlot(x_lim=None) for _ in plots]
+
+             # Attach event listeners to each plot
+             for plot in plots:
+                 plot.select(
+                     select_region,
+                     inputs=None,
+                     outputs=plots  # Update all plots
+                 )
+                 plot.double_click(
+                     reset_region,
+                     inputs=None,
+                     outputs=plots  # Reset all plots
+                 )
+
+             # anomaly_info = gr.Markdown("Anomaly detected around October 15, 2023")
+         with gr.Column():
+             query = gr.Textbox(label="Enter your question about the data",
+                                value="Plot the anomaly_score as a function of time and highlight the highest 20 values")
+             query_button = gr.Button("Submit Data Query")
+             with gr.Row():
+                 with gr.Column(visible=False) as output_col1:
+                     out1 = gr.Textbox(label="Response")
+                 with gr.Column(visible=False) as output_col2:
+                     out2 = gr.Image(label="Plot")
+                 with gr.Column(visible=False) as output_col3:
+                     out3 = gr.DataFrame(label="DataFrame")
+             query_button.click(
+                 fn=process_query,
+                 inputs=query,
+                 outputs=[
+                     out1,  # Text output
+                     out2,  # Image output
+                     out3,  # DataFrame output
+                     output_col1,  # Visibility for Text output
+                     output_col2,  # Visibility for Image output
+                     output_col3   # Visibility for DataFrame output
+                 ],
+                 api_name="process_query"
+             )
+
+     # Output columns stay hidden until a query result is ready.
+
+
+ # Weather input
+ # with gr.Row():
+ #     iface = gradio_app()
+
+
+ demo.launch(share=True)
assistant_avatar.png ADDED
dfcleaned.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:259fb5ae73274f98b857a80939adcbc6b4da6c2b6aed26e8f3686262e7200a9c
+ size 12556650
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ gradio==4.44.1
+ gradio-client==1.3.0
+ gradio_pdf==0.0.15
+ matplotlib==3.7.5
+ numpy==1.23.5
+ openai==1.51.0
+ pandas==1.5.3
+ pandasai==2.2.15
+ pyautogen==0.3.0
+ python-dotenv==1.0.1
+ Requests==2.32.3
+ typing_extensions==4.12.2
+ tavily-python==0.5.0
shaps.csv ADDED
The diff for this file is too large to render.
 
usedpdfs/AEDG50-LargeHospitals-2012-20120509.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2169122c4ef25a3bfe61a90f31a3100f61252311ad3d87f0edd4967f191a9ddb
+ size 26333583
usedpdfs/AEDGZE-SmallMedOfc-2019-20190614.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aefe8d828c337ea02ff4bfbcc0ad15472f6a9ecf9f17c25181495d5b2b680d3c
+ size 46009655
usedpdfs/FCM-VCM-VCS FLASHING Suggestion - 2024.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:65a5ea3a75ded827bfdde4e26a7c41f232177cb8705949b938f7a83b56fe16f3
+ size 1164360
usedpdfs/PNNL-20761.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99bad5ffd0bd537237740d5114a6bd1ae14044eac87fc7fb12a72cb0a31a80c8
+ size 3818302
usedpdfs/gbpn-finaloct-2012.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8f58622dd74856dc0c5f9eac7475406bd323452c302febb051d35daf4590895
+ size 4411700
usedpdfs/heat-pump-guide-ap-series.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ecf7dd9a4f320a3221f6106c4d83e6343cd761d87fc24c4dc3edb110f2c7892
+ size 6502158
usedpdfs/nea-smart-2-0-consumer.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9819cd1518e3134f977b59e007719411978dc81494480cdd1ea3474347b4badb
+ size 5497162
usedpdfs/resitable.pdf ADDED
Binary file (102 kB).
 
usedpdfs/s41597-024-03770-7.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e97503c62672c2357359395b41b117e3b57892881097b0b9981477ed6cb9657
+ size 4763859
user_avatar.png ADDED