Update app.py

app.py CHANGED
@@ -52,38 +52,38 @@ class EnhancedOceanClimateAgent:
         }
 
         try:
-            print(f"
-            print(f"
+            print(f"Fetching {product} data for station {station_id}")
+            print(f"Date range: {begin_str} to {end_str}")
 
             response = requests.get(self.noaa_base_url, params=params, timeout=30)
 
             if response.status_code != 200:
-                print(f"
+                print(f"HTTP Error {response.status_code}: {response.text}")
                 return None
 
             data = response.json()
 
             if 'data' in data and data['data']:
-                print(f"
+                print(f"Successfully fetched {len(data['data'])} records for {product}")
                 return pd.DataFrame(data['data'])
             elif 'error' in data:
-                print(f"
+                print(f"NOAA API error for {product}: {data['error']['message']}")
                 return None
             else:
-                print(f"
+                print(f"No data returned for {product}")
                 return None
 
         except requests.exceptions.Timeout:
-            print(f"
+            print(f"Timeout fetching {product} data")
             return None
         except requests.exceptions.RequestException as e:
-            print(f"
+            print(f"Request failed for {product}: {str(e)}")
             return None
         except json.JSONDecodeError as e:
-            print(f"
+            print(f"JSON decode error for {product}: {str(e)}")
             return None
         except Exception as e:
-            print(f"
+            print(f"Unexpected error fetching {product}: {str(e)}")
             return None
 
     def get_comprehensive_station_data(self, station_name, days_back=30):
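The hunk above only touches the log lines inside get_noaa_data; the params dict and the rest of the request setup sit outside the diff. For orientation, a minimal sketch of the same fetch pattern, assuming the standard NOAA CO-OPS datagetter endpoint and its usual parameters (the station ID, datum, and units below are illustrative, not taken from the app):

# Minimal sketch of the fetch pattern changed above; the endpoint and
# parameter names follow the public NOAA CO-OPS Data API, but the
# concrete values are illustrative.
import requests
import pandas as pd

NOAA_BASE_URL = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter"

def fetch_noaa_product(station_id, product, begin_str, end_str):
    params = {
        "station": station_id,      # e.g. "9414290"
        "product": product,         # e.g. "water_level"
        "begin_date": begin_str,    # "yyyyMMdd HH:mm"
        "end_date": end_str,
        "datum": "MLLW",            # required for water level products
        "units": "metric",
        "time_zone": "gmt",
        "format": "json",
    }
    response = requests.get(NOAA_BASE_URL, params=params, timeout=30)
    if response.status_code != 200:
        return None
    payload = response.json()
    # Successful responses carry a "data" list; failures carry an "error" dict.
    if "data" in payload and payload["data"]:
        return pd.DataFrame(payload["data"])
    return None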
@@ -96,8 +96,8 @@ class EnhancedOceanClimateAgent:
         end_date = datetime.utcnow() - timedelta(hours=2)  # 2 hour buffer
         start_date = end_date - timedelta(days=days_back)
 
-        print(f"
-        print(f"
+        print(f"Fetching data for {station_name} (ID: {station_id})")
+        print(f"Date range: {start_date} to {end_date}")
 
         # Priority order - start with most reliable products
         products_to_fetch = [
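The products_to_fetch list itself (old lines 104-111) falls outside the diff. Its shape can be inferred from the loop in the next hunk, which unpacks (product_name, product_code) pairs; a hypothetical reconstruction:

# Hypothetical shape of the elided products_to_fetch list, ordered most
# reliable first; entries are assumptions, not the app's actual list.
products_to_fetch = [
    ("water_level", "water_level"),
    ("water_temperature", "water_temperature"),
    ("air_temperature", "air_temperature"),
]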
@@ -112,20 +112,20 @@ class EnhancedOceanClimateAgent:
         success_count = 0
 
         for product_name, product_code in products_to_fetch:
-            print(f"
+            print(f"Attempting to fetch {product_name}...")
             data = self.get_noaa_data(station_id, product_code, start_date, end_date)
 
             if data is not None and not data.empty:
                 all_data[product_name] = data
                 success_count += 1
-                print(f"
+                print(f"{product_name}: {len(data)} records")
             else:
-                print(f"
+                print(f"{product_name}: No data available")
 
         if success_count == 0:
             return None, f"No data available for station {station_name} in the specified time period. This could be due to: station maintenance, data processing delays, or the station may not support the requested data types."
 
-        print(f"
+        print(f"Successfully retrieved {success_count}/{len(products_to_fetch)} data types")
         return all_data, f"Successfully retrieved {success_count}/{len(products_to_fetch)} data types"
 
     def process_noaa_data(self, raw_data):
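get_comprehensive_station_data therefore always returns a (data, message) pair: a dict of per-product DataFrames plus a status string on success, or None plus an explanation on failure. A caller sketch (the station name is illustrative):

# Consuming the (data, message) contract established above.
all_data, msg = agent.get_comprehensive_station_data("San Francisco", days_back=7)
if all_data is None:
    print(msg)  # human-readable failure explanation
else:
    for product_name, df in all_data.items():
        print(f"{product_name}: {len(df)} rows")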
@@ -141,7 +141,7 @@ class EnhancedOceanClimateAgent:
             df['datetime'] = pd.to_datetime(df['t'])
             df['water_level'] = pd.to_numeric(df['v'], errors='coerce')
             base_df = df[['datetime', 'water_level']].copy()
-            print(f"
+            print(f"Base dataset: water_level with {len(base_df)} records")
 
         # If no water level, try other datasets
         if base_df is None:
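The 't'/'v' handling reflects the CO-OPS JSON layout: each record carries its timestamp under "t" and its reading under "v", both as strings, hence the explicit conversions. A self-contained illustration with a made-up record:

# CO-OPS rows arrive as strings under "t" (time) and "v" (value);
# the record below is made up for illustration.
import pandas as pd

df = pd.DataFrame([{"t": "2024-01-01 00:00", "v": "1.234"}])
df["datetime"] = pd.to_datetime(df["t"])
df["water_level"] = pd.to_numeric(df["v"], errors="coerce")  # bad values become NaN
base_df = df[["datetime", "water_level"]].copy()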
@@ -156,7 +156,7 @@ class EnhancedOceanClimateAgent:
                 column_name = product_name.replace('_temperature', '_temp')
                 df[column_name] = pd.to_numeric(df['v'], errors='coerce')
                 base_df = df[['datetime', column_name]].copy()
-                print(f"
+                print(f"Base dataset: {product_name} with {len(base_df)} records")
                 break
 
         if base_df is None:
@@ -191,7 +191,7 @@ class EnhancedOceanClimateAgent:
         # Sort by datetime and remove duplicates
         base_df = base_df.sort_values('datetime').drop_duplicates(subset=['datetime'])
 
-        print(f"
+        print(f"Final processed dataset: {len(base_df)} records with {len(base_df.columns)-1} parameters")
         return base_df
 
     def detect_anomalies(self, data, column, window=24):  # 24 hours for hourly data
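The body of detect_anomalies is untouched by this commit and not shown. Consistent with its signature (a 24-sample window for hourly data) and the anomaly_threshold attribute set elsewhere in the app, a plausible rolling z-score detector would look like the sketch below; this is an assumed implementation, not the app's actual one:

# Assumed rolling z-score detector matching the signature above;
# the app's real detect_anomalies body is not part of this diff.
import numpy as np
import pandas as pd

def detect_anomalies(data, column, window=24, threshold=2.5):
    rolling_mean = data[column].rolling(window, min_periods=1).mean()
    rolling_std = data[column].rolling(window, min_periods=1).std()
    z = (data[column] - rolling_mean) / rolling_std.replace(0, np.nan)
    return z.abs() > threshold  # boolean mask of anomalous samples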
@@ -266,7 +266,7 @@ class EnhancedOceanClimateAgent:
                 alerts.append(f"High {col.replace('_', ' ')} anomaly frequency: {anomaly_pct:.1f}% at {station_name}")
 
         if not alerts:
-            alerts.append(f"
+            alerts.append(f"No significant anomalies detected at {station_name}")
 
         return analysis, alerts
 
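The anomaly_pct figure in the alert above would follow directly from such a mask; for example, building on the assumed detector sketched earlier:

# Deriving an anomaly percentage from a boolean anomaly mask (assumed logic).
mask = detect_anomalies(data, "water_level")
anomaly_pct = 100.0 * mask.mean()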
@@ -279,12 +279,12 @@ def analyze_real_ocean_data(station_name, days_back, anomaly_sensitivity, use_re
     agent.anomaly_threshold = anomaly_sensitivity
 
     if use_real_data:
-        print(f"
+        print(f"Starting real data analysis for {station_name}")
         # Fetch real NOAA data
         raw_data, status_msg = agent.get_comprehensive_station_data(station_name, days_back)
 
         if raw_data is None:
-            error_msg = f"
+            error_msg = f"Error fetching real data: {status_msg}"
             print(error_msg)
             return None, None, None, error_msg, "No alerts - data unavailable", None
 
@@ -292,12 +292,12 @@ def analyze_real_ocean_data(station_name, days_back, anomaly_sensitivity, use_re
         data = agent.process_noaa_data(raw_data)
 
         if data is None or data.empty:
-            error_msg = "
+            error_msg = "No processable data available after fetching from NOAA"
             print(error_msg)
             return None, None, None, error_msg, "No alerts - data unavailable", None
 
-        data_source = f"
-        print(f"
+        data_source = f"Real NOAA data from {station_name} ({status_msg})"
+        print(f"{data_source}")
 
     else:
         print("🔧 Using synthetic demonstration data")
@@ -320,7 +320,7 @@ def analyze_real_ocean_data(station_name, days_back, anomaly_sensitivity, use_re
     # Create CSV for download
     csv_file_path = save_csv_temp(data)
 
-    print("
+    print("Analysis completed successfully")
     return fig1, fig2, fig3, analysis_text, alerts_text, csv_file_path
 
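The six-part return value (three figures, two text blocks, one file path) matches a Gradio event handler's outputs. Assuming the Space is a Gradio app (the UI wiring is outside this diff), the hookup would look roughly like:

# Hypothetical Gradio wiring for analyze_real_ocean_data; the actual UI
# definition is not part of this diff, so labels and ranges are assumptions.
import gradio as gr

demo = gr.Interface(
    fn=analyze_real_ocean_data,
    inputs=[
        gr.Textbox(label="Station name"),
        gr.Slider(1, 90, value=30, label="Days back"),
        gr.Slider(1.0, 4.0, value=2.5, label="Anomaly sensitivity"),
        gr.Checkbox(value=True, label="Use real NOAA data"),
    ],
    outputs=[gr.Plot(), gr.Plot(), gr.Plot(),
             gr.Textbox(label="Analysis"), gr.Textbox(label="Alerts"),
             gr.File(label="CSV download")],
)
demo.launch()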