nbroad committed on
Commit
82d9f36
·
verified ·
1 Parent(s): bd08f1f

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +50 -5
app.py CHANGED
@@ -191,26 +191,49 @@ async def get_provider_data(provider: str):
191
  async def get_historical_data():
192
  """API endpoint to get historical data for line chart"""
193
  if not HF_TOKEN:
194
- return {"error": "Historical data not available", "data": []}
 
 
 
 
 
195
 
196
  try:
197
  # Load historical dataset
198
  dataset = load_dataset(DATASET_REPO_NAME, split="train")
199
  df = dataset.to_pandas()
200
 
 
 
 
 
 
 
 
 
 
 
201
  # Group by timestamp and provider, get the latest entry for each timestamp-provider combo
202
  df['timestamp'] = pd.to_datetime(df['timestamp'])
203
  df = df.sort_values('timestamp')
204
 
205
  # Get last 48 hours of data (48 data points max for performance)
206
  cutoff_time = datetime.now(timezone.utc) - pd.Timedelta(hours=48)
207
- df = df[df['timestamp'] >= cutoff_time]
 
 
 
 
 
 
 
208
 
209
  # Prepare data for Chart.js line chart
210
  historical_data = {}
 
211
 
212
  for provider in PROVIDERS:
213
- provider_data = df[df['provider'] == provider].copy()
214
  if not provider_data.empty:
215
  # Format for Chart.js: {x: timestamp, y: value}
216
  historical_data[provider] = [
@@ -220,17 +243,39 @@ async def get_historical_data():
220
  }
221
  for _, row in provider_data.iterrows()
222
  ]
 
223
  else:
224
  historical_data[provider] = []
225
 
 
 
226
  return {
227
  "historical_data": historical_data,
228
- "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
 
 
229
  }
230
 
231
  except Exception as e:
232
  logger.error(f"Error fetching historical data: {e}")
233
- return {"error": "Failed to fetch historical data", "data": []}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
  @app.post("/api/collect-now")
236
  async def trigger_data_collection(background_tasks: BackgroundTasks):
 
191
  async def get_historical_data():
192
  """API endpoint to get historical data for line chart"""
193
  if not HF_TOKEN:
194
+ logger.warning("No HF_TOKEN available for historical data")
195
+ return {
196
+ "error": "Historical data not available - no HF token",
197
+ "historical_data": {},
198
+ "message": "Historical data collection requires HuggingFace token"
199
+ }
200
 
201
  try:
202
  # Load historical dataset
203
  dataset = load_dataset(DATASET_REPO_NAME, split="train")
204
  df = dataset.to_pandas()
205
 
206
+ logger.info(f"Loaded dataset with {len(df)} total records")
207
+
208
+ if df.empty:
209
+ logger.info("Dataset is empty - no historical data available yet")
210
+ return {
211
+ "historical_data": {},
212
+ "message": "No historical data available yet. Data collection is running - check back in 30 minutes.",
213
+ "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
214
+ }
215
+
216
  # Group by timestamp and provider, get the latest entry for each timestamp-provider combo
217
  df['timestamp'] = pd.to_datetime(df['timestamp'])
218
  df = df.sort_values('timestamp')
219
 
220
  # Get last 48 hours of data (48 data points max for performance)
221
  cutoff_time = datetime.now(timezone.utc) - pd.Timedelta(hours=48)
222
+ df_filtered = df[df['timestamp'] >= cutoff_time]
223
+
224
+ logger.info(f"Filtered to {len(df_filtered)} records in last 48 hours")
225
+
226
+ # If no recent data, use all available data for initial display
227
+ if df_filtered.empty:
228
+ logger.info("No data in last 48 hours, using all available data")
229
+ df_filtered = df.tail(100) # Use last 100 records
230
 
231
  # Prepare data for Chart.js line chart
232
  historical_data = {}
233
+ total_data_points = 0
234
 
235
  for provider in PROVIDERS:
236
+ provider_data = df_filtered[df_filtered['provider'] == provider].copy()
237
  if not provider_data.empty:
238
  # Format for Chart.js: {x: timestamp, y: value}
239
  historical_data[provider] = [
 
243
  }
244
  for _, row in provider_data.iterrows()
245
  ]
246
+ total_data_points += len(historical_data[provider])
247
  else:
248
  historical_data[provider] = []
249
 
250
+ logger.info(f"Returning {total_data_points} total data points across {len([p for p in historical_data.values() if p])} providers")
251
+
252
  return {
253
  "historical_data": historical_data,
254
+ "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
255
+ "total_data_points": total_data_points,
256
+ "data_range": f"Last {len(df_filtered)} records" if not df_filtered.empty else "No data"
257
  }
258
 
259
  except Exception as e:
260
  logger.error(f"Error fetching historical data: {e}")
261
+ # Try to create initial data if dataset doesn't exist
262
+ if "does not exist" in str(e).lower() or "not found" in str(e).lower():
263
+ logger.info("Dataset doesn't exist yet, triggering initial data collection")
264
+ try:
265
+ await collect_and_store_data()
266
+ return {
267
+ "historical_data": {},
268
+ "message": "Dataset created! Historical data will appear after a few data collection cycles.",
269
+ "last_updated": datetime.now().strftime('%Y-%m-%d %H:%M:%S')
270
+ }
271
+ except Exception as create_error:
272
+ logger.error(f"Failed to create initial dataset: {create_error}")
273
+
274
+ return {
275
+ "error": f"Failed to fetch historical data: {str(e)}",
276
+ "historical_data": {},
277
+ "message": "Historical data temporarily unavailable"
278
+ }
279
 
280
  @app.post("/api/collect-now")
281
  async def trigger_data_collection(background_tasks: BackgroundTasks):