cyberandy committed
Commit 19d88f2 · verified · 1 Parent(s): 45f59b8

Update app.py

Files changed (1):
  1. app.py +137 -67
app.py CHANGED
@@ -5,8 +5,8 @@ import os
 import json
 import logging
 import pandas as pd
-import numpy as np # Added for mean calculation
-import matplotlib.pyplot as plt # Added for plotting
+import numpy as np
+import matplotlib.pyplot as plt
 from typing import Optional, List, Dict, Any

 # ------------------------
@@ -41,7 +41,16 @@ body {
 }
 .plot-container {
     min-height: 400px; /* Ensure plot area is visible */
+    display: flex;
+    justify-content: center; /* Center the plot */
+    align-items: center; /* Center vertically if needed */
 }
+/* Specific style for the plot title to potentially reduce overlap */
+.plot-container .gradio-html-title {
+    text-align: center;
+    width: 100%; /* Ensure title centers */
+}
+
 """

 theme = gr.themes.Soft(
@@ -79,38 +88,56 @@ def fetch_content_from_url(url: str, timeout: int = 15) -> str:

         # Limit the amount of data read to avoid excessive memory usage
         max_bytes_to_read = 2 * 1024 * 1024 # 2MB limit for initial read
-        content = response.content[:max_bytes_to_read]
-        if len(response.content) > max_bytes_to_read:
-            logger.warning(f"Content for {url} is larger than {max_bytes_to_read} bytes, reading truncated content.")
+        # Read only up to max_bytes_to_read
+        content_bytes = b''
+        for chunk in response.iter_content(chunk_size=8192):
+            if not chunk:
+                break
+            content_bytes += chunk
+            if len(content_bytes) >= max_bytes_to_read:
+                logger.warning(f"Content for {url} exceeded {max_bytes_to_read} bytes, stopped reading.")
+                break
+
+        # Use detect_encoding if possible, fallback to utf-8
+        try:
+            # Attempt to get encoding from headers or detect it
+            encoding = requests.utils.get_encoding_from_headers(response.headers) or requests.utils.guess_json_utf(content_bytes)
+            content = content_bytes.decode(encoding, errors='replace')
+        except Exception as e:
+            logger.warning(f"Could not detect encoding for {url}, falling back to utf-8: {e}")
+            content = content_bytes.decode('utf-8', errors='replace')

         soup = BeautifulSoup(content, 'html.parser')

         # Attempt to find main content block
         # Prioritize more specific semantic tags
-        main_content = soup.find('article') or soup.find('main') or soup.find(class_=lambda x: x and ('content' in x.lower() or 'article' in x.lower()))
+        # Added some common class names as fallback
+        main_content = soup.find('article') or soup.find('main') or soup.find(class_=lambda x: x and ('content' in x.lower() or 'article' in x.lower() or 'post' in x.lower() or 'body' in x.lower()))

         if main_content:
             # Extract text from common text-containing tags within the main block
-            text_elements = main_content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote', 'figcaption'])
+            text_elements = main_content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote', 'figcaption', 'pre', 'code'])
             text = ' '.join([elem.get_text() for elem in text_elements])
         else:
             # Fallback to extracting text from body if no main block found
-            text_elements = soup.body.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote', 'figcaption'])
+            text_elements = soup.body.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote', 'figcaption', 'pre', 'code'])
             text = ' '.join([elem.get_text() for elem in text_elements])
-            logger.warning(f"No specific content tags (<article>, <main>, etc.) found for {url}, extracting from body.")
+            logger.warning(f"No specific content tags (<article>, <main>, etc.) or common class names found for {url}, extracting from body.")

         # Clean up extra whitespace
         text = ' '.join(text.split())

         # Limit text length *after* extraction and cleaning
         # Adjust based on API limits/cost. WordLift's typical text APIs handle up to ~1M chars.
-        max_text_length = 1000000
+        max_text_length = 1000000 # 1 Million characters
         if len(text) > max_text_length:
             logger.warning(f"Extracted text for {url} is too long ({len(text)} chars), truncating to {max_text_length} chars.")
             text = text[:max_text_length]

-        return text.strip() if text else None # Return None if text is empty after processing
+        return text.strip() if text and text.strip() else None # Return None if text is empty after processing
+

     except requests.exceptions.RequestException as e:
         logger.error(f"Failed to fetch content from {url}: {e}")
@@ -172,13 +199,14 @@ def call_wordlift_api(text: str, keywords: Optional[List[str]] = None) -> Option
 # Plotting Logic
 # ------------------------

-def plot_average_radar(average_scores: Dict[str, float], avg_overall: Optional[float]) -> Any:
+def plot_average_radar(average_scores: Dict[str, Optional[float]], avg_overall: Optional[float]) -> Any:
     """Return a radar (spider) plot as a Matplotlib figure showing average scores."""

-    if not average_scores or all(v is None for v in average_scores.values()):
+    # Check if we have any valid scores to plot
+    if not average_scores or all(v is None or pd.isna(v) for v in average_scores.values()):
         # Return a placeholder figure if no valid data is available
         fig, ax = plt.subplots(figsize=(6, 6))
-        ax.text(0.5, 0.5, "No successful evaluations to plot.", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=12)
+        ax.text(0.5, 0.5, "No successful evaluations to plot\naverage scores.", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=12)
         ax.axis('off') # Hide axes
         plt.title("Average Content Quality Scores", size=16, y=1.05)
         plt.tight_layout()
@@ -186,16 +214,16 @@ plot_average_radar(average_scores: Dict[str, float], avg_overall: Optional[f

     categories = list(average_scores.keys())
-    values = [average_scores[cat] for cat in categories]
-
-    # Ensure values are floats, replace None with 0 for plotting
-    values = [float(v) if v is not None else 0 for v in values]
+    # Convert None/NaN values to 0 for plotting, but keep track of original for annotation
+    values_raw = [average_scores[cat] for cat in categories]
+    values_for_plot = [float(v) if v is not None and pd.notna(v) else 0 for v in values_raw]

     num_vars = len(categories)
     # Calculate angles for the radar chart
     angles = [n / float(num_vars) * 2 * np.pi for n in range(num_vars)]
     angles += angles[:1] # Complete the circle
-    values += values[:1] # Complete the circle for values
+    values_for_plot += values_for_plot[:1] # Complete the circle for values

     fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(projection='polar'))

@@ -205,8 +233,8 @@ def plot_average_radar(average_scores: Dict[str, float], avg_overall: Optional[f
     annotation_color = '#191919'

     # Plot data
-    ax.plot(angles, values, 'o-', linewidth=2, color=line_color, label='Average Scores')
-    ax.fill(angles, values, alpha=0.4, color=fill_color)
+    ax.plot(angles, values_for_plot, 'o-', linewidth=2, color=line_color, label='Average Scores')
+    ax.fill(angles, values_for_plot, alpha=0.4, color=fill_color)

     # Set tick locations and labels
     ax.set_xticks(angles[:-1])
@@ -214,22 +242,27 @@ def plot_average_radar(average_scores: Dict[str, float], avg_overall: Optional[f

     # Set y-axis limits. Max score is 100.
     ax.set_ylim(0, 100)
+    ax.set_yticks([0, 20, 40, 60, 80, 100]) # Explicitly set y-ticks

     # Draw grid lines and axes
     ax.grid(True, alpha=0.5, color=fill_color)
     ax.set_facecolor(background_color)

-    # Add score annotations next to points
-    for angle, value, category in zip(angles[:-1], values[:-1], categories):
-        # Adjust position slightly so text doesn't overlap the point/line
-        # Radius adjustment can be tricky; let's just add text at the point for simplicity
-        ax.text(angle, value + 5, f'{value:.1f}', color=annotation_color,
-                horizontalalignment='center', verticalalignment='bottom' if value > 50 else 'top', fontsize=9)
+    # Add score annotations next to points - Use raw values if not None/NaN
+    for angle, value_raw, value_plotted in zip(angles[:-1], values_raw, values_for_plot[:-1]):
+        if value_raw is not None and pd.notna(value_raw):
+            # Adjust position slightly based on angle and value
+            # More sophisticated positioning needed for perfect placement, simple offset below
+            # Let's just add text slightly outside the point along the radial line
+            radius = value_plotted + 5 # Offset outward
+            # Ensure annotation stays within limits if needed, but 105 should be fine for ylim 100
+            ax.text(angle, radius, f'{value_raw:.1f}', color=annotation_color,
+                    horizontalalignment='center', verticalalignment='center', fontsize=9)

-    # Add title
-    overall_title = f'Average Content Quality Scores\nOverall: {avg_overall:.1f}/100' if avg_overall is not None else 'Average Content Quality Scores'
-    plt.title(overall_title, size=16, y=1.1, color=annotation_color)
+    # Add title - Only "Overall: XX.X/100" part
+    overall_title_text = f'Overall: {avg_overall:.1f}/100' if avg_overall is not None and pd.notna(avg_overall) else 'Overall: -'
+    plt.title(overall_title_text, size=16, y=1.1, color=annotation_color) # y=1.1 places it above the plot area

     plt.tight_layout()
     return fig
@@ -260,40 +293,45 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
             'Content Accuracy', 'Content Depth', 'Readability Score (API)',
             'Readability Grade Level', 'SEO Score', 'Word Count', 'Error/Details'
         ])
-        return empty_summary_df, {}, plot_average_radar(None, None) # Pass None to plotting function
+        return empty_summary_df, {}, plot_average_radar(None, None) # Pass None, None to plotting function

     summary_results = []
     full_results = {}

     # Lists to store scores for calculating averages
+    # Initialize with None/NaN to correctly handle empty inputs or failures
     purpose_scores = []
     accuracy_scores = []
     depth_scores = []
-    readability_scores = []
+    readability_scores = [] # Note: API returns float like 2.5
     seo_scores = []
     overall_scores = []

-    # Ensure columns exist, add them if not (though Dataframe component should enforce this)
-    # Using .get() with default None is safer if columns might sometimes be missing
-    urls = url_data.get('URL', pd.Series(dtype=str))
-    keywords_col = url_data.get('Target Keywords (comma-separated)', pd.Series(dtype=str))
+    # Ensure columns exist and handle potential NaNs from the DataFrame input
+    urls = url_data.get('URL', pd.Series(dtype=str)).fillna('') # Replace NaN URLs with empty strings
+    keywords_col = url_data.get('Target Keywords (comma-separated)', pd.Series(dtype=str)).fillna('') # Replace NaN keywords with empty strings

     for index, url in enumerate(urls):
-        url = url.strip() if pd.notna(url) else ""
-        keywords_str = keywords_col.iloc[index].strip() if pd.notna(keywords_col.iloc[index]) else ""
+        url = url.strip()
+        keywords_str = keywords_col.iloc[index].strip()
         keywords = [kw.strip() for kw in keywords_str.split(',') if kw.strip()]

-        # Generate a unique key for full_results, especially if URL is empty or duplicate
-        result_key = url if url else f"Row_{index}"
-        # Ensure unique key in case of duplicate empty URLs, maybe use index always?
-        result_key = f"Row_{index}_{url}" if url else f"Row_{index}"
+        # Generate a unique key for full_results based on index and URL/placeholder
+        result_key = f"Row_{index}" + (f": {url}" if url else "")

         if not url:
             summary_results.append(["", "Skipped", "-", "-", "-", "-", "-", "-", "-", "-", "Empty URL"])
             full_results[result_key] = {"status": "Skipped", "error": "Empty URL input."}
             logger.warning(f"Skipping evaluation for row {index}: Empty URL")
+            # Append None to scores lists for skipped/failed rows
+            purpose_scores.append(np.nan)
+            accuracy_scores.append(np.nan)
+            depth_scores.append(np.nan)
+            readability_scores.append(np.nan)
+            seo_scores.append(np.nan)
+            overall_scores.append(np.nan)
             continue # Move to next URL

         logger.info(f"Processing URL: {url} (Row {index}) with keywords: {keywords}")
@@ -307,6 +345,13 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
             summary_results.append([url, status, "-", "-", "-", "-", "-", "-", "-", "-", error_msg])
             full_results[result_key] = {"status": status, "error": error_msg}
             logger.error(f"Processing failed for {url} (Row {index}): {error_msg}")
+            # Append None to scores lists for skipped/failed rows
+            purpose_scores.append(np.nan)
+            accuracy_scores.append(np.nan)
+            depth_scores.append(np.nan)
+            readability_scores.append(np.nan)
+            seo_scores.append(np.nan)
+            overall_scores.append(np.nan)
             continue # Move to next URL

         # 2. Call WordLift API
@@ -324,25 +369,27 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
             metadata = api_result.get('metadata', {})

             # Append scores for average calculation (only for successful calls)
-            purpose_scores.append(content_breakdown.get('purpose'))
-            accuracy_scores.append(content_breakdown.get('accuracy'))
-            depth_scores.append(content_breakdown.get('depth'))
-            readability_scores.append(readability_breakdown.get('score')) # API's readability score (e.g. 2.5)
-            seo_scores.append(seo_breakdown.get('score'))
-            overall_scores.append(qs.get('overall'))
+            # Use .get() with None default, then convert to float, allowing NaN
+            purpose_scores.append(float(content_breakdown.get('purpose')) if content_breakdown.get('purpose') is not None else np.nan)
+            accuracy_scores.append(float(content_breakdown.get('accuracy')) if content_breakdown.get('accuracy') is not None else np.nan)
+            depth_scores.append(float(content_breakdown.get('depth')) if content_breakdown.get('depth') is not None else np.nan)
+            readability_scores.append(float(readability_breakdown.get('score')) if readability_breakdown.get('score') is not None else np.nan)
+            seo_scores.append(float(seo_breakdown.get('score')) if seo_breakdown.get('score') is not None else np.nan)
+            overall_scores.append(float(qs.get('overall')) if qs.get('overall') is not None else np.nan)

             # Append data for the summary table row
+            # Use .get() with '-' default for display
             summary_row.extend([
                 status,
-                f'{qs.get("overall", "-"): .1f}',
-                f'{content_breakdown.get("purpose", "-"): .0f}', # Assuming integer scores
-                f'{content_breakdown.get("accuracy", "-"): .0f}', # Assuming integer scores
-                f'{content_breakdown.get("depth", "-"): .0f}', # Assuming integer scores
-                f'{readability_breakdown.get("score", "-"): .1f}',
-                f'{readability_breakdown.get("grade_level", "-"): .0f}', # Assuming integer grade
-                f'{seo_breakdown.get("score", "-"): .1f}',
-                f'{metadata.get("word_count", "-"): .0f}', # Assuming integer word count
+                f'{qs.get("overall", "-"): .1f}' if qs.get('overall') is not None else "-",
+                f'{content_breakdown.get("purpose", "-"): .0f}' if content_breakdown.get('purpose') is not None else "-",
+                f'{content_breakdown.get("accuracy", "-"): .0f}' if content_breakdown.get('accuracy') is not None else "-",
+                f'{content_breakdown.get("depth", "-"): .0f}' if content_breakdown.get('depth') is not None else "-",
+                f'{readability_breakdown.get("score", "-"): .1f}' if readability_breakdown.get('score') is not None else "-",
+                f'{readability_breakdown.get("grade_level", "-"): .0f}' if readability_breakdown.get('grade_level') is not None else "-",
+                f'{seo_breakdown.get("score", "-"): .1f}' if seo_breakdown.get('score') is not None else "-",
+                f'{metadata.get("word_count", "-"): .0f}' if metadata.get('word_count') is not None else "-",
                 None # No error
             ])
             full_results[result_key] = api_result # Store full API result
@@ -359,15 +406,33 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
             full_results[result_key] = {"status": status, "error": error_msg, "details": details}
             logger.error(f"API call failed for {url} (Row {index}): {error_msg} {details}")

+            # Append None/NaN to scores lists for failed rows
+            purpose_scores.append(np.nan)
+            accuracy_scores.append(np.nan)
+            depth_scores.append(np.nan)
+            readability_scores.append(np.nan)
+            seo_scores.append(np.nan)
+            overall_scores.append(np.nan)
+
+
         summary_results.append(summary_row)

-    # Calculate Averages *after* processing all URLs
-    avg_purpose = np.nanmean(purpose_scores) if purpose_scores else None # Use nanmean to ignore None/NaN
-    avg_accuracy = np.nanmean(accuracy_scores) if accuracy_scores else None
-    avg_depth = np.nanmean(depth_scores) if depth_scores else None
-    avg_readability = np.nanmean(readability_scores) if readability_scores else None
-    avg_seo = np.nanmean(seo_scores) if seo_scores else None
-    avg_overall = np.nanmean(overall_scores) if overall_scores else None
+    # Calculate Averages *after* processing all URLs, ignoring NaNs
+    avg_purpose = np.nanmean(purpose_scores)
+    avg_accuracy = np.nanmean(accuracy_scores)
+    avg_depth = np.nanmean(depth_scores)
+    avg_readability = np.nanmean(readability_scores)
+    avg_seo = np.nanmean(seo_scores)
+    avg_overall = np.nanmean(overall_scores)
+
+    # Convert potentially NaN averages to None if there were no valid scores
+    avg_purpose = avg_purpose if pd.notna(avg_purpose) else None
+    avg_accuracy = avg_accuracy if pd.notna(avg_accuracy) else None
+    avg_depth = avg_depth if pd.notna(avg_depth) else None
+    avg_readability = avg_readability if pd.notna(avg_readability) else None
+    avg_seo = avg_seo if pd.notna(avg_seo) else None
+    avg_overall = avg_overall if pd.notna(avg_overall) else None
+

     # Prepare scores for the radar plot function
     average_scores_dict = {
@@ -393,7 +458,6 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
     # using f-strings like f'{value: .1f}' or f'{value: .0f}', and setting '-' for None.
     # This ensures that pandas DataFrame displays formatted strings directly.

-
     return summary_df, full_results, average_radar_fig # Return the plot too

 # ------------------------
@@ -416,11 +480,14 @@ with gr.Blocks(css=css, theme=theme) as demo:
                 row_count=(1, 30), # Allow adding rows up to 30
                 col_count=(2, "fixed"),
                 value=[
-                    ["https://www.wordlift.io/blog/google-helpful-content-update-2023/", "helpful content, google update"],
-                    ["https://www.wordlift.io/blog/what-is-a-knowledge-graph/", "knowledge graph, semantic web"],
+                    ["https://wordlift.io/blog/en/query-fan-out-ai-search/", "query fan out, ai search, google, ai"], # Added first URL
+                    ["https://wordlift.io/blog/en/entity/google-knowledge-graph/", "google knowledge graph, entity, semantic web, seo"], # Added second URL
                     ["https://www.example.com/non-existent-page", ""], # Example of a failing URL
-                    ["", ""] # Example of an empty row
-                ], # Default examples
+                    ["", ""], # Example of an empty row
+                    ["", ""], # Add some extra empty rows for easier input
+                    ["", ""],
+                    ["", ""],
+                ],
                 label="URLs and Keywords"
             )
             submit_button = gr.Button("Evaluate All URLs", elem_classes=["primary-btn"])
@@ -467,6 +534,9 @@ if __name__ == "__main__":
         logger.error(" # in your script before getting the key.")
         logger.error("----------------------------------------------------------\n")
         # You might want to sys.exit(1) here if the API key is mandatory
+        # import sys
+        # sys.exit(1)
+

     logger.info("Launching Gradio app...")
     # Consider using share=True for easy sharing, but be mindful of security/costs
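Note on the fetch change above: the commit replaces the single response.content slice with a chunked read capped at 2 MB plus an encoding fallback. Below is a minimal, self-contained sketch of that pattern for reference; the helper name fetch_capped, the stream=True flag, and the example URL are illustrative assumptions and not part of the committed app.py (iter_content only avoids downloading the full body when the request is made with stream=True).

import requests

def fetch_capped(url: str, max_bytes: int = 2 * 1024 * 1024, timeout: int = 15) -> str:
    # Illustrative sketch: read at most max_bytes of the response body.
    response = requests.get(url, timeout=timeout, stream=True)  # stream=True assumed
    response.raise_for_status()

    content_bytes = b''
    for chunk in response.iter_content(chunk_size=8192):
        if not chunk:
            break
        content_bytes += chunk
        if len(content_bytes) >= max_bytes:
            break  # stop once the cap is reached

    # Prefer the charset declared in the HTTP headers, fall back to UTF-8.
    encoding = requests.utils.get_encoding_from_headers(response.headers) or 'utf-8'
    return content_bytes.decode(encoding, errors='replace')

# Hypothetical usage:
# html = fetch_capped("https://example.com/")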