cyberandy committed · verified
Commit eef1ed6 · 1 Parent(s): 06bf063

Update app.py

Files changed (1): app.py (+217 −65)
app.py CHANGED
@@ -4,13 +4,14 @@ from bs4 import BeautifulSoup
  import os
  import json
  import logging
- import pandas as pd # Useful for creating the dataframe output
- from typing import Optional, List, Dict, Any # Add this line

  # ------------------------
  # Configuration
  # ------------------------
-
  WORDLIFT_API_URL = "https://api.wordlift.io/content-evaluations"
  WORDLIFT_API_KEY = os.getenv("WORDLIFT_API_KEY") # Get API key from environment variable
 
@@ -38,6 +39,9 @@ body {
      max-width: 1200px; /* Limit width for better readability */
      margin: auto;
  }
  """

  theme = gr.themes.Soft(
@@ -68,35 +72,45 @@ def fetch_content_from_url(url: str, timeout: int = 15) -> str:
          headers = {
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
          }
-         response = requests.get(url, headers=headers, timeout=timeout)
-         response.raise_for_status() # Raise an exception for bad status codes

-         soup = BeautifulSoup(response.content, 'html.parser')

          # Attempt to find main content block
-         main_content = soup.find('main') or soup.find('article')

          if main_content:
              # Extract text from common text-containing tags within the main block
-             text_elements = main_content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote'])
              text = ' '.join([elem.get_text() for elem in text_elements])
          else:
              # Fallback to extracting text from body if no main block found
-             text_elements = soup.body.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote'])
              text = ' '.join([elem.get_text() for elem in text_elements])
-             logger.warning(f"No <main> or <article> found for {url}, extracting from body.")
-

          # Clean up extra whitespace
          text = ' '.join(text.split())

-         # Limit text length to avoid excessively large API calls (adjust as needed)
-         max_text_length = 150000 # approx 25k words, adjust based on API limits/cost
          if len(text) > max_text_length:
-             logger.warning(f"Content for {url} is too long ({len(text)} chars), truncating to {max_text_length} chars.")
-             text = text[:max_text_length] + "..." # Indicate truncation

-         return text

      except requests.exceptions.RequestException as e:
          logger.error(f"Failed to fetch content from {url}: {e}")
@@ -115,8 +129,8 @@ def call_wordlift_api(text: str, keywords: Optional[List[str]] = None) -> Option
          logger.error("WORDLIFT_API_KEY environment variable not set.")
          return {"error": "API key not configured."}

-     if not text:
-         return {"error": "No content provided or fetched."}

      payload = {
          "text": text,
@@ -132,28 +146,98 @@ def call_wordlift_api(text: str, keywords: Optional[List[str]] = None) -> Option
      logger.info(f"Calling WordLift API with text length {len(text)} and {len(keywords or [])} keywords.")

      try:
-         response = requests.post(WORDLIFT_API_URL, headers=headers, json=payload, timeout=60) # Increased timeout
          response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
          return response.json()

      except requests.exceptions.HTTPError as e:
-         logger.error(f"WordLift API HTTP error: {e.response.status_code} - {e.response.text}")
          try:
              error_detail = e.response.json()
          except json.JSONDecodeError:
              error_detail = e.response.text
          return {"error": f"API returned status code {e.response.status_code}", "details": error_detail}
      except requests.exceptions.RequestException as e:
-         logger.error(f"WordLift API request error: {e}")
          return {"error": f"API request failed: {e}"}
      except Exception as e:
          logger.error(f"Unexpected error during API call: {e}")
          return {"error": f"An unexpected error occurred: {e}"}


  # ------------------------
  # Main Evaluation Batch Function
  # ------------------------
  def evaluate_urls_batch(url_data: pd.DataFrame):
      """
      Evaluates a batch of URLs using the WordLift API.
@@ -165,26 +249,54 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
          A tuple containing:
          - A pandas DataFrame with the summary results.
          - A dictionary containing the full results (including errors) keyed by URL.
      """
-     # Corrected check: Use .empty to see if the DataFrame has any rows
      if url_data.empty:
          logger.info("Input DataFrame is empty. Returning empty results.")
-         return pd.DataFrame(columns=['URL', 'Status', 'Overall Score', 'Content Purpose', 'Content Accuracy', 'Content Depth', 'Readability Score (API)', 'Readability Grade Level', 'SEO Score', 'Word Count', 'Error/Details']), {}

      summary_results = []
      full_results = {}

-     for index, row in url_data.iterrows():
-         url = row['URL'].strip()
-         keywords_str = row['Target Keywords (comma-separated)'].strip() if row['Target Keywords (comma-separated)'] else ""
          keywords = [kw.strip() for kw in keywords_str.split(',') if kw.strip()]

          if not url:
-             summary_results.append([url, "Skipped", None, None, None, None, None, None, None, None, "Empty URL"])
-             full_results[url if url else f"Row_{index}"] = {"status": "Skipped", "error": "Empty URL input."}
-             continue

-         logger.info(f"Processing URL: {url} with keywords: {keywords}")

          # 1. Fetch Content
          content = fetch_content_from_url(url)
@@ -192,9 +304,9 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
          if content is None or not content.strip():
              status = "Failed"
              error_msg = "Failed to fetch or extract content."
-             summary_results.append([url, status, None, None, None, None, None, None, None, None, error_msg])
-             full_results[url] = {"status": status, "error": error_msg}
-             logger.error(f"Processing failed for {url}: {error_msg}")
              continue # Move to next URL

          # 2. Call WordLift API
@@ -211,19 +323,29 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
              seo_breakdown = breakdown.get('seo', {})
              metadata = api_result.get('metadata', {})

              summary_row.extend([
                  status,
-                 qs.get('overall', None),
-                 content_breakdown.get('purpose', None),
-                 content_breakdown.get('accuracy', None),
-                 content_breakdown.get('depth', None),
-                 readability_breakdown.get('score', None), # API's readability score (e.g. 2.5)
-                 readability_breakdown.get('grade_level', None),
-                 seo_breakdown.get('score', None),
-                 metadata.get('word_count', None),
                  None # No error
              ])
-             full_results[url] = api_result # Store full API result

          else:
              status = "Failed"
@@ -231,14 +353,35 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
              details = api_result.get("details", "") if api_result else ""
              summary_row.extend([
                  status,
-                 None, None, None, None, None, None, None, None,
                  f"{error_msg} {details}"
              ])
-             full_results[url] = {"status": status, "error": error_msg, "details": details}
-             logger.error(f"API call failed for {url}: {error_msg} {details}")

          summary_results.append(summary_row)

      # Create pandas DataFrame for summary output
      summary_df = pd.DataFrame(summary_results, columns=[
          'URL', 'Status', 'Overall Score', 'Content Purpose',
@@ -246,18 +389,12 @@ def evaluate_urls_batch(url_data: pd.DataFrame):
          'Readability Grade Level', 'SEO Score', 'Word Count', 'Error/Details'
      ])

-     # Format numeric columns for display if they are not None
-     for col in ['Overall Score', 'Content Purpose', 'Content Accuracy', 'Content Depth', 'Readability Score (API)', 'Readability Grade Level', 'SEO Score', 'Word Count']:
-         if col in summary_df.columns:
-             # Convert to numeric, coercing errors, then format
-             summary_df[col] = pd.to_numeric(summary_df[col], errors='coerce')
-             if col in ['Overall Score', 'Readability Score (API)', 'SEO Score']:
-                 summary_df[col] = summary_df[col].apply(lambda x: f'{x:.1f}' if pd.notna(x) else '-')
-             else:
-                 summary_df[col] = summary_df[col].apply(lambda x: f'{int(x)}' if pd.notna(x) else '-')

-     return summary_df, full_results

  # ------------------------
  # Gradio Blocks Interface Setup
@@ -272,33 +409,48 @@ with gr.Blocks(css=css, theme=theme) as demo:
      )

      with gr.Row():
-         with gr.Column():
              url_input_df = gr.Dataframe(
                  headers=["URL", "Target Keywords (comma-separated)"],
                  datatype=["str", "str"],
                  row_count=(1, 30), # Allow adding rows up to 30
                  col_count=(2, "fixed"),
-                 value=[["https://example.com/article1", "keyword A, keyword B"], ["https://example.com/article2", ""]], # Default examples
                  label="URLs and Keywords"
              )
              submit_button = gr.Button("Evaluate All URLs", elem_classes=["primary-btn"])

-     gr.Markdown("## Evaluation Results")

      with gr.Column():
          summary_output_df = gr.DataFrame(
              label="Summary Results",
-             headers=['URL', 'Status', 'Overall Score', 'Content Purpose', 'Content Accuracy', 'Content Depth', 'Readability Score (API)', 'Readability Grade Level', 'SEO Score', 'Word Count', 'Error/Details'],
-             datatype=["str", "str", "str", "str", "str", "str", "str", "str", "str", "str", "str"], # Use str to handle '-' for missing values
              wrap=True # Wrap text in columns
          )
      with gr.Accordion("Full JSON Results", open=False):
-         full_results_json = gr.JSON(label="Raw API Results per URL")

      submit_button.click(
          fn=evaluate_urls_batch,
          inputs=[url_input_df],
-         outputs=[summary_output_df, full_results_json]
      )

  # Launch the app
@@ -314,9 +466,9 @@ if __name__ == "__main__":
          logger.error(" # import dotenv; dotenv.load_dotenv()")
          logger.error(" # in your script before getting the key.")
          logger.error("----------------------------------------------------------\n")
-         # Optionally exit or raise error here if the key is strictly required to launch
-         # exit()
-         pass # Allow launching, but API calls will fail

      logger.info("Launching Gradio app...")
      demo.launch()
 
app.py (updated)

  import os
  import json
  import logging
+ import pandas as pd
+ import numpy as np # Added for mean calculation
+ import matplotlib.pyplot as plt # Added for plotting
+ from typing import Optional, List, Dict, Any

  # ------------------------
  # Configuration
  # ------------------------
  WORDLIFT_API_URL = "https://api.wordlift.io/content-evaluations"
  WORDLIFT_API_KEY = os.getenv("WORDLIFT_API_KEY") # Get API key from environment variable
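The updated file draws the radar chart with Matplotlib inside a Gradio callback. On a headless Space it can be safer to pin a non-interactive backend before pyplot is imported; a minimal sketch, shown only as an assumption about the deployment environment and not part of this commit:

import matplotlib
matplotlib.use("Agg")  # non-interactive backend for server-side rendering (assumption, not in the commit)
import matplotlib.pyplot as plt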
 
 
      max-width: 1200px; /* Limit width for better readability */
      margin: auto;
  }
+ .plot-container {
+     min-height: 400px; /* Ensure plot area is visible */
+ }
  """

  theme = gr.themes.Soft(
 
          headers = {
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
          }
+         # Use stream=True and then process content to handle large files efficiently,
+         # though BeautifulSoup will load it all eventually. Timeout is for connection.
+         with requests.get(url, headers=headers, timeout=timeout, stream=True) as response:
+             response.raise_for_status() # Raise an exception for bad status codes
+
+             # Limit the amount of data read to avoid excessive memory usage
+             max_bytes_to_read = 2 * 1024 * 1024 # 2MB limit for initial read
+             content = response.content[:max_bytes_to_read]
+             if len(response.content) > max_bytes_to_read:
+                 logger.warning(f"Content for {url} is larger than {max_bytes_to_read} bytes, reading truncated content.")

+         soup = BeautifulSoup(content, 'html.parser')

          # Attempt to find main content block
+         # Prioritize more specific semantic tags
+         main_content = soup.find('article') or soup.find('main') or soup.find(class_=lambda x: x and ('content' in x.lower() or 'article' in x.lower()))

          if main_content:
              # Extract text from common text-containing tags within the main block
+             text_elements = main_content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote', 'figcaption'])
              text = ' '.join([elem.get_text() for elem in text_elements])
          else:
              # Fallback to extracting text from body if no main block found
+             text_elements = soup.body.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'blockquote', 'figcaption'])
              text = ' '.join([elem.get_text() for elem in text_elements])
+             logger.warning(f"No specific content tags (<article>, <main>, etc.) found for {url}, extracting from body.")

          # Clean up extra whitespace
          text = ' '.join(text.split())

+         # Limit text length *after* extraction and cleaning
+         # Adjust based on API limits/cost. WordLift's typical text APIs handle up to ~1M chars.
+         max_text_length = 1000000
          if len(text) > max_text_length:
+             logger.warning(f"Extracted text for {url} is too long ({len(text)} chars), truncating to {max_text_length} chars.")
+             text = text[:max_text_length]

+         return text.strip() if text else None # Return None if text is empty after processing

      except requests.exceptions.RequestException as e:
          logger.error(f"Failed to fetch content from {url}: {e}")
 
          logger.error("WORDLIFT_API_KEY environment variable not set.")
          return {"error": "API key not configured."}

+     if not text or not text.strip():
+         return {"error": "No significant content to evaluate."}

      payload = {
          "text": text,
 
      logger.info(f"Calling WordLift API with text length {len(text)} and {len(keywords or [])} keywords.")

      try:
+         response = requests.post(WORDLIFT_API_URL, headers=headers, json=payload, timeout=90) # Increased timeout again
          response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
          return response.json()

      except requests.exceptions.HTTPError as e:
+         logger.error(f"WordLift API HTTP error for {e.request.url}: {e.response.status_code} - {e.response.text}")
          try:
              error_detail = e.response.json()
          except json.JSONDecodeError:
              error_detail = e.response.text
          return {"error": f"API returned status code {e.response.status_code}", "details": error_detail}
+     except requests.exceptions.Timeout as e:
+         logger.error(f"WordLift API request timed out for {e.request.url}: {e}")
+         return {"error": f"API request timed out."}
      except requests.exceptions.RequestException as e:
+         logger.error(f"WordLift API request error for {e.request.url}: {e}")
          return {"error": f"API request failed: {e}"}
      except Exception as e:
          logger.error(f"Unexpected error during API call: {e}")
          return {"error": f"An unexpected error occurred: {e}"}


+ # ------------------------
+ # Plotting Logic
+ # ------------------------
+
+ def plot_average_radar(average_scores: Dict[str, float], avg_overall: Optional[float]) -> Any:
+     """Return a radar (spider) plot as a Matplotlib figure showing average scores."""
+
+     if not average_scores or all(v is None for v in average_scores.values()):
+         # Return a placeholder figure if no valid data is available
+         fig, ax = plt.subplots(figsize=(6, 6))
+         ax.text(0.5, 0.5, "No successful evaluations to plot.", horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=12)
+         ax.axis('off') # Hide axes
+         plt.title("Average Content Quality Scores", size=16, y=1.05)
+         plt.tight_layout()
+         return fig
+
+     categories = list(average_scores.keys())
+     values = [average_scores[cat] for cat in categories]
+
+     # Ensure values are floats, replace None with 0 for plotting
+     values = [float(v) if v is not None else 0 for v in values]
+
+     num_vars = len(categories)
+     # Calculate angles for the radar chart
+     angles = [n / float(num_vars) * 2 * np.pi for n in range(num_vars)]
+     angles += angles[:1] # Complete the circle
+     values += values[:1] # Complete the circle for values
+
+     fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(projection='polar'))
+
+     line_color = '#3452DB'
+     fill_color = '#A1A7AF'
+     background_color = '#F6F6F7'
+     annotation_color = '#191919'
+
+     # Plot data
+     ax.plot(angles, values, 'o-', linewidth=2, color=line_color, label='Average Scores')
+     ax.fill(angles, values, alpha=0.4, color=fill_color)
+
+     # Set tick locations and labels
+     ax.set_xticks(angles[:-1])
+     ax.set_xticklabels(categories, color=line_color, fontsize=10)
+
+     # Set y-axis limits. Max score is 100.
+     ax.set_ylim(0, 100)
+
+     # Draw grid lines and axes
+     ax.grid(True, alpha=0.5, color=fill_color)
+     ax.set_facecolor(background_color)
+
+     # Add score annotations next to points
+     for angle, value, category in zip(angles[:-1], values[:-1], categories):
+         # Adjust position slightly so text doesn't overlap the point/line
+         # Radius adjustment can be tricky; let's just add text at the point for simplicity
+         ax.text(angle, value + 5, f'{value:.1f}', color=annotation_color,
+                 horizontalalignment='center', verticalalignment='bottom' if value > 50 else 'top', fontsize=9)
+
+     # Add title
+     overall_title = f'Average Content Quality Scores\nOverall: {avg_overall:.1f}/100' if avg_overall is not None else 'Average Content Quality Scores'
+     plt.title(overall_title, size=16, y=1.1, color=annotation_color)
+
+     plt.tight_layout()
+     return fig
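For a quick sanity check of plot_average_radar outside Gradio, a small driver like the one below could be used; the scores are hypothetical values, not API output (a sketch, not part of the commit):

sample = {'Purpose': 82.0, 'Accuracy': 74.5, 'Depth': 68.0, 'Readability': 55.0, 'SEO': 79.0}
fig = plot_average_radar(sample, avg_overall=71.7)
fig.savefig("radar_preview.png")  # save to disk since a Space has no interactive display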
  # ------------------------
  # Main Evaluation Batch Function
  # ------------------------
+
  def evaluate_urls_batch(url_data: pd.DataFrame):
      """
      Evaluates a batch of URLs using the WordLift API.
 
          A tuple containing:
          - A pandas DataFrame with the summary results.
          - A dictionary containing the full results (including errors) keyed by URL.
+         - A Matplotlib figure for the average radar chart.
      """
+     # Check if the DataFrame has any rows (correct way using .empty)
      if url_data.empty:
          logger.info("Input DataFrame is empty. Returning empty results.")
+         # Return empty summary DF, empty full results, and an empty placeholder plot
+         empty_summary_df = pd.DataFrame(columns=[
+             'URL', 'Status', 'Overall Score', 'Content Purpose',
+             'Content Accuracy', 'Content Depth', 'Readability Score (API)',
+             'Readability Grade Level', 'SEO Score', 'Word Count', 'Error/Details'
+         ])
+         return empty_summary_df, {}, plot_average_radar(None, None) # Pass None to plotting function

      summary_results = []
      full_results = {}

+     # Lists to store scores for calculating averages
+     purpose_scores = []
+     accuracy_scores = []
+     depth_scores = []
+     readability_scores = []
+     seo_scores = []
+     overall_scores = []
+
+     # Ensure columns exist, add them if not (though Dataframe component should enforce this)
+     # Using .get() with default None is safer if columns might sometimes be missing
+     urls = url_data.get('URL', pd.Series(dtype=str))
+     keywords_col = url_data.get('Target Keywords (comma-separated)', pd.Series(dtype=str))
+
+     for index, url in enumerate(urls):
+         url = url.strip() if pd.notna(url) else ""
+         keywords_str = keywords_col.iloc[index].strip() if pd.notna(keywords_col.iloc[index]) else ""
          keywords = [kw.strip() for kw in keywords_str.split(',') if kw.strip()]

+         # Generate a unique key for full_results, especially if URL is empty or duplicate
+         result_key = url if url else f"Row_{index}"
+         # Ensure unique key in case of duplicate empty URLs, maybe use index always?
+         result_key = f"Row_{index}_{url}" if url else f"Row_{index}"

          if not url:
+             summary_results.append(["", "Skipped", "-", "-", "-", "-", "-", "-", "-", "-", "Empty URL"])
+             full_results[result_key] = {"status": "Skipped", "error": "Empty URL input."}
+             logger.warning(f"Skipping evaluation for row {index}: Empty URL")
+             continue # Move to next URL

+         logger.info(f"Processing URL: {url} (Row {index}) with keywords: {keywords}")

          # 1. Fetch Content
          content = fetch_content_from_url(url)
 
          if content is None or not content.strip():
              status = "Failed"
              error_msg = "Failed to fetch or extract content."
+             summary_results.append([url, status, "-", "-", "-", "-", "-", "-", "-", "-", error_msg])
+             full_results[result_key] = {"status": status, "error": error_msg}
+             logger.error(f"Processing failed for {url} (Row {index}): {error_msg}")
              continue # Move to next URL

          # 2. Call WordLift API
 
              seo_breakdown = breakdown.get('seo', {})
              metadata = api_result.get('metadata', {})

+             # Append scores for average calculation (only for successful calls)
+             purpose_scores.append(content_breakdown.get('purpose'))
+             accuracy_scores.append(content_breakdown.get('accuracy'))
+             depth_scores.append(content_breakdown.get('depth'))
+             readability_scores.append(readability_breakdown.get('score')) # API's readability score (e.g. 2.5)
+             seo_scores.append(seo_breakdown.get('score'))
+             overall_scores.append(qs.get('overall'))
+
+             # Append data for the summary table row
              summary_row.extend([
                  status,
+                 f'{qs.get("overall", "-"): .1f}',
+                 f'{content_breakdown.get("purpose", "-"): .0f}', # Assuming integer scores
+                 f'{content_breakdown.get("accuracy", "-"): .0f}', # Assuming integer scores
+                 f'{content_breakdown.get("depth", "-"): .0f}', # Assuming integer scores
+                 f'{readability_breakdown.get("score", "-"): .1f}',
+                 f'{readability_breakdown.get("grade_level", "-"): .0f}', # Assuming integer grade
+                 f'{seo_breakdown.get("score", "-"): .1f}',
+                 f'{metadata.get("word_count", "-"): .0f}', # Assuming integer word count
                  None # No error
              ])
+             full_results[result_key] = api_result # Store full API result

          else:
              status = "Failed"
 
              details = api_result.get("details", "") if api_result else ""
              summary_row.extend([
                  status,
+                 "-", "-", "-", "-", "-", "-", "-", "-",
                  f"{error_msg} {details}"
              ])
+             full_results[result_key] = {"status": status, "error": error_msg, "details": details}
+             logger.error(f"API call failed for {url} (Row {index}): {error_msg} {details}")

          summary_results.append(summary_row)

+     # Calculate Averages *after* processing all URLs
+     avg_purpose = np.nanmean(purpose_scores) if purpose_scores else None # Use nanmean to ignore None/NaN
+     avg_accuracy = np.nanmean(accuracy_scores) if accuracy_scores else None
+     avg_depth = np.nanmean(depth_scores) if depth_scores else None
+     avg_readability = np.nanmean(readability_scores) if readability_scores else None
+     avg_seo = np.nanmean(seo_scores) if seo_scores else None
+     avg_overall = np.nanmean(overall_scores) if overall_scores else None
+
+     # Prepare scores for the radar plot function
+     average_scores_dict = {
+         'Purpose': avg_purpose,
+         'Accuracy': avg_accuracy,
+         'Depth': avg_depth,
+         'Readability': avg_readability,
+         'SEO': avg_seo
+     }
+
+     # Generate the average radar plot
+     average_radar_fig = plot_average_radar(average_scores_dict, avg_overall)

      # Create pandas DataFrame for summary output
      summary_df = pd.DataFrame(summary_results, columns=[
          'URL', 'Status', 'Overall Score', 'Content Purpose',
 
          'Readability Grade Level', 'SEO Score', 'Word Count', 'Error/Details'
      ])

+     # Note: Formatting is already done when creating the summary_row list above
+     # using f-strings like f'{value: .1f}' or f'{value: .0f}', and setting '-' for None.
+     # This ensures that pandas DataFrame displays formatted strings directly.

+     return summary_df, full_results, average_radar_fig # Return the plot too

  # ------------------------
  # Gradio Blocks Interface Setup
 
      )

      with gr.Row():
+         with gr.Column(scale=1):
              url_input_df = gr.Dataframe(
                  headers=["URL", "Target Keywords (comma-separated)"],
                  datatype=["str", "str"],
                  row_count=(1, 30), # Allow adding rows up to 30
                  col_count=(2, "fixed"),
+                 value=[
+                     ["https://www.wordlift.io/blog/google-helpful-content-update-2023/", "helpful content, google update"],
+                     ["https://www.wordlift.io/blog/what-is-a-knowledge-graph/", "knowledge graph, semantic web"],
+                     ["https://www.example.com/non-existent-page", ""], # Example of a failing URL
+                     ["", ""] # Example of an empty row
+                 ], # Default examples
                  label="URLs and Keywords"
              )
              submit_button = gr.Button("Evaluate All URLs", elem_classes=["primary-btn"])

+         with gr.Column(scale=1, elem_classes="plot-container"):
+             # New component for the average radar plot
+             average_radar_output = gr.Plot(label="Average Content Quality Scores Radar")

+     gr.Markdown("## Detailed Results")

      with gr.Column():
          summary_output_df = gr.DataFrame(
              label="Summary Results",
+             # Data types are all string now because we formatted them with f-strings to include '-'
+             headers=['URL', 'Status', 'Overall Score', 'Content Purpose',
+                      'Content Accuracy', 'Content Depth', 'Readability Score (API)',
+                      'Readability Grade Level', 'SEO Score', 'Word Count', 'Error/Details'],
+             datatype=["str"] * 11,
              wrap=True # Wrap text in columns
          )
      with gr.Accordion("Full JSON Results", open=False):
+         # Changed the output type to gr.JSON
+         full_results_json = gr.JSON(label="Raw API Results per URL (or Error)")

      submit_button.click(
          fn=evaluate_urls_batch,
          inputs=[url_input_df],
+         # Updated outputs to include the average radar plot
+         outputs=[summary_output_df, full_results_json, average_radar_output]
      )

  # Launch the app
 
          logger.error(" # import dotenv; dotenv.load_dotenv()")
          logger.error(" # in your script before getting the key.")
          logger.error("----------------------------------------------------------\n")
+         # You might want to sys.exit(1) here if the API key is mandatory

      logger.info("Launching Gradio app...")
+     # Consider using share=True for easy sharing, but be mindful of security/costs
+     # demo.launch(share=True)
      demo.launch()
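Two caveats in the new formatting and averaging code are worth flagging: f-strings such as f'{qs.get("overall", "-"): .1f}' raise a ValueError whenever a score is missing (a float format spec cannot be applied to the "-" default), and np.nanmean raises a TypeError when a list contains None. A defensive variant might look like the following sketch; fmt and safe_mean are hypothetical helpers, not part of the commit:

def fmt(value, spec="{:.1f}"):
    # Format only real numbers; fall back to the '-' placeholder otherwise.
    return spec.format(value) if isinstance(value, (int, float)) else "-"

def safe_mean(values):
    # Average only the numeric entries; return None when nothing is numeric.
    nums = [v for v in values if isinstance(v, (int, float))]
    return float(np.mean(nums)) if nums else None

# e.g. fmt(qs.get("overall")) -> "82.5" or "-"; safe_mean(overall_scores) -> 82.5 or None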