Spaces:
Sleeping
Sleeping
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
import plotly.express as px | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
from langchain_core.messages import HumanMessage | |
import os | |
import re | |
# --- Configuration ---
# The LLM client is not created at import time; generate_ai_summary() builds it on demand.
# --- Core Scraping & Analysis Functions ---
def fetch_html(url):
    """Download *url* and return the response body as text.

    The request is sent with a desktop-Chrome user-agent string and a
    10-second timeout. Any ``requests.RequestException`` (including the HTTP
    error raised by ``raise_for_status``) is printed and reported to the
    caller as ``None``.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        page = requests.get(url, headers=browser_headers, timeout=10)
        # Treat 4xx/5xx answers as failures, same as connection errors.
        page.raise_for_status()
        return page.text
    except requests.RequestException as err:
        print(f"Error fetching {url}: {err}")
        return None
def analyze_onpage_seo(soup):
    """Collect basic on-page SEO facts from a parsed page.

    Args:
        soup: Parsed document exposing ``find``, ``find_all`` and
            ``get_text`` (the caller in this file passes a BeautifulSoup
            object).

    Returns:
        A dict with the keys:
        - ``"title"``: stripped text of the ``<title>`` tag, or ``"N/A"``.
        - ``"description"``: ``content`` of ``<meta name="description">``,
          or ``"N/A"`` when the tag or its ``content`` attribute is missing.
        - ``"headings"``: dict mapping ``"h1"``/``"h2"``/``"h3"`` to the list
          of stripped heading texts in document order.
        - ``"word_count"``: number of whitespace-separated tokens in the
          page text.
    """
    title_tag = soup.find('title')
    title = title_tag.get_text(strip=True) if title_tag is not None else "N/A"

    # A description tag can exist without a ``content`` attribute; .get()
    # reports that as "N/A" instead of raising KeyError.
    meta_tag = soup.find('meta', attrs={'name': 'description'})
    description = meta_tag.get('content', "N/A") if meta_tag is not None else "N/A"

    headings = {
        level: [tag.get_text(strip=True) for tag in soup.find_all(level)]
        for level in ('h1', 'h2', 'h3')
    }
    word_count = len(soup.get_text().split())

    return {
        "title": title,
        "description": description,
        "headings": headings,
        "word_count": word_count,
    }
def analyze_tech_stack(soup, html):
    """Guess the technologies a page uses from simple textual markers.

    Args:
        soup: Parsed document; only ``soup.find('link', href=...)`` is used,
            to look for a stylesheet link whose ``href`` contains
            ``"tailwind"``.
        html: Raw HTML text that is searched for case-sensitive substrings.

    Returns:
        A list of detected technology labels, always in the fixed order the
        checks are listed below, or ``["Basic HTML/CSS"]`` when nothing
        matched.
    """
    # (label, detected) pairs. A list is used rather than a set so the
    # reported order does not vary from run to run.
    checks = [
        ("React", "react.js" in html or 'data-reactroot' in html),
        ("Vue.js", "vue.js" in html),
        ("Angular", "angular.js" in html),
        ("WordPress", "wp-content" in html),
        ("Google Analytics (GA4)", "gtag('config'" in html),
        ("Google Tag Manager", "GTM-" in html),
        # The predicate receives each link's href, which may be None.
        ("Tailwind CSS", soup.find('link', href=lambda href: href and 'tailwind' in href)),
    ]
    tech = [label for label, detected in checks if detected]
    return tech if tech else ["Basic HTML/CSS"]
# --- AI Summary Generation (Now with Robust Initialization) --- | |
def generate_ai_summary(url, seo_data, tech_data):
    """Ask the Gemini model for a short strategic summary of a website.

    Args:
        url: Address of the analysed site; interpolated into the prompt.
        seo_data: Dict providing ``'title'``, ``'description'``,
            ``'headings'['h1']`` (a list of strings) and ``'word_count'``.
        tech_data: Iterable of technology labels, joined with ``", "``.

    Returns:
        The model's reply text on success. If ``GEMINI_API_KEY`` is unset or
        empty, a fixed ``ERROR: ...`` message. If anything raises while
        building the client, formatting the prompt, or calling the model, a
        ``Could not generate AI summary...`` message containing the error.
    """
    # The client is created per call, so a missing key only affects this
    # feature.
    gemini_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."

    try:
        model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=gemini_key)
        prompt = f"""
You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
## On-Page SEO Analysis:
- **Title:** {seo_data['title']}
- **Meta Description:** {seo_data['description']}
- **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
- **Page Word Count:** {seo_data['word_count']}
## Technology Stack:
- {', '.join(tech_data)}
## Your Analysis:
Provide a 3-bullet point summary covering:
1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
2. **Their Target Audience:** Who are they talking to?
3. **A Key Strategic Insight:** What is one clever thing they are doing, or one major missed opportunity?
"""
        reply = model.invoke([HumanMessage(content=prompt)])
        return reply.content
    except Exception as err:
        # UI boundary: report the failure as text instead of raising.
        return f"Could not generate AI summary. The API call failed: {err}"
# --- The Main Orchestrator (Unchanged) --- | |
def _md_table_cell(value):
    """Return *value* as text that is safe inside a one-line Markdown table cell."""
    # An unescaped pipe ends the cell (even inside a code span) and a newline
    # ends the row, so collapse whitespace and escape pipes.
    return " ".join(str(value).split()).replace("|", "\\|")


def competitor_teardown(url):
    """Run the whole analysis pipeline for one URL, streaming progress.

    This is a generator. Every yielded value is a 6-tuple in this order:
    summary, SEO, tech, ads and social Markdown, then a ``gr.Button`` that
    updates the submit button. Intermediate yields carry a status message in
    the first slot and ``None`` in the other four text slots, with the button
    disabled. The final yield carries the results and re-enables the button.

    Args:
        url: Address typed by the user. Surrounding whitespace is ignored and
            ``https://`` is prepended when no ``http://``/``https://`` scheme
            is present.

    Yields:
        Progress tuples, then either a failure tuple (empty input or fetch
        failure) or the final results tuple.
    """
    # Pasted addresses often carry stray spaces; None is treated as empty.
    url = (url or "").strip()
    if not url:
        yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
        return
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    html = fetch_html(url)
    if not html:
        yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
        return

    soup = BeautifulSoup(html, 'html.parser')
    yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    seo_data = analyze_onpage_seo(soup)
    tech_data = analyze_tech_stack(soup, html)

    yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    ai_summary = generate_ai_summary(url, seo_data, tech_data)

    # Titles and descriptions frequently contain "|" or line breaks, which
    # would otherwise split the table row.
    title_cell = _md_table_cell(seo_data['title'])
    description_cell = _md_table_cell(seo_data['description'])
    h1_tags = seo_data['headings']['h1']
    h2_tags = seo_data['headings']['h2']
    h1_listing = ', '.join(f'`{h}`' for h in h1_tags)

    seo_md = f"""
### π SEO & Content
| Metric | Value |
| :--- | :--- |
| **Page Title** | `{title_cell}` |
| **Meta Description** | `{description_cell}` |
| **Word Count** | `{seo_data['word_count']}` |
#### Heading Structure:
- **H1 Tags ({len(h1_tags)}):** {h1_listing}
- **H2 Tags ({len(h2_tags)}):** {len(h2_tags)} found
"""
    tech_md = "### stack Tech Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])

    yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
# --- Gradio UI (Unchanged) --- | |
# Page layout: header, URL input row, one tab per report section, then the
# click wiring and a footer call-to-action.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css="footer {display: none !important;}",
) as demo:
    gr.Markdown("# π΅οΈ Gumbo Board: The Instant Competitor Teardown")
    gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")

    # Input row: wide textbox next to a narrow submit button.
    with gr.Row():
        url_input = gr.Textbox(
            label="Enter Competitor URL",
            placeholder="e.g., notion.so or mailchimp.com",
            scale=4,
        )
        submit_btn = gr.Button("Analyze", variant="primary", scale=1)

    # One Markdown pane per tab.
    with gr.Tabs():
        with gr.TabItem("π§ AI Summary"):
            summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
        with gr.TabItem("π On-Page SEO"):
            seo_output = gr.Markdown()
        with gr.TabItem("βοΈ Tech Stack"):
            tech_output = gr.Markdown()
        with gr.TabItem("π’ Ads & Keywords (Coming Soon)"):
            ads_output = gr.Markdown()
        with gr.TabItem("Social Presence (Coming Soon)"):
            social_output = gr.Markdown()

    # The order here must match the tuple order yielded by
    # competitor_teardown; the button itself is the last output so the
    # handler can relabel and disable it while running.
    submit_btn.click(
        fn=competitor_teardown,
        inputs=[url_input],
        outputs=[
            summary_output,
            seo_output,
            tech_output,
            ads_output,
            social_output,
            submit_btn,
        ],
    )

    gr.Markdown("---")
    gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[π Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")

demo.launch()