Spaces:
Sleeping
Sleeping
File size: 7,112 Bytes
e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
import os
import re
# --- Configuration ---
# We no longer initialize the LLM here. We will do it on-demand.
# --- Core Scraping & Analysis Functions (Unchanged) ---
def fetch_html(url):
    """Download the HTML for *url*, returning None on any request failure.

    A desktop-browser User-Agent is sent because many sites block or alter
    responses for default library user agents.
    """
    browser_headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/91.0.4472.124 Safari/537.36'
        )
    }
    try:
        resp = requests.get(url, headers=browser_headers, timeout=10)
        resp.raise_for_status()
        return resp.text
    except requests.RequestException as exc:
        print(f"Error fetching {url}: {exc}")
        return None
def analyze_onpage_seo(soup):
    """Extract on-page SEO signals from a parsed page.

    Args:
        soup: A BeautifulSoup document for the page.

    Returns:
        dict with keys:
            title (str): <title> text (stripped), or "N/A" if absent.
            description (str): meta-description content, or "N/A" if the
                tag or its ``content`` attribute is missing.
            headings (dict): lists of h1/h2/h3 heading texts.
            word_count (int): whitespace-delimited word count of page text.
    """
    # Find each tag once (the previous pattern called find() twice per tag).
    title_tag = soup.find('title')
    title = title_tag.get_text(strip=True) if title_tag else "N/A"

    # Tag.get() avoids a KeyError when a <meta name="description"> tag
    # exists but has no content attribute.
    meta_tag = soup.find('meta', attrs={'name': 'description'})
    description = meta_tag.get('content', "N/A") if meta_tag else "N/A"

    headings = {
        h_tag: [tag.get_text(strip=True) for tag in soup.find_all(h_tag)]
        for h_tag in ('h1', 'h2', 'h3')
    }
    word_count = len(soup.get_text().split())
    return {"title": title, "description": description, "headings": headings, "word_count": word_count}
def analyze_tech_stack(soup, html):
    """Heuristically detect front-end frameworks and tracking tools.

    Scans the raw HTML for well-known substring fingerprints and falls back
    to "Basic HTML/CSS" when nothing recognizable is found.
    """
    # (needle in raw HTML, technology label) fingerprint table.
    fingerprints = [
        ("react.js", "React"),
        ("data-reactroot", "React"),
        ("vue.js", "Vue.js"),
        ("angular.js", "Angular"),
        ("wp-content", "WordPress"),
        ("gtag('config'", "Google Analytics (GA4)"),
        ("GTM-", "Google Tag Manager"),
    ]
    detected = {label for needle, label in fingerprints if needle in html}
    # Tailwind is referenced via a stylesheet link rather than inline markup.
    if soup.find('link', href=lambda x: x and 'tailwind' in x):
        detected.add("Tailwind CSS")
    return list(detected) if detected else ["Basic HTML/CSS"]
# --- AI Summary Generation (Robust, Lazy Initialization) ---
def generate_ai_summary(url, seo_data, tech_data):
    """Ask Gemini for a strategic teardown summary of the analyzed site.

    The LLM client is created lazily, inside this function, so the app can
    start even when the API key is absent; a human-readable error string is
    returned (never raised) when the key is missing or the API call fails.
    """
    gemini_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        # Surface a clear, actionable message instead of crashing the UI.
        return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."
    try:
        model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=gemini_key)
        prompt = f"""
You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
## On-Page SEO Analysis:
- **Title:** {seo_data['title']}
- **Meta Description:** {seo_data['description']}
- **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
- **Page Word Count:** {seo_data['word_count']}
## Technology Stack:
- {', '.join(tech_data)}
## Your Analysis:
Provide a 3-bullet point summary covering:
1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
2. **Their Target Audience:** Who are they talking to?
3. **A Key Strategic Insight:** What is one clever thing they are doing, or one major missed opportunity?
"""
        reply = model.invoke([HumanMessage(content=prompt)])
        return reply.content
    except Exception as e:
        # Distinguish API-call failures from the missing-key case above.
        return f"Could not generate AI summary. The API call failed: {e}"
# --- The Main Orchestrator ---
def competitor_teardown(url):
    """Run the full analysis pipeline for *url*.

    Generator used as a Gradio event handler: each yield is a 6-tuple of
    (summary_md, seo_md, tech_md, ads_md, social_md, submit_button) so the
    UI updates incrementally while the analysis runs.
    """
    # Be forgiving about bare domains like "example.com".
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    html = fetch_html(url)
    if not html:
        yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
        return

    soup = BeautifulSoup(html, 'html.parser')
    yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    seo_data = analyze_onpage_seo(soup)
    tech_data = analyze_tech_stack(soup, html)

    yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    ai_summary = generate_ai_summary(url, seo_data, tech_data)

    # NOTE(review): the section headers previously contained mojibake
    # ("### π SEO & Content", "### stack Tech Stack") from a bad encoding
    # round-trip; restored to readable emoji headers. The H2 line also
    # printed the count twice ("**H2 Tags (N):** N found") — fixed.
    seo_md = f"""
### 📊 SEO & Content
| Metric | Value |
| :--- | :--- |
| **Page Title** | `{seo_data['title']}` |
| **Meta Description** | `{seo_data['description']}` |
| **Word Count** | `{seo_data['word_count']}` |
#### Heading Structure:
- **H1 Tags ({len(seo_data['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo_data['headings']['h1'])}
- **H2 Tags:** {len(seo_data['headings']['h2'])} found
"""
    tech_md = "### ⚙️ Tech Stack\n\n" + "\n".join(f"- `{t}`" for t in tech_data)
    yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
# --- Gradio UI ---
# NOTE(review): the original UI strings contained mojibake from a UTF-8
# emoji byte sequence decoded as ISO-8859-7 ("π΅οΈ" -> 🕵️, "βοΈ" -> ⚙️,
# "π§" -> 🧠, "π’" -> 📢). Lone "π" glyphs are ambiguous; 📊 and 🚀 are
# best guesses — confirm against the original repository.
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
    gr.Markdown("# 🕵️ Gumbo Board: The Instant Competitor Teardown")
    gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
    with gr.Row():
        url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
        submit_btn = gr.Button("Analyze", variant="primary", scale=1)
    with gr.Tabs():
        with gr.TabItem("🧠 AI Summary"):
            summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
        with gr.TabItem("📊 On-Page SEO"):
            seo_output = gr.Markdown()
        with gr.TabItem("⚙️ Tech Stack"):
            tech_output = gr.Markdown()
        with gr.TabItem("📢 Ads & Keywords (Coming Soon)"):
            ads_output = gr.Markdown()
        with gr.TabItem("Social Presence (Coming Soon)"):
            social_output = gr.Markdown()

    # The handler is a generator: all six outputs (including the button
    # itself) refresh on every yield so the UI shows progress.
    submit_btn.click(
        fn=competitor_teardown,
        inputs=[url_input],
        outputs=[summary_output, seo_output, tech_output, ads_output, social_output, submit_btn]
    )
    gr.Markdown("---")
    gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[🚀 Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")

demo.launch()