File size: 7,112 Bytes
e0da2eb
 
 
 
 
 
 
 
 
 
656a257
 
e0da2eb
656a257
e0da2eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656a257
e0da2eb
 
 
 
 
 
 
 
 
 
 
 
 
656a257
 
e0da2eb
 
656a257
 
 
 
 
 
 
 
 
e0da2eb
656a257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0da2eb
 
 
656a257
 
e0da2eb
656a257
e0da2eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656a257
e0da2eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gradio as gr
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
import os
import re

# --- Configuration ---
# We no longer initialize the LLM here. We will do it on-demand.

# --- Core Scraping & Analysis Functions (Unchanged) ---

def fetch_html(url):
    """Fetches HTML content from a URL with a browser-like user-agent."""
    # Spoof a desktop Chrome UA so sites that reject the default
    # python-requests client still serve their normal markup.
    browser_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    try:
        resp = requests.get(url, headers=browser_headers, timeout=10)
        resp.raise_for_status()
    except requests.RequestException as exc:
        # Log and signal failure with None; the caller handles the message.
        print(f"Error fetching {url}: {exc}")
        return None
    return resp.text

def analyze_onpage_seo(soup):
    """Analyzes the on-page SEO elements of a webpage.

    Args:
        soup: a parsed BeautifulSoup document.

    Returns:
        dict with keys "title" (str), "description" (str),
        "headings" (dict of h1/h2/h3 -> list of heading texts) and
        "word_count" (int).
    """
    # Look each element up once instead of calling find() twice per item.
    title_tag = soup.find('title')
    title = title_tag.get_text(strip=True) if title_tag else "N/A"

    # .get() avoids a KeyError when a <meta name="description"> tag exists
    # but carries no content attribute (plain indexing would crash there).
    meta_tag = soup.find('meta', attrs={'name': 'description'})
    description = meta_tag.get('content', "N/A") if meta_tag else "N/A"

    headings = {
        h_tag: [tag.get_text(strip=True) for tag in soup.find_all(h_tag)]
        for h_tag in ('h1', 'h2', 'h3')
    }
    word_count = len(soup.get_text().split())
    return {"title": title, "description": description, "headings": headings, "word_count": word_count}

def analyze_tech_stack(soup, html):
    """Performs a very basic tech stack analysis."""
    # Map each technology label to the raw-HTML substrings that betray it;
    # any one matching marker is enough to claim the technology.
    markers = {
        "React": ("react.js", 'data-reactroot'),
        "Vue.js": ("vue.js",),
        "Angular": ("angular.js",),
        "WordPress": ("wp-content",),
        "Google Analytics (GA4)": ("gtag('config'",),
        "Google Tag Manager": ("GTM-",),
    }
    detected = {label for label, needles in markers.items()
                if any(needle in html for needle in needles)}
    # Tailwind is detected from a stylesheet link rather than a raw substring.
    if soup.find('link', href=lambda href: href and 'tailwind' in href):
        detected.add("Tailwind CSS")
    return list(detected) if detected else ["Basic HTML/CSS"]

# --- AI Summary Generation (Now with Robust Initialization) ---

def generate_ai_summary(url, seo_data, tech_data):
    """Generates a high-level strategic summary using an LLM."""

    # Lazy initialization: the Gemini client is built per call, so the app
    # can import and launch even when the secret is absent.
    gemini_key = os.environ.get("GEMINI_API_KEY")

    # Missing key -> return a user-visible explanation rather than crashing.
    if not gemini_key:
        return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."

    try:
        model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=gemini_key)

        prompt = f"""
        You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.

        ## On-Page SEO Analysis:
        - **Title:** {seo_data['title']}
        - **Meta Description:** {seo_data['description']}
        - **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
        - **Page Word Count:** {seo_data['word_count']}

        ## Technology Stack:
        - {', '.join(tech_data)}

        ## Your Analysis:
        Provide a 3-bullet point summary covering:
        1.  **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
        2.  **Their Target Audience:** Who are they talking to?
        3.  **A Key Strategic Insight:** What is one clever thing they are doing, or one major missed opportunity?
        """
        reply = model.invoke([HumanMessage(content=prompt)])
        return reply.content
    except Exception as api_err:
        # Report API-level failures back to the UI instead of raising.
        return f"Could not generate AI summary. The API call failed: {api_err}"

# --- The Main Orchestrator (Unchanged) ---

def competitor_teardown(url):
    """The main function that runs the entire analysis pipeline."""
    # Default to HTTPS when the user types a bare domain.
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    # Each yield streams a progress update to the six outputs wired in the UI;
    # the last slot disables/re-enables the submit button.
    yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)

    page_source = fetch_html(url)
    if not page_source:
        yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
        return

    dom = BeautifulSoup(page_source, 'html.parser')

    yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    seo = analyze_onpage_seo(dom)
    stack = analyze_tech_stack(dom, page_source)

    yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    summary = generate_ai_summary(url, seo, stack)

    # Render the SEO findings as a markdown table for the SEO tab.
    seo_md = f"""
    ### πŸ‘‘ SEO & Content
    | Metric | Value |
    | :--- | :--- |
    | **Page Title** | `{seo['title']}` |
    | **Meta Description** | `{seo['description']}` |
    | **Word Count** | `{seo['word_count']}` |
    
    #### Heading Structure:
    - **H1 Tags ({len(seo['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo['headings']['h1'])}
    - **H2 Tags ({len(seo['headings']['h2'])}):** {len(seo['headings']['h2'])} found
    """

    tech_md = "### stack Tech Stack\n\n" + "\n".join(f"- `{t}`" for t in stack)

    yield summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)

# --- Gradio UI (Unchanged) ---
# Top-level Gradio app: a URL input, an Analyze button, and tabbed outputs
# that `competitor_teardown` streams into via its yielded 6-tuples.
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
    gr.Markdown("# πŸ•΅οΈ Gumbo Board: The Instant Competitor Teardown")
    gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")

    with gr.Row():
        # URL entry plus the submit button; competitor_teardown swaps the
        # button to a disabled "Analyzing..." state while it runs.
        url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
        submit_btn = gr.Button("Analyze", variant="primary", scale=1)

    with gr.Tabs():
        # One tab per output slot of competitor_teardown's yielded tuples
        # (the sixth slot updates submit_btn itself, not a tab).
        with gr.TabItem("🧠 AI Summary"):
            summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
        with gr.TabItem("πŸ” On-Page SEO"):
            seo_output = gr.Markdown()
        with gr.TabItem("βš™οΈ Tech Stack"):
            tech_output = gr.Markdown()
        with gr.TabItem("πŸ“’ Ads & Keywords (Coming Soon)"):
            ads_output = gr.Markdown()
        with gr.TabItem("Social Presence (Coming Soon)"):
            social_output = gr.Markdown()

    # Wire the button to the generator; outputs map positionally onto each
    # yielded tuple from competitor_teardown.
    submit_btn.click(
        fn=competitor_teardown,
        inputs=[url_input],
        outputs=[summary_output, seo_output, tech_output, ads_output, social_output, submit_btn]
    )
    
    gr.Markdown("---")
    gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[πŸš€ Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")

# Start the Gradio server (blocks until shutdown).
demo.launch()