Spaces:
Sleeping
Sleeping
File size: 7,112 Bytes
e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb 656a257 e0da2eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
import os
import re
# --- Configuration ---
# We no longer initialize the LLM here. We will do it on-demand.
# --- Core Scraping & Analysis Functions (Unchanged) ---
def fetch_html(url):
    """Download the HTML for *url*, returning None on any request failure.

    A desktop-browser User-Agent is sent because many sites block or alter
    responses for default library user agents.
    """
    browser_headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/91.0.4472.124 Safari/537.36'
        )
    }
    try:
        resp = requests.get(url, headers=browser_headers, timeout=10)
        resp.raise_for_status()
        return resp.text
    except requests.RequestException as exc:
        print(f"Error fetching {url}: {exc}")
        return None
def analyze_onpage_seo(soup):
    """Extract on-page SEO signals from a parsed page.

    Args:
        soup: A BeautifulSoup document for the page.

    Returns:
        dict with keys:
            title (str): <title> text (stripped), or "N/A" if absent.
            description (str): meta-description content, or "N/A" if the
                tag or its ``content`` attribute is missing.
            headings (dict): lists of h1/h2/h3 heading texts.
            word_count (int): whitespace-delimited word count of page text.
    """
    # Find each tag once (the previous pattern called find() twice per tag).
    title_tag = soup.find('title')
    title = title_tag.get_text(strip=True) if title_tag else "N/A"

    # Tag.get() avoids a KeyError when a <meta name="description"> tag
    # exists but has no content attribute.
    meta_tag = soup.find('meta', attrs={'name': 'description'})
    description = meta_tag.get('content', "N/A") if meta_tag else "N/A"

    headings = {
        h_tag: [tag.get_text(strip=True) for tag in soup.find_all(h_tag)]
        for h_tag in ('h1', 'h2', 'h3')
    }
    word_count = len(soup.get_text().split())
    return {"title": title, "description": description, "headings": headings, "word_count": word_count}
def analyze_tech_stack(soup, html):
    """Heuristically detect front-end frameworks and tracking tools.

    Scans the raw HTML for well-known substring fingerprints and falls back
    to "Basic HTML/CSS" when nothing recognizable is found.
    """
    # (needle in raw HTML, technology label) fingerprint table.
    fingerprints = [
        ("react.js", "React"),
        ("data-reactroot", "React"),
        ("vue.js", "Vue.js"),
        ("angular.js", "Angular"),
        ("wp-content", "WordPress"),
        ("gtag('config'", "Google Analytics (GA4)"),
        ("GTM-", "Google Tag Manager"),
    ]
    detected = {label for needle, label in fingerprints if needle in html}
    # Tailwind is referenced via a stylesheet link rather than inline markup.
    if soup.find('link', href=lambda x: x and 'tailwind' in x):
        detected.add("Tailwind CSS")
    return list(detected) if detected else ["Basic HTML/CSS"]
# --- AI Summary Generation (Robust, Lazy Initialization) ---
def generate_ai_summary(url, seo_data, tech_data):
    """Ask Gemini for a strategic teardown summary of the analyzed site.

    The LLM client is created lazily, inside this function, so the app can
    start even when the API key is absent; a human-readable error string is
    returned (never raised) when the key is missing or the API call fails.
    """
    gemini_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        # Surface a clear, actionable message instead of crashing the UI.
        return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."
    try:
        model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=gemini_key)
        prompt = f"""
You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
## On-Page SEO Analysis:
- **Title:** {seo_data['title']}
- **Meta Description:** {seo_data['description']}
- **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
- **Page Word Count:** {seo_data['word_count']}
## Technology Stack:
- {', '.join(tech_data)}
## Your Analysis:
Provide a 3-bullet point summary covering:
1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
2. **Their Target Audience:** Who are they talking to?
3. **A Key Strategic Insight:** What is one clever thing they are doing, or one major missed opportunity?
"""
        reply = model.invoke([HumanMessage(content=prompt)])
        return reply.content
    except Exception as e:
        # Distinguish API-call failures from the missing-key case above.
        return f"Could not generate AI summary. The API call failed: {e}"
# --- The Main Orchestrator ---
def competitor_teardown(url):
    """Run the full analysis pipeline for *url*.

    Generator used as a Gradio event handler: each yield is a 6-tuple of
    (summary_md, seo_md, tech_md, ads_md, social_md, submit_button) so the
    UI updates incrementally while the analysis runs.
    """
    # Be forgiving about bare domains like "example.com".
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    html = fetch_html(url)
    if not html:
        yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
        return

    soup = BeautifulSoup(html, 'html.parser')
    yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    seo_data = analyze_onpage_seo(soup)
    tech_data = analyze_tech_stack(soup, html)

    yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
    ai_summary = generate_ai_summary(url, seo_data, tech_data)

    # NOTE(review): the section headers previously contained mojibake
    # ("### π SEO & Content", "### stack Tech Stack") from a bad encoding
    # round-trip; restored to readable emoji headers. The H2 line also
    # printed the count twice ("**H2 Tags (N):** N found") — fixed.
    seo_md = f"""
### 📊 SEO & Content
| Metric | Value |
| :--- | :--- |
| **Page Title** | `{seo_data['title']}` |
| **Meta Description** | `{seo_data['description']}` |
| **Word Count** | `{seo_data['word_count']}` |
#### Heading Structure:
- **H1 Tags ({len(seo_data['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo_data['headings']['h1'])}
- **H2 Tags:** {len(seo_data['headings']['h2'])} found
"""
    tech_md = "### ⚙️ Tech Stack\n\n" + "\n".join(f"- `{t}`" for t in tech_data)
    yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
# --- Gradio UI ---
# NOTE(review): the original UI strings contained mojibake from a UTF-8
# emoji byte sequence decoded as ISO-8859-7 ("π΅οΈ" -> 🕵️, "βοΈ" -> ⚙️,
# "π§" -> 🧠, "π’" -> 📢). Lone "π" glyphs are ambiguous; 📊 and 🚀 are
# best guesses — confirm against the original repository.
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
    gr.Markdown("# 🕵️ Gumbo Board: The Instant Competitor Teardown")
    gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
    with gr.Row():
        url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
        submit_btn = gr.Button("Analyze", variant="primary", scale=1)
    with gr.Tabs():
        with gr.TabItem("🧠 AI Summary"):
            summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
        with gr.TabItem("📊 On-Page SEO"):
            seo_output = gr.Markdown()
        with gr.TabItem("⚙️ Tech Stack"):
            tech_output = gr.Markdown()
        with gr.TabItem("📢 Ads & Keywords (Coming Soon)"):
            ads_output = gr.Markdown()
        with gr.TabItem("Social Presence (Coming Soon)"):
            social_output = gr.Markdown()

    # The handler is a generator: all six outputs (including the button
    # itself) refresh on every yield so the UI shows progress.
    submit_btn.click(
        fn=competitor_teardown,
        inputs=[url_input],
        outputs=[summary_output, seo_output, tech_output, ads_output, social_output, submit_btn]
    )
    gr.Markdown("---")
    gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[🚀 Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")

demo.launch()