Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,13 +8,9 @@ from langchain_core.messages import HumanMessage
|
|
8 |
import os
|
9 |
import re
|
10 |
|
11 |
-
#
|
12 |
-
#
|
13 |
-
|
14 |
-
# --- Core Scraping & Analysis Functions (Unchanged) ---
|
15 |
-
|
16 |
def fetch_html(url):
|
17 |
-
"""Fetches HTML content from a URL with a browser-like user-agent."""
|
18 |
try:
|
19 |
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
|
20 |
response = requests.get(url, headers=headers, timeout=10)
|
@@ -25,7 +21,6 @@ def fetch_html(url):
|
|
25 |
return None
|
26 |
|
27 |
def analyze_onpage_seo(soup):
|
28 |
-
"""Analyzes the on-page SEO elements of a webpage."""
|
29 |
title = soup.find('title').get_text(strip=True) if soup.find('title') else "N/A"
|
30 |
description = soup.find('meta', attrs={'name': 'description'})['content'] if soup.find('meta', attrs={'name': 'description'}) else "N/A"
|
31 |
headings = {'h1': [], 'h2': [], 'h3': []}
|
@@ -36,7 +31,6 @@ def analyze_onpage_seo(soup):
|
|
36 |
return {"title": title, "description": description, "headings": headings, "word_count": word_count}
|
37 |
|
38 |
def analyze_tech_stack(soup, html):
|
39 |
-
"""Performs a very basic tech stack analysis."""
|
40 |
tech = set()
|
41 |
if "react.js" in html or 'data-reactroot' in html: tech.add("React")
|
42 |
if "vue.js" in html: tech.add("Vue.js")
|
@@ -47,34 +41,21 @@ def analyze_tech_stack(soup, html):
|
|
47 |
if soup.find('link', href=lambda x: x and 'tailwind' in x): tech.add("Tailwind CSS")
|
48 |
return list(tech) if tech else ["Basic HTML/CSS"]
|
49 |
|
50 |
-
# --- AI Summary Generation (Now with Robust Initialization) ---
|
51 |
-
|
52 |
def generate_ai_summary(url, seo_data, tech_data):
|
53 |
-
"""Generates a high-level strategic summary using an LLM."""
|
54 |
-
|
55 |
-
# ** THE FIX: Lazy Initialization **
|
56 |
-
# We initialize the LLM inside the function, only when it's needed.
|
57 |
api_key = os.environ.get("GEMINI_API_KEY")
|
58 |
-
|
59 |
-
# Provide a clear error message if the key is missing.
|
60 |
if not api_key:
|
61 |
return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."
|
62 |
-
|
63 |
try:
|
64 |
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)
|
65 |
-
|
66 |
prompt = f"""
|
67 |
You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
|
68 |
-
|
69 |
## On-Page SEO Analysis:
|
70 |
- **Title:** {seo_data['title']}
|
71 |
- **Meta Description:** {seo_data['description']}
|
72 |
- **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
|
73 |
- **Page Word Count:** {seo_data['word_count']}
|
74 |
-
|
75 |
## Technology Stack:
|
76 |
- {', '.join(tech_data)}
|
77 |
-
|
78 |
## Your Analysis:
|
79 |
Provide a 3-bullet point summary covering:
|
80 |
1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
|
@@ -84,32 +65,22 @@ def generate_ai_summary(url, seo_data, tech_data):
|
|
84 |
response = llm.invoke([HumanMessage(content=prompt)])
|
85 |
return response.content
|
86 |
except Exception as e:
|
87 |
-
# Provide a more specific error if the API call itself fails
|
88 |
return f"Could not generate AI summary. The API call failed: {e}"
|
89 |
|
90 |
-
# --- The Main Orchestrator (Unchanged) ---
|
91 |
-
|
92 |
def competitor_teardown(url):
|
93 |
-
"""The main function that runs the entire analysis pipeline."""
|
94 |
if not url.startswith(('http://', 'https://')):
|
95 |
url = 'https://' + url
|
96 |
-
|
97 |
yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
|
98 |
-
|
99 |
html = fetch_html(url)
|
100 |
if not html:
|
101 |
yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
|
102 |
return
|
103 |
-
|
104 |
soup = BeautifulSoup(html, 'html.parser')
|
105 |
-
|
106 |
yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
|
107 |
seo_data = analyze_onpage_seo(soup)
|
108 |
tech_data = analyze_tech_stack(soup, html)
|
109 |
-
|
110 |
yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
|
111 |
ai_summary = generate_ai_summary(url, seo_data, tech_data)
|
112 |
-
|
113 |
seo_md = f"""
|
114 |
### 📊 SEO & Content
|
115 |
| Metric | Value |
|
@@ -117,25 +88,20 @@ def competitor_teardown(url):
|
|
117 |
| **Page Title** | `{seo_data['title']}` |
|
118 |
| **Meta Description** | `{seo_data['description']}` |
|
119 |
| **Word Count** | `{seo_data['word_count']}` |
|
120 |
-
|
121 |
#### Heading Structure:
|
122 |
- **H1 Tags ({len(seo_data['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo_data['headings']['h1'])}
|
123 |
- **H2 Tags ({len(seo_data['headings']['h2'])}):** {len(seo_data['headings']['h2'])} found
|
124 |
"""
|
125 |
-
|
126 |
tech_md = "### stack Tech Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])
|
127 |
-
|
128 |
yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
|
129 |
|
130 |
-
# --- Gradio UI (Unchanged) ---
|
131 |
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
|
132 |
gr.Markdown("# 🕵️ Gumbo Board: The Instant Competitor Teardown")
|
133 |
gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
|
134 |
-
|
135 |
with gr.Row():
|
136 |
url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
|
137 |
submit_btn = gr.Button("Analyze", variant="primary", scale=1)
|
138 |
-
|
139 |
with gr.Tabs():
|
140 |
with gr.TabItem("🧠 AI Summary"):
|
141 |
summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
|
@@ -147,14 +113,16 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}")
|
|
147 |
ads_output = gr.Markdown()
|
148 |
with gr.TabItem("Social Presence (Coming Soon)"):
|
149 |
social_output = gr.Markdown()
|
150 |
-
|
151 |
submit_btn.click(
|
152 |
fn=competitor_teardown,
|
153 |
inputs=[url_input],
|
154 |
outputs=[summary_output, seo_output, tech_output, ads_output, social_output, submit_btn]
|
155 |
)
|
156 |
-
|
157 |
gr.Markdown("---")
|
158 |
gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[🚀 Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")
|
159 |
|
160 |
-
|
|
|
|
|
|
|
|
|
|
8 |
import os
|
9 |
import re
|
10 |
|
11 |
+
# --- Core scraping, analysis, and AI-summary helpers ---
|
12 |
+
# (fetch_html, analyze_onpage_seo, analyze_tech_stack, generate_ai_summary)
|
|
|
|
|
|
|
13 |
def fetch_html(url):
|
|
|
14 |
try:
|
15 |
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
|
16 |
response = requests.get(url, headers=headers, timeout=10)
|
|
|
21 |
return None
|
22 |
|
23 |
def analyze_onpage_seo(soup):
|
|
|
24 |
title = soup.find('title').get_text(strip=True) if soup.find('title') else "N/A"
|
25 |
description = soup.find('meta', attrs={'name': 'description'})['content'] if soup.find('meta', attrs={'name': 'description'}) else "N/A"
|
26 |
headings = {'h1': [], 'h2': [], 'h3': []}
|
|
|
31 |
return {"title": title, "description": description, "headings": headings, "word_count": word_count}
|
32 |
|
33 |
def analyze_tech_stack(soup, html):
|
|
|
34 |
tech = set()
|
35 |
if "react.js" in html or 'data-reactroot' in html: tech.add("React")
|
36 |
if "vue.js" in html: tech.add("Vue.js")
|
|
|
41 |
if soup.find('link', href=lambda x: x and 'tailwind' in x): tech.add("Tailwind CSS")
|
42 |
return list(tech) if tech else ["Basic HTML/CSS"]
|
43 |
|
|
|
|
|
44 |
def generate_ai_summary(url, seo_data, tech_data):
|
|
|
|
|
|
|
|
|
45 |
api_key = os.environ.get("GEMINI_API_KEY")
|
|
|
|
|
46 |
if not api_key:
|
47 |
return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."
|
|
|
48 |
try:
|
49 |
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)
|
|
|
50 |
prompt = f"""
|
51 |
You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
|
|
|
52 |
## On-Page SEO Analysis:
|
53 |
- **Title:** {seo_data['title']}
|
54 |
- **Meta Description:** {seo_data['description']}
|
55 |
- **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
|
56 |
- **Page Word Count:** {seo_data['word_count']}
|
|
|
57 |
## Technology Stack:
|
58 |
- {', '.join(tech_data)}
|
|
|
59 |
## Your Analysis:
|
60 |
Provide a 3-bullet point summary covering:
|
61 |
1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
|
|
|
65 |
response = llm.invoke([HumanMessage(content=prompt)])
|
66 |
return response.content
|
67 |
except Exception as e:
|
|
|
68 |
return f"Could not generate AI summary. The API call failed: {e}"
|
69 |
|
|
|
|
|
70 |
def competitor_teardown(url):
|
|
|
71 |
if not url.startswith(('http://', 'https://')):
|
72 |
url = 'https://' + url
|
|
|
73 |
yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
|
|
|
74 |
html = fetch_html(url)
|
75 |
if not html:
|
76 |
yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
|
77 |
return
|
|
|
78 |
soup = BeautifulSoup(html, 'html.parser')
|
|
|
79 |
yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
|
80 |
seo_data = analyze_onpage_seo(soup)
|
81 |
tech_data = analyze_tech_stack(soup, html)
|
|
|
82 |
yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
|
83 |
ai_summary = generate_ai_summary(url, seo_data, tech_data)
|
|
|
84 |
seo_md = f"""
|
85 |
### 📊 SEO & Content
|
86 |
| Metric | Value |
|
|
|
88 |
| **Page Title** | `{seo_data['title']}` |
|
89 |
| **Meta Description** | `{seo_data['description']}` |
|
90 |
| **Word Count** | `{seo_data['word_count']}` |
|
|
|
91 |
#### Heading Structure:
|
92 |
- **H1 Tags ({len(seo_data['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo_data['headings']['h1'])}
|
93 |
- **H2 Tags ({len(seo_data['headings']['h2'])}):** {len(seo_data['headings']['h2'])} found
|
94 |
"""
|
|
|
95 |
tech_md = "### stack Tech Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])
|
|
|
96 |
yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
|
97 |
|
98 |
+
# --- Gradio UI Definition (Unchanged) ---
|
99 |
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
|
100 |
gr.Markdown("# 🕵️ Gumbo Board: The Instant Competitor Teardown")
|
101 |
gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
|
|
|
102 |
with gr.Row():
|
103 |
url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
|
104 |
submit_btn = gr.Button("Analyze", variant="primary", scale=1)
|
|
|
105 |
with gr.Tabs():
|
106 |
with gr.TabItem("🧠 AI Summary"):
|
107 |
summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
|
|
|
113 |
ads_output = gr.Markdown()
|
114 |
with gr.TabItem("Social Presence (Coming Soon)"):
|
115 |
social_output = gr.Markdown()
|
|
|
116 |
submit_btn.click(
|
117 |
fn=competitor_teardown,
|
118 |
inputs=[url_input],
|
119 |
outputs=[summary_output, seo_output, tech_output, ads_output, social_output, submit_btn]
|
120 |
)
|
|
|
121 |
gr.Markdown("---")
|
122 |
gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[🚀 Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")
|
123 |
|
124 |
+
# --- THE FIX: Launch the app within a main block ---
|
125 |
+
# The guard ensures launch() runs only when this file is executed as a script, not when it is imported;
|
126 |
+
# demo.launch() is what starts the Gradio server and blocks, keeping the process alive.
|
127 |
+
if __name__ == "__main__":
|
128 |
+
demo.launch()
|