mgbam committed
Commit e0da2eb · verified · 1 Parent(s): bb6ee88

Create app.py

Files changed (1)
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
+ import gradio as gr
+ import requests
+ from bs4 import BeautifulSoup
+ import pandas as pd
+ import plotly.express as px
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain_core.messages import HumanMessage
+ import os
+ import re
+
+ # --- Configuration & Initialization ---
+ # For deployment on Hugging Face, set GEMINI_API_KEY in the Space's secrets
+ llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=os.environ.get("GEMINI_API_KEY"))
+
+ # --- Core Scraping & Analysis Functions ---
+
+ def fetch_html(url):
+     """Fetches HTML content from a URL with a browser-like user-agent."""
+     try:
+         headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
+         response = requests.get(url, headers=headers, timeout=10)
+         response.raise_for_status()
+         return response.text
+     except requests.RequestException as e:
+         print(f"Error fetching {url}: {e}")
+         return None
+
+ def analyze_onpage_seo(soup):
+     """Analyzes the on-page SEO elements of a webpage."""
+     title = soup.find('title').get_text(strip=True) if soup.find('title') else "N/A"
+     description = (soup.find('meta', attrs={'name': 'description'}) or {}).get('content', "N/A")
+
+     headings = {'h1': [], 'h2': [], 'h3': []}
+     for h_tag in ['h1', 'h2', 'h3']:
+         for tag in soup.find_all(h_tag):
+             headings[h_tag].append(tag.get_text(strip=True))
+
+     # Simple word count
+     word_count = len(soup.get_text().split())
+
+     return {
+         "title": title,
+         "description": description,
+         "headings": headings,
+         "word_count": word_count
+     }
+
+ def analyze_tech_stack(soup, html):
+     """Performs a very basic tech stack analysis."""
+     tech = set()
+     if "react.js" in html or 'data-reactroot' in html: tech.add("React")
+     if "vue.js" in html: tech.add("Vue.js")
+     if "angular.js" in html: tech.add("Angular")
+     if "wp-content" in html: tech.add("WordPress")
+     if "gtag('config'" in html: tech.add("Google Analytics (GA4)")
+     if "GTM-" in html: tech.add("Google Tag Manager")
+     if soup.find('link', href=lambda x: x and 'tailwind' in x): tech.add("Tailwind CSS")
+
+     return list(tech) if tech else ["Basic HTML/CSS"]
+
+ def generate_ai_summary(url, seo_data, tech_data):
+     """Generates a high-level strategic summary using an LLM."""
+     prompt = f"""
+ You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
+
+ ## On-Page SEO Analysis:
+ - **Title:** {seo_data['title']}
+ - **Meta Description:** {seo_data['description']}
+ - **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
+ - **Page Word Count:** {seo_data['word_count']}
+
+ ## Technology Stack:
+ - {', '.join(tech_data)}
+
+ ## Your Analysis:
+ Provide a 3-bullet point summary covering:
+ 1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
+ 2. **Their Target Audience:** Who are they talking to?
+ 3. **A Key Strategic Insight:** What is one clever thing they are doing, or one major missed opportunity?
+ """
+     try:
+         response = llm.invoke([HumanMessage(content=prompt)])
+         return response.content
+     except Exception as e:
+         return f"Could not generate AI summary: {e}"
+
+ # --- The Main Orchestrator ---
+
+ def competitor_teardown(url):
+     """The main function that runs the entire analysis pipeline."""
+     if not url.startswith(('http://', 'https://')):
+         url = 'https://' + url
+
+     yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
+
+     html = fetch_html(url)
+     if not html:
+         yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
+         return
+
+     soup = BeautifulSoup(html, 'html.parser')
+
+     yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
+     seo_data = analyze_onpage_seo(soup)
+     tech_data = analyze_tech_stack(soup, html)
+
+     yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
+     ai_summary = generate_ai_summary(url, seo_data, tech_data)
+
+     # --- Prepare Outputs ---
+
+     # SEO Output
+     seo_md = f"""
+ ### 👑 SEO & Content
+ | Metric | Value |
+ | :--- | :--- |
+ | **Page Title** | `{seo_data['title']}` |
+ | **Meta Description** | `{seo_data['description']}` |
+ | **Word Count** | `{seo_data['word_count']}` |
+
+ #### Heading Structure:
+ - **H1 Tags ({len(seo_data['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo_data['headings']['h1'])}
+ - **H2 Tags:** {len(seo_data['headings']['h2'])} found
+ """
+
+     # Tech Stack Output
+     tech_md = "### ⚙️ Tech Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])
+
+     yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
+
+ # --- Gradio UI ---
+ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
+     gr.Markdown("# 🕵️ Gumbo Board: The Instant Competitor Teardown")
+     gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
+
+     with gr.Row():
+         url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
+         submit_btn = gr.Button("Analyze", variant="primary", scale=1)
+
+     with gr.Tabs():
+         with gr.TabItem("🧠 AI Summary"):
+             summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
+         with gr.TabItem("🔍 On-Page SEO"):
+             seo_output = gr.Markdown()
+         with gr.TabItem("⚙️ Tech Stack"):
+             tech_output = gr.Markdown()
+         with gr.TabItem("📒 Ads & Keywords (Coming Soon)"):
+             ads_output = gr.Markdown()
+         with gr.TabItem("Social Presence (Coming Soon)"):
+             social_output = gr.Markdown()
+
+     # Link the button click to the main function
+     submit_btn.click(
+         fn=competitor_teardown,
+         inputs=[url_input],
+         outputs=[summary_output, seo_output, tech_output, ads_output, social_output, submit_btn]
+     )
+
+     gr.Markdown("---")
+     gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[🚀 Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")
+
+
+ # Launch the app
+ demo.launch()
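
A note on the streaming UI in this file: competitor_teardown is a generator, and Gradio streams each yielded tuple positionally onto the six components in the outputs list (including submit_btn, which is why a gr.Button(...) instance appears in every yield to disable and re-enable the button). Below is a minimal, self-contained sketch of that same pattern; the names stream_status, box, btn and status are hypothetical and not part of this commit. Running the actual app locally would also need the GEMINI_API_KEY environment variable that app.py reads at startup.

import gradio as gr

# Hypothetical sketch of the yield-per-step pattern used by competitor_teardown:
# each yielded tuple maps positionally onto the components listed in `outputs`.
def stream_status(url):
    yield "Fetching...", gr.Button("Working...", interactive=False)    # step 1: show progress, disable button
    yield f"Done analyzing {url}", gr.Button("Go", interactive=True)    # step 2: final text, re-enable button

with gr.Blocks() as sketch:
    box = gr.Textbox(label="URL")
    btn = gr.Button("Go")
    status = gr.Markdown()
    btn.click(fn=stream_status, inputs=[box], outputs=[status, btn])

# sketch.launch()  # uncomment to try the sketch on its own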