mgbam committed on
Commit
0e8510e
·
verified ·
1 Parent(s): 5db3b89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -152
app.py CHANGED
@@ -1,174 +1,159 @@
1
  import gradio as gr
2
- import requests
3
- from bs4 import BeautifulSoup
4
  import pandas as pd
 
5
  from langchain_google_genai import ChatGoogleGenerativeAI
6
  from langchain_core.messages import HumanMessage
7
  import os
8
- import re
9
 
10
  # --- Configuration ---
11
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
12
 
13
- # --- Core Analysis Functions (Unchanged) ---
14
- def fetch_html(url: str) -> str | None:
15
- # ... (code is the same as before) ...
16
- try:
17
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
18
- response = requests.get(url, headers=headers, timeout=10)
19
- response.raise_for_status()
20
- return response.text
21
- except requests.RequestException:
22
- return None
23
-
24
- def analyze_onpage_seo(soup: BeautifulSoup) -> dict:
25
- # ... (code is the same as before) ...
26
- title = soup.find('title').get_text(strip=True) if soup.find('title') else "Not found"
27
- description_tag = soup.find('meta', attrs={'name': 'description'})
28
- description = description_tag['content'] if description_tag and description_tag.has_attr('content') else "Not found"
29
- headings = {'h1': [], 'h2': []}
30
- for h_tag in ['h1', 'h2']:
31
- for tag in soup.find_all(h_tag):
32
- headings[h_tag].append(tag.get_text(strip=True))
33
- word_count = len(soup.get_text(separator=' ', strip=True).split())
34
- return {"title": title, "description": description, "headings": headings, "word_count": word_count}
35
-
36
- def analyze_tech_stack(html: str) -> list[str]:
37
- # ... (code is the same as before) ...
38
- tech = set()
39
- if "react.js" in html or 'data-reactroot' in html: tech.add("React")
40
- if "vue.js" in html: tech.add("Vue.js")
41
- if "wp-content" in html: tech.add("WordPress")
42
- if "gtag('config'" in html: tech.add("Google Analytics (GA4)")
43
- if "GTM-" in html: tech.add("Google Tag Manager")
44
- if "tailwind" in html: tech.add("Tailwind CSS")
45
- if "shopify" in html: tech.add("Shopify")
46
- return list(tech) if tech else ["Basic HTML/CSS"]
47
-
48
- def analyze_ads_and_keywords(domain: str) -> dict:
49
- # ... (code is the same as before, with simulated data) ...
50
- print(f"Simulating Ads & Keywords API call for {domain}")
51
- if "notion" in domain:
52
- return { "keywords": [{"keyword": "what is notion", "volume": 65000, "cpc": 0.50}, {"keyword": "notion templates", "volume": 45000, "cpc": 1.20}, {"keyword": "second brain app", "volume": 12000, "cpc": 2.50}, {"keyword": "project management software", "volume": 25000, "cpc": 8.00},], "ads": [{"title": "Notion – Your All-in-One Workspace", "text": "Organize your life and work. From notes and docs, to projects and wikis, Notion is all you need."}, {"title": "The Best Second Brain App | Notion", "text": "Stop juggling tools. Notion combines everything you need to think, write, and plan in one place."},] }
53
- if "mailchimp" in domain:
54
- return { "keywords": [{"keyword": "email marketing", "volume": 90500, "cpc": 15.50}, {"keyword": "free email marketing tools", "volume": 14800, "cpc": 12.00}, {"keyword": "newsletter software", "volume": 8100, "cpc": 9.50},], "ads": [{"title": "Mailchimp: Marketing & Email", "text": "Grow your business with Mailchimp's All-in-One marketing, automation & email marketing platform."},] }
55
- return {"keywords": [], "ads": []}
56
-
57
- # --- AI Summary (Now with a Conversion Trigger) ---
58
- def generate_ai_summary(url: str, seo_data: dict, ads_data: dict) -> str:
59
  if not GEMINI_API_KEY:
60
- return "⚠️ **AI Summary Unavailable:** The `GEMINI_API_KEY` is not set in the Space secrets."
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  try:
63
- llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GEMINI_API_KEY)
64
-
65
- ads_summary = "They do not appear to be running any significant Google Ads campaigns."
66
- if ads_data and ads_data.get('keywords'):
67
- top_keyword = ads_data['keywords'][0]
68
- ads_summary = f"They are actively running Google Ads, primarily bidding on high-intent keywords like **'{top_keyword['keyword']}'**."
69
-
70
- # *** NEW: CONVERSION TRIGGER IN THE PROMPT ***
71
- # We are telling the AI to hint at deeper insights that the Pro version would provide.
72
- prompt = f"""
73
- As a world-class marketing strategist, analyze the data for the website `{url}` and provide a concise, actionable summary in markdown format.
74
-
75
- **On-Page Focus:** Their primary H1 heading is "{seo_data['headings']['h1'][0] if seo_data['headings']['h1'] else 'N/A'}".
76
- **Paid Strategy:** {ads_summary}
77
-
78
- Based on this, provide your **Strategic Teardown**:
79
- - **🎯 Core Marketing Angle:** What is their main value proposition and selling point?
80
- - **📈 Customer Acquisition Focus:** Based on the data, are they focused more on organic SEO or paid advertising?
81
- - **💡 One Actionable Insight:** What is one clever tactic they're using, or one key opportunity they are missing?
82
-
83
- **Finally, add a "Go Deeper" section that hints at what a full analysis could uncover, like this example: "Go Deeper: A full Pro analysis could reveal their top-performing ad copy and entire keyword portfolio."**
84
- """
85
  response = llm.invoke([HumanMessage(content=prompt)])
86
- return response.content
 
 
87
  except Exception as e:
88
- return f"⚠️ **AI Summary Failed:** The API call could not be completed. Error: {e}"
89
-
90
- # --- Main Orchestrator (Unchanged) ---
91
- def competitor_teardown(url: str):
92
- # ... (code is the same as before) ...
93
- outputs = { "summary": " ", "seo": " ", "tech": " ", "ads": " ", "btn": gr.Button("Analyzing...", interactive=False) }
94
- yield list(outputs.values())
95
- if not url.startswith(('http://', 'https://')): url = 'https://' + url
96
- domain_match = re.search(r'https?://(?:www\.)?([^/]+)', url)
97
- if not domain_match:
98
- outputs["summary"] = "❌ **Invalid URL:** Please enter a valid website address like `notion.so`."
99
- outputs["btn"] = gr.Button("Analyze", interactive=True)
100
- yield list(outputs.values())
101
- return
102
- domain = domain_match.group(1)
103
- html = fetch_html(url)
104
- if not html:
105
- outputs["summary"] = f"❌ **Fetch Failed:** Could not retrieve content from `{url}`. The site may be down or blocking scrapers."
106
- outputs["btn"] = gr.Button("Analyze", interactive=True)
107
- yield list(outputs.values())
108
  return
109
- soup = BeautifulSoup(html, 'html.parser')
110
- seo_data = analyze_onpage_seo(soup)
111
- tech_data = analyze_tech_stack(html)
112
- ads_data = analyze_ads_and_keywords(domain)
113
- ai_summary = generate_ai_summary(url, seo_data, ads_data)
114
- outputs["summary"] = ai_summary
115
- outputs["seo"] = f"""### 👑 SEO & Content Analysis
116
- | Metric | Value |
117
- | :--- | :--- |
118
- | **Page Title** | `{seo_data['title']}` |
119
- | **Meta Description** | `{seo_data['description']}` |
120
- | **Word Count** | `{seo_data['word_count']:,}` |
121
- #### Heading Structure
122
- - **H1 Tags ({len(seo_data['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo_data['headings']['h1']) if seo_data['headings']['h1'] else 'None Found'}
123
- - **H2 Tags ({len(seo_data['headings']['h2'])}):** {len(seo_data['headings']['h2'])} found
124
- """
125
- outputs["tech"] = "### ⚙️ Technology Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])
126
- if ads_data.get("keywords"):
127
- df = pd.DataFrame(ads_data["keywords"])
128
- df['cpc'] = df['cpc'].apply(lambda x: f"${x:.2f}")
129
- ads_md = "### 📢 Paid Ads & Keywords\nThis competitor is actively bidding on Google Search ads. Here are their top keywords:\n\n"
130
- ads_md += df.to_markdown(index=False)
131
- ads_md += "\n\n### ✍️ Sample Ad Copy\n\n"
132
- for ad in ads_data["ads"]: ads_md += f"**{ad['title']}**\n\n>{ad['text']}\n\n---\n\n"
133
- outputs["ads"] = ads_md
134
- else:
135
- outputs["ads"] = "### 📢 Paid Ads & Keywords\n\nNo significant paid advertising activity was detected for this domain."
136
- outputs["btn"] = gr.Button("Analyze", interactive=True)
137
- yield list(outputs.values())
138
-
139
- # --- Gradio UI (Now with your Gumroad link and better conversion copy) ---
140
- with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
141
- gr.Markdown("# 🕵️ Gumbo Board: The Instant Competitor Teardown")
142
- gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
143
 
144
- with gr.Row():
145
- url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
146
- submit_btn = gr.Button("Analyze", variant="primary", scale=1)
147
-
148
- with gr.Tabs() as tabs:
149
- with gr.TabItem("🧠 AI Summary", id=0):
150
- summary_output = gr.Markdown()
151
- with gr.TabItem("🔍 On-Page SEO", id=1):
152
- seo_output = gr.Markdown()
153
- with gr.TabItem("⚙️ Tech Stack", id=2):
154
- tech_output = gr.Markdown()
155
- with gr.TabItem("📢 Ads & Keywords", id=3):
156
- ads_output = gr.Markdown()
 
 
 
157
 
158
- outputs_list = [summary_output, seo_output, tech_output, ads_output, submit_btn]
159
- submit_btn.click(fn=competitor_teardown, inputs=[url_input], outputs=outputs_list)
 
 
 
 
 
160
 
 
 
 
 
 
 
 
 
161
  gr.Markdown("---")
162
- # *** YOUR GUMROAD LINK AND CONVERSION-FOCUSED COPY ***
163
- gr.Markdown("""
164
- ### You're scratching the surface. Ready to go deeper?
165
- The Pro version gives you the unfair advantage:
166
- - **✅ Unlimited Reports:** Analyze everyone in your market.
167
- - **🔑 Live API Data:** Get real-time Ads & Keyword data for *any* domain.
168
- - **📄 PDF Exports:** Create beautiful reports for clients or your team.
169
-
170
- **[🚀 Unlock Gumbo Board Pro Now!](https://gumroadian864.gumroad.com/l/zvgxv)**
171
- """)
172
 
173
  if __name__ == "__main__":
174
  demo.launch()
 
1
  import gradio as gr
 
 
2
  import pandas as pd
3
+ import plotly.graph_objects as go
4
  from langchain_google_genai import ChatGoogleGenerativeAI
5
  from langchain_core.messages import HumanMessage
6
  import os
7
+ import random
8
 
9
  # --- Configuration ---
10
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
11
 
12
+ # --- Stage 1: Data Ingestion (Simulation) ---
13
def fetch_market_chatter(niche_description: str) -> list[str]:
    """Return a random selection of canned "market chatter" comments.

    This is a stand-in for a real scraper (Reddit, Hacker News, ...): the
    niche description is only logged, and between five and all of the canned
    comments are returned in random order.
    """
    print(f"Simulating scraping for: {niche_description}")

    # A real implementation would pull these from PRAW, snscrape, etc.
    canned_pool = [
        "Ugh, another project management tool that charges per user. I'm a solo founder, this kills me.",
        "I love the idea of Notion but it's just too slow and bloated now. I need something faster.",
        "Why can't any of these tools just have a simple, reliable integration with Google Calendar?",
        "I'm a writer, not a project manager. I just need a clean way to organize my chapters and research.",
        "Asana is too complex. Trello is too simple. Is there anything in between?",
        "The real magic would be a tool that automatically generates a weekly summary of my progress.",
        "Their customer support is a joke. Took three days to get a reply on a critical bug.",
        "The 'all-in-one' promise is a lie. It does 10 things poorly instead of one thing well.",
        "If someone built a beautiful, minimalist PM tool for visual artists, I'd pay $50/month in a heartbeat.",
    ]

    # Draw the sample size first, then the sample itself.
    how_many = random.randint(5, len(canned_pool))
    return random.sample(canned_pool, k=how_many)
30
+
31
+ # --- Stage 2 & 3: AI Analysis Pipeline ---
32
def analyze_resonance(niche_description: str, comments: list[str]) -> dict:
    """Ask the Gemini model to analyse market chatter for a product idea.

    Args:
        niche_description: Free-text description of the proposed product.
        comments: Raw comments to analyse; each is quoted into the prompt.

    Returns:
        The dict decoded from the model's JSON reply (the prompt requests the
        keys ``pain_points``, ``magic_words``, ``target_villain``,
        ``unserved_tribe``, ``resonance_score`` and
        ``resonance_justification``), or ``{"error": "<message>"}`` when the
        API key is missing, the call fails, or the reply is not a JSON object.
    """
    # Function-scope import: the module header never imports ``json``, so the
    # original ``json.loads`` call raised NameError on every request.
    import json

    if not GEMINI_API_KEY:
        return {"error": "GEMINI_API_KEY not set."}

    # One bullet per comment, each wrapped in double quotes.
    comments_formatted = "\n".join(f'- "{c}"' for c in comments)
    prompt = f"""
    You are a market research analyst with superhuman insight. Analyze the following raw market chatter (comments from Reddit, Hacker News, etc.) for a proposed product idea.

    **Proposed Product Idea:** "{niche_description}"

    **Raw Market Chatter:**
    {comments_formatted}

    **Your Task:**
    Analyze the chatter and return a JSON object with the following structure. Do not include any text outside the JSON object.
    {{
        "pain_points": [
            "A summary of the most common complaint or frustration.",
            "A summary of the second most common complaint.",
            "A summary of a third, more niche complaint."
        ],
        "magic_words": [
            "A positive, benefit-oriented word or phrase people use.",
            "Another evocative word people use to describe their ideal solution.",
            "A third powerful, emotional word."
        ],
        "target_villain": "The name of a competitor or a type of product that people frequently complain about.",
        "unserved_tribe": "A description of a specific user subgroup whose needs are not being met.",
        "resonance_score": A number between 1 and 100 representing how well the proposed product idea fits the market chatter,
        "resonance_justification": "A one-sentence explanation for the score you gave."
    }}
    """
    try:
        # Client construction sits inside the ``try`` so configuration errors
        # are reported through the same error dict as call failures.
        llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", google_api_key=GEMINI_API_KEY)
        response = llm.invoke([HumanMessage(content=prompt)])

        # Models often wrap JSON in markdown fences or add stray prose; keep
        # only the outermost {...} span instead of stripping fence markers.
        raw_text = response.content.strip()
        start = raw_text.find("{")
        end = raw_text.rfind("}")
        if start == -1 or end <= start:
            return {"error": "Failed to analyze resonance: model response contained no JSON object."}

        parsed = json.loads(raw_text[start:end + 1])
        if not isinstance(parsed, dict):
            return {"error": "Failed to analyze resonance: model response was not a JSON object."}
        return parsed
    except Exception as e:
        # Boundary handler: any API or parsing failure is surfaced to the UI.
        return {"error": f"Failed to analyze resonance: {e}"}
75
+
76
+ # --- Main Orchestrator ---
77
def _pad_to_three(values) -> list[str]:
    """Return exactly three display strings, padding missing entries with 'N/A'."""
    items = [str(v) for v in values] if isinstance(values, (list, tuple)) else []
    return (items + ["N/A"] * 3)[:3]


def run_precog_analysis(niche_description: str):
    """Run the full pipeline as a generator of Gradio output updates.

    Each yielded tuple has four slots: report markdown, strategy markdown,
    status text, and a replacement for the submit button.

    Args:
        niche_description: Free-text description of the product idea.

    Yields:
        Progress tuples while the pipeline runs, then either one error tuple
        or the final report tuple.
    """
    import html  # function-scope: not imported at module level

    # NOTE(review): progress and error text are emitted in the first (report)
    # slot, as in the original; the status slot is only filled at the end.
    yield "Initializing Pre-Cog Engine...", None, None, gr.Button("Analyzing...", interactive=False)

    # Stage 1: gather raw comments.
    comments = fetch_market_chatter(niche_description)
    yield (
        f"Scanning Market Chatter... ({len(comments)} data points found)",
        None,
        None,
        gr.Button("Analyzing...", interactive=False),
    )

    # Stage 2 & 3: LLM analysis.
    analysis_result = analyze_resonance(niche_description, comments)
    if not isinstance(analysis_result, dict):
        analysis_result = {"error": "Analysis returned an unexpected result."}
    if "error" in analysis_result:
        # Restore the button with the label it is created with in the UI.
        yield f"Error: {analysis_result['error']}", None, None, gr.Button("Analyze Resonance", interactive=True)
        return

    # The model may return the score as a string or omit it; coerce and clamp
    # so the colour comparison below cannot raise TypeError.
    try:
        score = float(analysis_result.get("resonance_score", 0))
    except (TypeError, ValueError):
        score = 0.0
    score = max(0.0, min(100.0, score))
    color = "green" if score > 65 else "orange" if score > 40 else "red"

    # Pad to three entries: indexing [1]/[2] on a short or missing list
    # previously raised IndexError.
    pain_points = _pad_to_three(analysis_result.get("pain_points"))
    magic_words = _pad_to_three(analysis_result.get("magic_words"))

    # The justification is model output placed inside raw HTML, so escape it.
    justification = html.escape(str(analysis_result.get("resonance_justification", "")))

    # Lines are joined without leading indentation so Markdown does not treat
    # them as an indented code block.
    report_md = "\n".join([
        "",
        f'<div style="text-align:center; border: 2px solid {color}; border-radius:10px; padding:20px;">',
        '<h2 style="margin:0;">Market Resonance Score</h2>',
        f'<p style="font-size: 80px; font-weight:bold; margin:0; color:{color};">{score:g}/100</p>',
        f'<p style="margin:0; font-style:italic;">"{justification}"</p>',
        "</div>",
        "",
        "### 💥 Top 3 Unspoken Pain Points",
        f"1. **{pain_points[0]}**",
        f"2. **{pain_points[1]}**",
        f"3. **{pain_points[2]}**",
        "",
        '### Your "Magic Words" for Marketing',
        "Use these exact words in your landing page copy. This is how your customers talk.",
        f"- `{magic_words[0]}`",
        f"- `{magic_words[1]}`",
        f"- `{magic_words[2]}`",
        "",
    ])

    villain = analysis_result.get("target_villain", "N/A")
    tribe = analysis_result.get("unserved_tribe", "N/A")
    strategy_md = "\n".join([
        "",
        "### 🎯 Your Go-to-Market Strategy",
        "",
        '**Your "Villain":**',
        f"Position your product as the direct antidote to **{villain}**. "
        'Your marketing should say, "Tired of [Villain\'s problem]? We fixed it."',
        "",
        '**Your Unserved "Tribe":**',
        f"Focus your initial launch on this niche group: **{tribe}**. "
        "They are desperately looking for a solution and will become your first evangelists.",
        "",
    ])

    yield report_md, strategy_md, "Analysis Complete", gr.Button("Analyze Resonance", interactive=True)
127
+
128
+ # --- Gradio UI ---
129
# Page layout: idea input on top, report and strategy side by side, then a
# status line and the upsell footer.
with gr.Blocks(
    theme=gr.themes.Glass(),
    css=".gradio-container{max-width: 800px !important}",
) as demo:
    gr.Markdown("# 🔮 Pre-Cog: The Market Resonance Engine")
    gr.Markdown("Stop guessing. Analyze real-time market chatter to see if your idea will succeed *before* you build it.")

    # Input area.
    with gr.Column():
        niche_input = gr.Textbox(
            label="Describe Your Product Idea or Niche",
            placeholder="e.g., 'An AI-powered project management tool for solopreneurs' or 'A better way to learn guitar online'",
        )
        submit_btn = gr.Button("Analyze Resonance", variant="primary")

    gr.Markdown("---")

    # Results: wide report column next to a narrower strategy column.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("## Resonance Report")
            report_output = gr.Markdown()
        with gr.Column(scale=1):
            gr.Markdown("## Strategy")
            strategy_output = gr.Markdown()

    # A simple status bar.
    status_output = gr.Markdown()

    # The handler is a generator; each yield updates these four components.
    submit_btn.click(
        fn=run_precog_analysis,
        inputs=[niche_input],
        outputs=[report_output, strategy_output, status_output, submit_btn],
    )

    gr.Markdown("---")
    gr.Markdown("### This is a demo of the Pre-Cog Engine. \n The Pro version provides access to live data streams, deeper analysis, and continuous market monitoring. \n **[🚀 Launching Soon - Inquire About Early Access](https://gumroad.com/)**")

if __name__ == "__main__":
    demo.launch()