mgbam committed on
Commit
c7dbe52
·
verified ·
1 Parent(s): 656a257

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -40
app.py CHANGED
@@ -8,13 +8,9 @@ from langchain_core.messages import HumanMessage
8
  import os
9
  import re
10
 
11
- # --- Configuration ---
12
- # We no longer initialize the LLM here. We will do it on-demand.
13
-
14
- # --- Core Scraping & Analysis Functions (Unchanged) ---
15
-
16
  def fetch_html(url):
17
- """Fetches HTML content from a URL with a browser-like user-agent."""
18
  try:
19
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
20
  response = requests.get(url, headers=headers, timeout=10)
@@ -25,7 +21,6 @@ def fetch_html(url):
25
  return None
26
 
27
  def analyze_onpage_seo(soup):
28
- """Analyzes the on-page SEO elements of a webpage."""
29
  title = soup.find('title').get_text(strip=True) if soup.find('title') else "N/A"
30
  description = soup.find('meta', attrs={'name': 'description'})['content'] if soup.find('meta', attrs={'name': 'description'}) else "N/A"
31
  headings = {'h1': [], 'h2': [], 'h3': []}
@@ -36,7 +31,6 @@ def analyze_onpage_seo(soup):
36
  return {"title": title, "description": description, "headings": headings, "word_count": word_count}
37
 
38
  def analyze_tech_stack(soup, html):
39
- """Performs a very basic tech stack analysis."""
40
  tech = set()
41
  if "react.js" in html or 'data-reactroot' in html: tech.add("React")
42
  if "vue.js" in html: tech.add("Vue.js")
@@ -47,34 +41,21 @@ def analyze_tech_stack(soup, html):
47
  if soup.find('link', href=lambda x: x and 'tailwind' in x): tech.add("Tailwind CSS")
48
  return list(tech) if tech else ["Basic HTML/CSS"]
49
 
50
- # --- AI Summary Generation (Now with Robust Initialization) ---
51
-
52
  def generate_ai_summary(url, seo_data, tech_data):
53
- """Generates a high-level strategic summary using an LLM."""
54
-
55
- # ** THE FIX: Lazy Initialization **
56
- # We initialize the LLM inside the function, only when it's needed.
57
  api_key = os.environ.get("GEMINI_API_KEY")
58
-
59
- # Provide a clear error message if the key is missing.
60
  if not api_key:
61
  return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."
62
-
63
  try:
64
  llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)
65
-
66
  prompt = f"""
67
  You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
68
-
69
  ## On-Page SEO Analysis:
70
  - **Title:** {seo_data['title']}
71
  - **Meta Description:** {seo_data['description']}
72
  - **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
73
  - **Page Word Count:** {seo_data['word_count']}
74
-
75
  ## Technology Stack:
76
  - {', '.join(tech_data)}
77
-
78
  ## Your Analysis:
79
  Provide a 3-bullet point summary covering:
80
  1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
@@ -84,32 +65,22 @@ def generate_ai_summary(url, seo_data, tech_data):
84
  response = llm.invoke([HumanMessage(content=prompt)])
85
  return response.content
86
  except Exception as e:
87
- # Provide a more specific error if the API call itself fails
88
  return f"Could not generate AI summary. The API call failed: {e}"
89
 
90
- # --- The Main Orchestrator (Unchanged) ---
91
-
92
  def competitor_teardown(url):
93
- """The main function that runs the entire analysis pipeline."""
94
  if not url.startswith(('http://', 'https://')):
95
  url = 'https://' + url
96
-
97
  yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
98
-
99
  html = fetch_html(url)
100
  if not html:
101
  yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
102
  return
103
-
104
  soup = BeautifulSoup(html, 'html.parser')
105
-
106
  yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
107
  seo_data = analyze_onpage_seo(soup)
108
  tech_data = analyze_tech_stack(soup, html)
109
-
110
  yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
111
  ai_summary = generate_ai_summary(url, seo_data, tech_data)
112
-
113
  seo_md = f"""
114
  ### πŸ‘‘ SEO & Content
115
  | Metric | Value |
@@ -117,25 +88,20 @@ def competitor_teardown(url):
117
  | **Page Title** | `{seo_data['title']}` |
118
  | **Meta Description** | `{seo_data['description']}` |
119
  | **Word Count** | `{seo_data['word_count']}` |
120
-
121
  #### Heading Structure:
122
  - **H1 Tags ({len(seo_data['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo_data['headings']['h1'])}
123
  - **H2 Tags ({len(seo_data['headings']['h2'])}):** {len(seo_data['headings']['h2'])} found
124
  """
125
-
126
  tech_md = "### stack Tech Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])
127
-
128
  yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
129
 
130
- # --- Gradio UI (Unchanged) ---
131
  with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
132
  gr.Markdown("# πŸ•΅οΈ Gumbo Board: The Instant Competitor Teardown")
133
  gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
134
-
135
  with gr.Row():
136
  url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
137
  submit_btn = gr.Button("Analyze", variant="primary", scale=1)
138
-
139
  with gr.Tabs():
140
  with gr.TabItem("🧠 AI Summary"):
141
  summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
@@ -147,14 +113,16 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}")
147
  ads_output = gr.Markdown()
148
  with gr.TabItem("Social Presence (Coming Soon)"):
149
  social_output = gr.Markdown()
150
-
151
  submit_btn.click(
152
  fn=competitor_teardown,
153
  inputs=[url_input],
154
  outputs=[summary_output, seo_output, tech_output, ads_output, social_output, submit_btn]
155
  )
156
-
157
  gr.Markdown("---")
158
  gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[πŸš€ Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")
159
 
160
- demo.launch()
 
 
 
 
 
8
  import os
9
  import re
10
 
11
+ # All the function definitions are correct and do not need to change.
12
+ # ... (fetch_html, analyze_onpage_seo, analyze_tech_stack, generate_ai_summary) ...
 
 
 
13
  def fetch_html(url):
 
14
  try:
15
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
16
  response = requests.get(url, headers=headers, timeout=10)
 
21
  return None
22
 
23
  def analyze_onpage_seo(soup):
 
24
  title = soup.find('title').get_text(strip=True) if soup.find('title') else "N/A"
25
  description = soup.find('meta', attrs={'name': 'description'})['content'] if soup.find('meta', attrs={'name': 'description'}) else "N/A"
26
  headings = {'h1': [], 'h2': [], 'h3': []}
 
31
  return {"title": title, "description": description, "headings": headings, "word_count": word_count}
32
 
33
  def analyze_tech_stack(soup, html):
 
34
  tech = set()
35
  if "react.js" in html or 'data-reactroot' in html: tech.add("React")
36
  if "vue.js" in html: tech.add("Vue.js")
 
41
  if soup.find('link', href=lambda x: x and 'tailwind' in x): tech.add("Tailwind CSS")
42
  return list(tech) if tech else ["Basic HTML/CSS"]
43
 
 
 
44
  def generate_ai_summary(url, seo_data, tech_data):
 
 
 
 
45
  api_key = os.environ.get("GEMINI_API_KEY")
 
 
46
  if not api_key:
47
  return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."
 
48
  try:
49
  llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)
 
50
  prompt = f"""
51
  You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
 
52
  ## On-Page SEO Analysis:
53
  - **Title:** {seo_data['title']}
54
  - **Meta Description:** {seo_data['description']}
55
  - **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
56
  - **Page Word Count:** {seo_data['word_count']}
 
57
  ## Technology Stack:
58
  - {', '.join(tech_data)}
 
59
  ## Your Analysis:
60
  Provide a 3-bullet point summary covering:
61
  1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
 
65
  response = llm.invoke([HumanMessage(content=prompt)])
66
  return response.content
67
  except Exception as e:
 
68
  return f"Could not generate AI summary. The API call failed: {e}"
69
 
 
 
70
  def competitor_teardown(url):
 
71
  if not url.startswith(('http://', 'https://')):
72
  url = 'https://' + url
 
73
  yield "Fetching website...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
 
74
  html = fetch_html(url)
75
  if not html:
76
  yield "Failed to fetch URL. Please check the address and try again.", None, None, None, None, gr.Button("Analyze", interactive=True)
77
  return
 
78
  soup = BeautifulSoup(html, 'html.parser')
 
79
  yield "Analyzing SEO & Tech...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
80
  seo_data = analyze_onpage_seo(soup)
81
  tech_data = analyze_tech_stack(soup, html)
 
82
  yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
83
  ai_summary = generate_ai_summary(url, seo_data, tech_data)
 
84
  seo_md = f"""
85
  ### πŸ‘‘ SEO & Content
86
  | Metric | Value |
 
88
  | **Page Title** | `{seo_data['title']}` |
89
  | **Meta Description** | `{seo_data['description']}` |
90
  | **Word Count** | `{seo_data['word_count']}` |
 
91
  #### Heading Structure:
92
  - **H1 Tags ({len(seo_data['headings']['h1'])}):** {', '.join(f'`{h}`' for h in seo_data['headings']['h1'])}
93
  - **H2 Tags ({len(seo_data['headings']['h2'])}):** {len(seo_data['headings']['h2'])} found
94
  """
 
95
  tech_md = "### stack Tech Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])
 
96
  yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
97
 
98
+ # --- Gradio UI Definition (Unchanged) ---
99
  with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
100
  gr.Markdown("# πŸ•΅οΈ Gumbo Board: The Instant Competitor Teardown")
101
  gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
 
102
  with gr.Row():
103
  url_input = gr.Textbox(label="Enter Competitor URL", placeholder="e.g., notion.so or mailchimp.com", scale=4)
104
  submit_btn = gr.Button("Analyze", variant="primary", scale=1)
 
105
  with gr.Tabs():
106
  with gr.TabItem("🧠 AI Summary"):
107
  summary_output = gr.Markdown("Your AI-powered strategic summary will appear here.")
 
113
  ads_output = gr.Markdown()
114
  with gr.TabItem("Social Presence (Coming Soon)"):
115
  social_output = gr.Markdown()
 
116
  submit_btn.click(
117
  fn=competitor_teardown,
118
  inputs=[url_input],
119
  outputs=[summary_output, seo_output, tech_output, ads_output, social_output, submit_btn]
120
  )
 
121
  gr.Markdown("---")
122
  gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[πŸš€ Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")
123
 
124
+ # --- THE FIX: Launch the app within a main block ---
125
+ # The guard ensures the server is launched only when this file is run as a script,
126
+ # not when it is imported; demo.launch() then blocks and keeps the server alive.
127
+ if __name__ == "__main__":
128
+ demo.launch()