mgbam committed on
Commit
656a257
·
verified ·
1 Parent(s): ec9b43b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -43
app.py CHANGED
@@ -8,11 +8,10 @@ from langchain_core.messages import HumanMessage
8
  import os
9
  import re
10
 
11
- # --- Configuration & Initialization ---
12
- # For deployment on Hugging Face, set GEMINI_API_KEY in the Space's secrets
13
- llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=os.environ.get("GEMINI_API_KEY"))
14
 
15
- # --- Core Scraping & Analysis Functions ---
16
 
17
  def fetch_html(url):
18
  """Fetches HTML content from a URL with a browser-like user-agent."""
@@ -29,21 +28,12 @@ def analyze_onpage_seo(soup):
29
  """Analyzes the on-page SEO elements of a webpage."""
30
  title = soup.find('title').get_text(strip=True) if soup.find('title') else "N/A"
31
  description = soup.find('meta', attrs={'name': 'description'})['content'] if soup.find('meta', attrs={'name': 'description'}) else "N/A"
32
-
33
  headings = {'h1': [], 'h2': [], 'h3': []}
34
  for h_tag in ['h1', 'h2', 'h3']:
35
  for tag in soup.find_all(h_tag):
36
  headings[h_tag].append(tag.get_text(strip=True))
37
-
38
- # Simple word count
39
  word_count = len(soup.get_text().split())
40
-
41
- return {
42
- "title": title,
43
- "description": description,
44
- "headings": headings,
45
- "word_count": word_count
46
- }
47
 
48
  def analyze_tech_stack(soup, html):
49
  """Performs a very basic tech stack analysis."""
@@ -55,36 +45,49 @@ def analyze_tech_stack(soup, html):
55
  if "gtag('config'" in html: tech.add("Google Analytics (GA4)")
56
  if "GTM-" in html: tech.add("Google Tag Manager")
57
  if soup.find('link', href=lambda x: x and 'tailwind' in x): tech.add("Tailwind CSS")
58
-
59
  return list(tech) if tech else ["Basic HTML/CSS"]
60
 
 
 
61
  def generate_ai_summary(url, seo_data, tech_data):
62
  """Generates a high-level strategic summary using an LLM."""
63
- prompt = f"""
64
- You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
65
-
66
- ## On-Page SEO Analysis:
67
- - **Title:** {seo_data['title']}
68
- - **Meta Description:** {seo_data['description']}
69
- - **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
70
- - **Page Word Count:** {seo_data['word_count']}
71
-
72
- ## Technology Stack:
73
- - {', '.join(tech_data)}
74
-
75
- ## Your Analysis:
76
- Provide a 3-bullet point summary covering:
77
- 1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
78
- 2. **Their Target Audience:** Who are they talking to?
79
- 3. **A Key Strategic Insight:** What is one clever thing they are doing, or one major missed opportunity?
80
- """
81
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  response = llm.invoke([HumanMessage(content=prompt)])
83
  return response.content
84
  except Exception as e:
85
- return f"Could not generate AI summary: {e}"
 
86
 
87
- # --- The Main Orchestrator ---
88
 
89
  def competitor_teardown(url):
90
  """The main function that runs the entire analysis pipeline."""
@@ -107,9 +110,6 @@ def competitor_teardown(url):
107
  yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
108
  ai_summary = generate_ai_summary(url, seo_data, tech_data)
109
 
110
- # --- Prepare Outputs ---
111
-
112
- # SEO Output
113
  seo_md = f"""
114
  ### 👑 SEO & Content
115
  | Metric | Value |
@@ -123,12 +123,11 @@ def competitor_teardown(url):
123
  - **H2 Tags ({len(seo_data['headings']['h2'])}):** {len(seo_data['headings']['h2'])} found
124
  """
125
 
126
- # Tech Stack Output
127
  tech_md = "### stack Tech Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])
128
 
129
  yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
130
 
131
- # --- Gradio UI ---
132
  with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
133
  gr.Markdown("# 🕵️ Gumbo Board: The Instant Competitor Teardown")
134
  gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
@@ -149,7 +148,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}")
149
  with gr.TabItem("Social Presence (Coming Soon)"):
150
  social_output = gr.Markdown()
151
 
152
- # Link the button click to the main function
153
  submit_btn.click(
154
  fn=competitor_teardown,
155
  inputs=[url_input],
@@ -159,6 +157,4 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}")
159
  gr.Markdown("---")
160
  gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[🚀 Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")
161
 
162
-
163
- # Launch the app
164
  demo.launch()
 
8
  import os
9
  import re
10
 
11
+ # --- Configuration ---
12
+ # We no longer initialize the LLM here. We will do it on-demand.
 
13
 
14
+ # --- Core Scraping & Analysis Functions (Unchanged) ---
15
 
16
  def fetch_html(url):
17
  """Fetches HTML content from a URL with a browser-like user-agent."""
 
28
  """Analyzes the on-page SEO elements of a webpage."""
29
  title = soup.find('title').get_text(strip=True) if soup.find('title') else "N/A"
30
  description = soup.find('meta', attrs={'name': 'description'})['content'] if soup.find('meta', attrs={'name': 'description'}) else "N/A"
 
31
  headings = {'h1': [], 'h2': [], 'h3': []}
32
  for h_tag in ['h1', 'h2', 'h3']:
33
  for tag in soup.find_all(h_tag):
34
  headings[h_tag].append(tag.get_text(strip=True))
 
 
35
  word_count = len(soup.get_text().split())
36
+ return {"title": title, "description": description, "headings": headings, "word_count": word_count}
 
 
 
 
 
 
37
 
38
  def analyze_tech_stack(soup, html):
39
  """Performs a very basic tech stack analysis."""
 
45
  if "gtag('config'" in html: tech.add("Google Analytics (GA4)")
46
  if "GTM-" in html: tech.add("Google Tag Manager")
47
  if soup.find('link', href=lambda x: x and 'tailwind' in x): tech.add("Tailwind CSS")
 
48
  return list(tech) if tech else ["Basic HTML/CSS"]
49
 
50
+ # --- AI Summary Generation (Now with Robust Initialization) ---
51
+
52
  def generate_ai_summary(url, seo_data, tech_data):
53
  """Generates a high-level strategic summary using an LLM."""
54
+
55
+ # ** THE FIX: Lazy Initialization **
56
+ # We initialize the LLM inside the function, only when it's needed.
57
+ api_key = os.environ.get("GEMINI_API_KEY")
58
+
59
+ # Provide a clear error message if the key is missing.
60
+ if not api_key:
61
+ return "ERROR: `GEMINI_API_KEY` is not set in the Space secrets. The AI summary cannot be generated. Please ask the Space owner to add it."
62
+
 
 
 
 
 
 
 
 
 
63
  try:
64
+ llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)
65
+
66
+ prompt = f"""
67
+ You are a world-class marketing strategist. Analyze the following data for the website {url} and provide a concise, actionable summary.
68
+
69
+ ## On-Page SEO Analysis:
70
+ - **Title:** {seo_data['title']}
71
+ - **Meta Description:** {seo_data['description']}
72
+ - **H1 Headings:** {', '.join(seo_data['headings']['h1'])}
73
+ - **Page Word Count:** {seo_data['word_count']}
74
+
75
+ ## Technology Stack:
76
+ - {', '.join(tech_data)}
77
+
78
+ ## Your Analysis:
79
+ Provide a 3-bullet point summary covering:
80
+ 1. **Their Primary Goal:** What is this page trying to achieve based on its language and structure?
81
+ 2. **Their Target Audience:** Who are they talking to?
82
+ 3. **A Key Strategic Insight:** What is one clever thing they are doing, or one major missed opportunity?
83
+ """
84
  response = llm.invoke([HumanMessage(content=prompt)])
85
  return response.content
86
  except Exception as e:
87
+ # Provide a more specific error if the API call itself fails
88
+ return f"Could not generate AI summary. The API call failed: {e}"
89
 
90
+ # --- The Main Orchestrator (Unchanged) ---
91
 
92
  def competitor_teardown(url):
93
  """The main function that runs the entire analysis pipeline."""
 
110
  yield "Generating AI Summary...", None, None, None, None, gr.Button("Analyzing...", interactive=False)
111
  ai_summary = generate_ai_summary(url, seo_data, tech_data)
112
 
 
 
 
113
  seo_md = f"""
114
  ### 👑 SEO & Content
115
  | Metric | Value |
 
123
  - **H2 Tags ({len(seo_data['headings']['h2'])}):** {len(seo_data['headings']['h2'])} found
124
  """
125
 
 
126
  tech_md = "### stack Tech Stack\n\n" + "\n".join([f"- `{t}`" for t in tech_data])
127
 
128
  yield ai_summary, seo_md, tech_md, "Analysis Complete", "More features coming soon!", gr.Button("Analyze", interactive=True)
129
 
130
+ # --- Gradio UI (Unchanged) ---
131
  with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important;}") as demo:
132
  gr.Markdown("# 🕵️ Gumbo Board: The Instant Competitor Teardown")
133
  gr.Markdown("Enter a competitor's website to get an instant analysis of their online strategy. *Powered by Gumbo (BeautifulSoup) & AI.*")
 
148
  with gr.TabItem("Social Presence (Coming Soon)"):
149
  social_output = gr.Markdown()
150
 
 
151
  submit_btn.click(
152
  fn=competitor_teardown,
153
  inputs=[url_input],
 
157
  gr.Markdown("---")
158
  gr.Markdown("### Ready for More? \n Get unlimited reports, save projects, and export to PDF with our Pro plan. \n **[🚀 Launching Soon - Sign up on Gumroad!](https://gumroad.com/)**")
159
 
 
 
160
  demo.launch()