Spaces:

ahmednoorx
/

cold-email-assistant

Running

App Files Files Community

ahmednoorx committed on Jul 3

Commit

2e93556

verified ·

1 Parent(s): 7319f65

Update scraper.py

Browse files

Files changed (1) hide show

scraper.py +51 -7

scraper.py CHANGED Viewed

@@ -4,12 +4,19 @@ import time
 import re
 from urllib.parse import urlparse, urljoin
 import sqlite3
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from webdriver_manager.chrome import ChromeDriverManager
 class LinkedInScraper:
     def __init__(self, timeout=10, use_selenium=False):
@@ -25,11 +32,17 @@ class LinkedInScraper:
             'Upgrade-Insecure-Requests': '1',
         })
-        if self.use_selenium:
             self._setup_selenium()
     def _setup_selenium(self):
         """Setup Selenium WebDriver"""
         try:
             chrome_options = Options()
             chrome_options.add_argument('--headless')
@@ -285,3 +298,34 @@ class LinkedInScraper:
                 self.driver.quit()
             except:
                 pass

 import re
 from urllib.parse import urlparse, urljoin
 import sqlite3
+# Optional Selenium imports for advanced scraping
+try:
+    from selenium import webdriver
+    from selenium.webdriver.chrome.options import Options
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.support.ui import WebDriverWait
+    from selenium.webdriver.support import expected_conditions as EC
+    from webdriver_manager.chrome import ChromeDriverManager
+    SELENIUM_AVAILABLE = True
+except ImportError:
+    SELENIUM_AVAILABLE = False
+    print("⚠️ Selenium not available. Company research will use basic scraping only.")
 class LinkedInScraper:
     def __init__(self, timeout=10, use_selenium=False):
             'Upgrade-Insecure-Requests': '1',
         })
+        if self.use_selenium and SELENIUM_AVAILABLE:
             self._setup_selenium()
+        elif self.use_selenium and not SELENIUM_AVAILABLE:
+            print("⚠️ Selenium requested but not available. Falling back to basic scraping.")
     def _setup_selenium(self):
         """Setup Selenium WebDriver"""
+        if not SELENIUM_AVAILABLE:
+            print("⚠️ Selenium not available. Cannot setup WebDriver.")
+            return
         try:
             chrome_options = Options()
             chrome_options.add_argument('--headless')
                 self.driver.quit()
             except:
                 pass
+# Standalone function for easy import
+def scrape_company_info(input_data):
+    """
+    Scrape company information from a LinkedIn URL or a company name.
+
+    Module-level convenience wrapper: builds a LinkedInScraper with its
+    default arguments and dispatches on the shape of the input.
+
+    Args:
+        input_data (str): LinkedIn URL or company name. Anything containing
+            'linkedin.com' (case-insensitive) is handled as a LinkedIn URL;
+            everything else is passed on as a company name.
+    Returns:
+        One of:
+        - a fixed setup-instructions message when SELENIUM_AVAILABLE is False
+          (no scraping is attempted in that case);
+        - whatever the scraper method returned, when that value is truthy
+          (presumably a str -- confirm against LinkedInScraper);
+        - "" when the scraper returned a falsy value or any exception was
+          raised (the exception is printed, not re-raised).
+    """
+    # NOTE(review): the scraper below is created with default arguments, and
+    # the LinkedInScraper signature visible in this diff defaults to
+    # use_selenium=False, so this gate may be stricter than the basic
+    # scraping path requires -- confirm this is intentional.
+    if not SELENIUM_AVAILABLE:
+        return "Company research feature requires additional setup. Please install selenium and webdriver-manager for enterprise features."
+    try:
+        scraper = LinkedInScraper()
+        # Check if input is a LinkedIn URL
+        if 'linkedin.com' in input_data.lower():
+            # Second argument is passed as an empty string here.
+            result = scraper.scrape_linkedin_or_company(input_data, "")
+        else:
+            # Treat as company name
+            result = scraper.scrape_company_website(input_data)
+        # Normalise falsy results (None, empty values) to an empty string.
+        return result if result else ""
+    except Exception as e:
+        # Best-effort: any failure (including a non-str input_data) is
+        # reported on stdout and collapsed to an empty result.
+        print(f"Error in scrape_company_info: {e}")
+        return ""