AIRider committed on
Commit
6ed34e9
·
verified ·
1 Parent(s): e42d3e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -4,6 +4,7 @@ from bs4 import BeautifulSoup
4
  from requests.adapters import HTTPAdapter
5
  from requests.packages.urllib3.util.retry import Retry
6
  import re
 
7
 
8
  def setup_session():
9
  session = requests.Session()
@@ -39,9 +40,11 @@ def crawl_naver_search_results(url):
39
  return html_table
40
 
41
  def get_blog_content(link):
42
- session = setup_session()
43
- response = session.get(link)
44
- soup = BeautifulSoup(response.text, "html.parser")
 
 
45
  title = soup.find("div", class_="se-module se-module-text se-title-text")
46
  if title:
47
  title = title.find("span").text.strip().replace("<!-- -->", "")
 
4
  from requests.adapters import HTTPAdapter
5
  from requests.packages.urllib3.util.retry import Retry
6
  import re
7
+ from selenium import webdriver
8
 
9
  def setup_session():
10
  session = requests.Session()
 
40
  return html_table
41
 
42
  def get_blog_content(link):
43
+ options = webdriver.ChromeOptions()
44
+ options.add_argument("--disable-javascript")
45
+ driver = webdriver.Chrome(options=options)
46
+ driver.get(link)
47
+ soup = BeautifulSoup(driver.page_source, "html.parser")
48
  title = soup.find("div", class_="se-module se-module-text se-title-text")
49
  if title:
50
  title = title.find("span").text.strip().replace("<!-- -->", "")