Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ from bs4 import BeautifulSoup
|
|
4 |
from requests.adapters import HTTPAdapter
|
5 |
from requests.packages.urllib3.util.retry import Retry
|
6 |
import re
|
|
|
7 |
|
8 |
def setup_session():
|
9 |
session = requests.Session()
|
@@ -39,9 +40,11 @@ def crawl_naver_search_results(url):
|
|
39 |
return html_table
|
40 |
|
41 |
def get_blog_content(link):
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
45 |
title = soup.find("div", class_="se-module se-module-text se-title-text")
|
46 |
if title:
|
47 |
title = title.find("span").text.strip().replace("<!-- -->", "")
|
|
|
4 |
from requests.adapters import HTTPAdapter
|
5 |
from requests.packages.urllib3.util.retry import Retry
|
6 |
import re
|
7 |
+
from selenium import webdriver
|
8 |
|
9 |
def setup_session():
|
10 |
session = requests.Session()
|
|
|
40 |
return html_table
|
41 |
|
42 |
def get_blog_content(link):
|
43 |
+
options = webdriver.ChromeOptions()
|
44 |
+
options.add_argument("--disable-javascript")
|
45 |
+
driver = webdriver.Chrome(options=options)
|
46 |
+
driver.get(link)
|
47 |
+
soup = BeautifulSoup(driver.page_source, "html.parser")
|
48 |
title = soup.find("div", class_="se-module se-module-text se-title-text")
|
49 |
if title:
|
50 |
title = title.find("span").text.strip().replace("<!-- -->", "")
|