Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,6 +19,12 @@ USER_AGENTS = [
|
|
19 |
def scrape_blog(url):
|
20 |
logging.debug("์คํฌ๋ํ ์์: %s", url)
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
# 요청 전에 1~3초 랜덤 딜레이 적용 (서버 부하 및 봇 감지 회피)
|
23 |
delay = random.uniform(1, 3)
|
24 |
logging.debug("์์ฒญ ์ ๋๋ค ๋๋ ์ด: %.2f์ด", delay)
|
@@ -28,7 +34,7 @@ def scrape_blog(url):
|
|
28 |
user_agent = random.choice(USER_AGENTS)
|
29 |
headers = {
|
30 |
"User-Agent": user_agent,
|
31 |
-
"Referer": "https://m.blog.naver.com", #
|
32 |
"Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7"
|
33 |
}
|
34 |
logging.debug("์ค์ ๋ HTTP ํค๋: %s", headers)
|
@@ -47,7 +53,7 @@ def scrape_blog(url):
|
|
47 |
soup = BeautifulSoup(response.text, "html.parser")
|
48 |
logging.debug("HTML ํ์ฑ ์๋ฃ")
|
49 |
|
50 |
-
# 제목 추출 (HTML 구조
|
51 |
title_div = soup.find("div", class_="se-module se-module-text se-title-text")
|
52 |
if title_div:
|
53 |
title_tag = title_div.find(["p", "span"])
|
@@ -59,7 +65,7 @@ def scrape_blog(url):
|
|
59 |
title_text = ""
|
60 |
logging.debug("์ถ์ถ๋ ์ ๋ชฉ: %s", title_text)
|
61 |
|
62 |
-
# 본문 추출 (HTML 구조
|
63 |
body_div = soup.find("div", class_="se-main-container")
|
64 |
if body_div:
|
65 |
# 본문 내 모든 텍스트를 추출하며 각 요소 사이에 줄바꿈 적용
|
|
|
19 |
def scrape_blog(url):
|
20 |
logging.debug("์คํฌ๋ํ ์์: %s", url)
|
21 |
|
22 |
+
# URL์ด ๋ชจ๋ฐ์ผ ๋ฒ์ ์ด ์๋ ๊ฒฝ์ฐ ๋ชจ๋ฐ์ผ ๋ฒ์ ์ผ๋ก ๋ณํ
|
23 |
+
if "m.blog.naver.com" not in url:
|
24 |
+
new_url = url.replace("blog.naver.com", "m.blog.naver.com")
|
25 |
+
logging.debug("URL์ ๋ชจ๋ฐ์ผ ๋ฒ์ ์ผ๋ก ๋ณํ: %s", new_url)
|
26 |
+
url = new_url
|
27 |
+
|
28 |
# 요청 전에 1~3초 랜덤 딜레이 적용 (서버 부하 및 봇 감지 회피)
|
29 |
delay = random.uniform(1, 3)
|
30 |
logging.debug("์์ฒญ ์ ๋๋ค ๋๋ ์ด: %.2f์ด", delay)
|
|
|
34 |
user_agent = random.choice(USER_AGENTS)
|
35 |
headers = {
|
36 |
"User-Agent": user_agent,
|
37 |
+
"Referer": "https://m.blog.naver.com", # 모바일 페이지에서 온 것처럼 설정
|
38 |
"Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7"
|
39 |
}
|
40 |
logging.debug("์ค์ ๋ HTTP ํค๋: %s", headers)
|
|
|
53 |
soup = BeautifulSoup(response.text, "html.parser")
|
54 |
logging.debug("HTML ํ์ฑ ์๋ฃ")
|
55 |
|
56 |
+
# 제목 추출 (모바일 버전 HTML 구조 사용)
|
57 |
title_div = soup.find("div", class_="se-module se-module-text se-title-text")
|
58 |
if title_div:
|
59 |
title_tag = title_div.find(["p", "span"])
|
|
|
65 |
title_text = ""
|
66 |
logging.debug("์ถ์ถ๋ ์ ๋ชฉ: %s", title_text)
|
67 |
|
68 |
+
# 본문 추출 (모바일 버전 HTML 구조 사용)
|
69 |
body_div = soup.find("div", class_="se-main-container")
|
70 |
if body_div:
|
71 |
# 본문 내 모든 텍스트를 추출하며 각 요소 사이에 줄바꿈 적용
|