Kims12 committed on
Commit
9c3152a
·
verified ·
1 Parent(s): 69ba054

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import random
5
+ import time
6
+
7
+ def scrape_naver_blog(url):
8
+ # ๊ฐ ์š”์ฒญ ์‚ฌ์ด์— 1์ดˆ์—์„œ 3์ดˆ ์‚ฌ์ด์˜ ๋žœ๋ค ๋”œ๋ ˆ์ด ์ ์šฉ
9
+ delay = random.uniform(1, 3)
10
+ time.sleep(delay)
11
+
12
+ # ์—ฌ๋Ÿฌ ๋ธŒ๋ผ์šฐ์ €์˜ User-Agent ๋ฌธ์ž์—ด์„ ๋žœ๋ค์œผ๋กœ ์„ ํƒ
13
+ user_agents = [
14
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
15
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.1 Safari/605.1.15",
16
+ "Mozilla/5.0 (Linux; Android 11; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36"
17
+ ]
18
+
19
+ headers = {
20
+ "User-Agent": random.choice(user_agents),
21
+ "Referer": "https://m.blog.naver.com", # ๋„ค์ด๋ฒ„ ๋ชจ๋ฐ”์ผ ํŽ˜์ด์ง€์—์„œ ์˜จ ๊ฒƒ์ฒ˜๋Ÿผ ๋ณด์ด๋„๋ก ์„ค์ •
22
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
23
+ }
24
+
25
+ # ์ง€์ •๋œ URL์— GET ์š”์ฒญ
26
+ response = requests.get(url, headers=headers)
27
+ response.raise_for_status()
28
+
29
+ # BeautifulSoup์œผ๋กœ HTML ํŒŒ์‹ฑ
30
+ soup = BeautifulSoup(response.text, "html.parser")
31
+
32
+ # ์ œ๋ชฉ ์ถ”์ถœ: <div class="se-module se-module-text se-title-text"> ๋‚ด๋ถ€์˜ ํ…์ŠคํŠธ ์ถ”์ถœ
33
+ title_div = soup.find("div", class_="se-module se-module-text se-title-text")
34
+ if title_div:
35
+ title_text = title_div.get_text(strip=True)
36
+ else:
37
+ title_text = "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
38
+
39
+ # ๋ณธ๋ฌธ ๋‚ด์šฉ ์ถ”์ถœ: <div class="se-main-container"> ๋‚ด๋ถ€์˜ ๋ชจ๋“  ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœ
40
+ main_container = soup.find("div", class_="se-main-container")
41
+ if main_container:
42
+ content_text = main_container.get_text(separator="\n", strip=True)
43
+ else:
44
+ content_text = "๋ณธ๋ฌธ ๋‚ด์šฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
45
+
46
+ # ์ œ๋ชฉ๊ณผ ๋ณธ๋ฌธ์„ ํ•˜๋‚˜์˜ ํ…์ŠคํŠธ๋กœ ๊ฒฐํ•ฉ
47
+ result = f"์ œ๋ชฉ: {title_text}\n\n๋ณธ๋ฌธ:\n{content_text}"
48
+ return result
49
+
50
# Gradio UI: one textbox takes the blog post URL, another shows the scraped
# text. Components are referenced as gr.Textbox because the older
# gr.inputs / gr.outputs namespaces were deprecated and later removed.
interface = gr.Interface(
    fn=scrape_naver_blog,
    inputs=gr.Textbox(label="네이버 블로그 URL 입력"),
    outputs=gr.Textbox(label="스크래핑 결과"),
    title="네이버 블로그 스크래핑 도구",
    description="네이버 블로그의 제목과 본문 내용을 스크래핑합니다. 링크를 입력하세요.",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()