Kims12 commited on
Commit
4e5b597
·
verified ·
1 Parent(s): 4edbd70

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +5 -4
  2. app.py +67 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
- title: Blog
3
- emoji: 😻
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Blogcr111111
3
+ emoji: 🏢
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: blogcr111111
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import gradio as gr
4
+
5
def convert_to_mobile_url(url):
    """Convert a PC Naver blog URL to its mobile (m.blog.naver.com) form.

    Expected PC format: https://blog.naver.com/<user_id>/<post_id>[?query].
    URLs that are already mobile, or that do not match the PC blog format,
    are returned unchanged.
    """
    # Already the mobile host — nothing to do.
    if "m.blog.naver.com" in url:
        return url
    if "blog.naver.com" in url:
        url_parts = url.split("/")
        # Split shape: "https:", "", host, user_id, post_id → at least 5 parts.
        if len(url_parts) >= 5:
            user_id = url_parts[3]
            # Drop any query string or fragment clinging to the post id,
            # otherwise it would corrupt the rebuilt mobile URL.
            post_id = url_parts[4].split("?")[0].split("#")[0]
            return f"https://m.blog.naver.com/{user_id}/{post_id}"
    # Not a recognizable PC blog URL — fall through unchanged.
    return url
17
+
18
def scrape_naver_blog(url):
    """Scrape the title and text content of a Naver blog post.

    Fetches the mobile version of the page, then extracts the title and
    body text from the SmartEditor ("se-module") containers.

    Returns a formatted result string on success, or an "Error: ..." string
    on any failure (network error, bad HTTP status, parsing problem).
    """
    try:
        # Use the mobile page: simpler markup than the PC page.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # Timeout keeps an unresponsive host from hanging the app forever.
        response = requests.get(mobile_url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Title lives in the SmartEditor title module.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"

        # Body text: every generic text module. NOTE(review): this selector
        # also matches the title module, so the title may repeat in content.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "내용을 찾을 수 없음"

        # Debug output for the server logs.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        result = f"제목: {title}\n\n내용: {content}"
        return result

    except Exception as e:
        # Deliberately broad: surface any failure to the UI as a string
        # instead of crashing the Gradio handler.
        print(f"Error: {e}")
        return f"Error: {e}"
53
+
54
# Gradio callback: thin adapter between the UI and the scraper.
def run_scraper(url):
    """Delegate the submitted URL to scrape_naver_blog and return its result."""
    result = scrape_naver_blog(url)
    return result
57
+
58
# Build the UI widgets first, then wire them into the Interface.
url_input = gr.Textbox(label="네이버 블로그 URL")
result_output = gr.Textbox(label="스크래핑 결과")

interface = gr.Interface(
    fn=run_scraper,
    inputs=url_input,
    outputs=result_output,
    title="네이버 블로그 스크래핑",
    description="네이버 블로그의 제목과 내용을 스크래핑합니다.",
)

# Launch only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ requests
3
+ beautifulsoup4