Kims12 commited on
Commit
4e5b597
·
verified ·
1 Parent(s): 4edbd70

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +5 -4
  2. app.py +67 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
- title: Blog
3
- emoji: ๐Ÿ˜ป
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Blogcr111111
3
+ emoji: ๐Ÿข
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: blogcr111111
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from urllib.parse import parse_qs, urlparse
+
+ import requests
+ from bs4 import BeautifulSoup
+ import gradio as gr
4
+
5
def convert_to_mobile_url(url):
    """Convert a desktop Naver blog URL to its mobile equivalent.

    Handles both the path form (``blog.naver.com/<user>/<post>``) and the
    common desktop query form
    (``blog.naver.com/PostView.naver?blogId=<user>&logNo=<post>``).
    URLs that are already on ``m.blog.naver.com``, or that are not Naver
    blog URLs at all, are returned unchanged.

    Args:
        url: The blog URL entered by the user.

    Returns:
        The ``https://m.blog.naver.com/...`` URL when a conversion
        applies, otherwise the original ``url``.
    """
    parsed = urlparse(url)
    host = parsed.netloc.lower()
    # Already mobile, or not a Naver blog host: nothing to convert.
    if host == "m.blog.naver.com" or "blog.naver.com" not in host:
        return url

    # Query form: /PostView.naver?blogId=<user>&logNo=<post>
    params = parse_qs(parsed.query)
    blog_id = params.get("blogId", [None])[0]
    log_no = params.get("logNo", [None])[0]
    if blog_id and log_no:
        return f"https://m.blog.naver.com/{blog_id}/{log_no}"

    # Path form: /<user>/<post> — parse the path rather than splitting the
    # raw URL so query strings/fragments never leak into the post id.
    segments = [part for part in parsed.path.split("/") if part]
    if len(segments) >= 2:
        return f"https://m.blog.naver.com/{segments[0]}/{segments[1]}"
    return url
17
+
18
def scrape_naver_blog(url):
    """Scrape the title and text content of a Naver blog post.

    Fetches the mobile version of the page (simpler, more stable markup)
    and extracts the SmartEditor (``se-*``) title and text modules.

    Args:
        url: Desktop or mobile Naver blog post URL.

    Returns:
        A formatted string containing the title and the body text, or an
        ``"Error: ..."`` string when fetching/parsing fails (so the UI
        shows the problem instead of crashing).
    """
    try:
        # Normalize to the mobile URL first.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # A browser-like User-Agent avoids trivial bot blocking; the
        # timeout keeps the Gradio worker from hanging on a dead host.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0 Safari/537.36"
            )
        }
        response = requests.get(mobile_url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Title lives in the dedicated se-title-text module.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Œ"

        # Body text: every se-module-text div (this includes the title
        # module too — kept to match the original behavior).
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "๋‚ด์šฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Œ"

        # Debug output to the Space logs.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        result = f"์ œ๋ชฉ: {title}\n\n๋‚ด์šฉ: {content}"
        return result

    except Exception as e:
        # Best-effort tool: surface the error as text rather than raising.
        print(f"Error: {e}")
        return f"Error: {e}"
53
+
54
# Gradio interface definition
def run_scraper(url):
    """Gradio callback: forward the submitted URL to the blog scraper."""
    result = scrape_naver_blog(url)
    return result
57
+
58
# Wire the scraper into a single-textbox-in, single-textbox-out Gradio UI.
interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL"),
    outputs=gr.Textbox(label="์Šคํฌ๋ž˜ํ•‘ ๊ฒฐ๊ณผ"),
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘",
    description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ์˜ ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค."
)

# Launch the app only when run as a script (Spaces runs this file directly).
if __name__ == "__main__":
    interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ requests
3
+ beautifulsoup4