Kims12 commited on
Commit
4e5b597
·
verified ·
1 Parent(s): 4edbd70

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +5 -4
  2. app.py +67 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
- title: Blog
3
- emoji: ๐Ÿ˜ป
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Blogcr111111
3
+ emoji: ๐Ÿข
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: blogcr111111
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from urllib.parse import parse_qs, urlparse
+
+ import requests
+ from bs4 import BeautifulSoup
+ import gradio as gr
4
+
5
def convert_to_mobile_url(url):
    """Convert a desktop Naver blog URL to its mobile equivalent.

    Handles both the path form (``blog.naver.com/<user>/<post>``) and the
    common desktop query form
    (``blog.naver.com/PostView.naver?blogId=<user>&logNo=<post>``).
    URLs that are already on ``m.blog.naver.com``, or that are not Naver
    blog URLs at all, are returned unchanged.

    Args:
        url: The blog URL entered by the user.

    Returns:
        The ``https://m.blog.naver.com/...`` URL when a conversion
        applies, otherwise the original ``url``.
    """
    parsed = urlparse(url)
    host = parsed.netloc.lower()
    # Already mobile, or not a Naver blog host: nothing to convert.
    if host == "m.blog.naver.com" or "blog.naver.com" not in host:
        return url

    # Query form: /PostView.naver?blogId=<user>&logNo=<post>
    params = parse_qs(parsed.query)
    blog_id = params.get("blogId", [None])[0]
    log_no = params.get("logNo", [None])[0]
    if blog_id and log_no:
        return f"https://m.blog.naver.com/{blog_id}/{log_no}"

    # Path form: /<user>/<post> — parse the path rather than splitting the
    # raw URL so query strings/fragments never leak into the post id.
    segments = [part for part in parsed.path.split("/") if part]
    if len(segments) >= 2:
        return f"https://m.blog.naver.com/{segments[0]}/{segments[1]}"
    return url
17
+
18
def scrape_naver_blog(url):
    """Scrape the title and text content of a Naver blog post.

    Fetches the mobile version of the page (simpler, more stable markup)
    and extracts the SmartEditor (``se-*``) title and text modules.

    Args:
        url: Desktop or mobile Naver blog post URL.

    Returns:
        A formatted string containing the title and the body text, or an
        ``"Error: ..."`` string when fetching/parsing fails (so the UI
        shows the problem instead of crashing).
    """
    try:
        # Normalize to the mobile URL first.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # A browser-like User-Agent avoids trivial bot blocking; the
        # timeout keeps the Gradio worker from hanging on a dead host.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0 Safari/537.36"
            )
        }
        response = requests.get(mobile_url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Title lives in the dedicated se-title-text module.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Œ"

        # Body text: every se-module-text div (this includes the title
        # module too — kept to match the original behavior).
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "๋‚ด์šฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Œ"

        # Debug output to the Space logs.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        result = f"์ œ๋ชฉ: {title}\n\n๋‚ด์šฉ: {content}"
        return result

    except Exception as e:
        # Best-effort tool: surface the error as text rather than raising.
        print(f"Error: {e}")
        return f"Error: {e}"
53
+
54
# Gradio interface definition
def run_scraper(url):
    """Gradio callback: forward the submitted URL to the blog scraper."""
    result = scrape_naver_blog(url)
    return result
57
+
58
# Wire the scraper into a single-textbox-in, single-textbox-out Gradio UI.
interface = gr.Interface(
    fn=run_scraper,
    inputs=gr.Textbox(label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL"),
    outputs=gr.Textbox(label="์Šคํฌ๋ž˜ํ•‘ ๊ฒฐ๊ณผ"),
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘",
    description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ์˜ ์ œ๋ชฉ๊ณผ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค."
)

# Launch the app only when run as a script (Spaces runs this file directly).
if __name__ == "__main__":
    interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ requests
3
+ beautifulsoup4