AIRider committed
Commit a67b340 · verified · 1 Parent(s): b843687

Update app.py

Files changed (1)
  1. app.py +1 -120
app.py CHANGED
@@ -5,98 +5,6 @@ import re
 from huggingface_hub import InferenceClient
 from fpdf import FPDF
 from datetime import datetime
-import requests
-from bs4 import BeautifulSoup
-from requests.adapters import HTTPAdapter
-from requests.packages.urllib3.util.retry import Retry
-import time
-
-def setup_session():
-    try:
-        session = requests.Session()
-        retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
-        session.mount('https://', HTTPAdapter(max_retries=retries))
-        return session
-    except Exception as e:
-        return None
-
-def generate_naver_search_url(query):
-    base_url = "https://search.naver.com/search.naver?"
-    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
-    url = base_url + "&".join(f"{key}={value}" for key, value in params.items())
-    return url
-
-def crawl_blog_content(url, session):
-    try:
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
-            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
-            "Accept-Encoding": "gzip, deflate, br",
-            "Connection": "keep-alive",
-            "Referer": "https://search.naver.com/search.naver",
-        }
-
-        delay = random.uniform(1, 2)
-        time.sleep(delay)
-
-        response = session.get(url, headers=headers)
-        if response.status_code != 200:
-            return ""
-
-        soup = BeautifulSoup(response.content, "html.parser")
-        content = soup.find("div", attrs={'class': 'se-main-container'})
-
-        if content:
-            return clean_text(content.get_text())
-        else:
-            return ""
-    except Exception as e:
-        return ""
-
-def crawl_naver_search_results(url, session):
-    try:
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
-            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
-            "Accept-Encoding": "gzip, deflate, br",
-            "Connection": "keep-alive",
-            "Referer": "https://search.naver.com/search.naver",
-        }
-        response = session.get(url, headers=headers)
-        if response.status_code != 200:
-            return []
-
-        soup = BeautifulSoup(response.content, "html.parser")
-        results = []
-        count = 0
-        for li in soup.find_all("li", class_=re.compile("bx.*")):
-            if count >= 10:
-                break
-            for div in li.find_all("div", class_="detail_box"):
-                for div2 in div.find_all("div", class_="title_area"):
-                    title = div2.text.strip()
-                    for a in div2.find_all("a", href=True):
-                        link = a["href"]
-                        if "blog.naver" in link:
-                            link = link.replace("https://", "https://m.")
-                            results.append({"์ œ๋ชฉ": title, "๋งํฌ": link})
-                            count += 1
-                            if count >= 10:
-                                break
-                    if count >= 10:
-                        break
-                if count >= 10:
-                    break
-
-        return results
-    except Exception as e:
-        return []
-
-def clean_text(text):
-    text = re.sub(r'\s+', ' ', text).strip()
-    return text
 
 def create_client(model_name):
     return InferenceClient(model_name, token=os.getenv("HF_TOKEN"))
@@ -133,22 +41,6 @@ def generate_blog_post(category, style, topic, references1, references2, referen
     formatted_text = modified_text.replace('\n', '\n\n')
     return formatted_text
 
-def fetch_references(topic):
-    search_url = generate_naver_search_url(topic)
-    session = setup_session()
-    if session is None:
-        return "Failed to set up session.", "", "", ""
-    results = crawl_naver_search_results(search_url, session)
-    if not results:
-        return "No results found.", "", "", ""
-
-    selected_results = random.sample(results, 3)
-    references1_content = f"์ œ๋ชฉ: {selected_results[0]['์ œ๋ชฉ']}\n๋‚ด์šฉ: {crawl_blog_content(selected_results[0]['๋งํฌ'], session)}"
-    references2_content = f"์ œ๋ชฉ: {selected_results[1]['์ œ๋ชฉ']}\n๋‚ด์šฉ: {crawl_blog_content(selected_results[1]['๋งํฌ'], session)}"
-    references3_content = f"์ œ๋ชฉ: {selected_results[2]['์ œ๋ชฉ']}\n๋‚ด์šฉ: {crawl_blog_content(selected_results[2]['๋งํฌ'], session)}"
-
-    return "์ฐธ๊ณ ๊ธ€ ์ƒ์„ฑ ์™„๋ฃŒ", references1_content, references2_content, references3_content
-
 def get_title_prompt(category):
     if (category == "๊ณ ๊ฐ๋ฐ˜์‘ํ˜•"):
         return """
@@ -399,7 +291,7 @@
     gr.Markdown(f"# {title}")
 
     gr.Markdown("### 1๋‹จ๊ณ„: ํฌ์ŠคํŒ… ์นดํ…Œ๊ณ ๋ฆฌ๋ฅผ ์ง€์ •ํ•ด์ฃผ์„ธ์š”", elem_id="step-title")
-    category = gr.Radio(choices=["๊ณ ๊ฐ๋ฐ˜์‘ํ˜•"], label="ํฌ์ŠคํŒ… ์นดํ…Œ๊ณ ๋ฆฌ", value="๊ณ ๊ฐ๋ฐ˜์‘ํ˜•")
+    category = gr.Radio(choices=["๊ณ ๊ฐ๋ฐ˜์‘ํ˜•","๊ธฐ๋Šฅ์ง‘์ค‘ํ˜•(1๊ฐœ ๊ธฐ๋Šฅ)"], label="ํฌ์ŠคํŒ… ์นดํ…Œ๊ณ ๋ฆฌ", value="๊ณ ๊ฐ๋ฐ˜์‘ํ˜•")
 
     gr.Markdown("---\n\n")
 
@@ -419,17 +311,6 @@
     references2 = gr.Textbox(label="์ฐธ๊ณ  ๊ธ€ 2", placeholder="์ฐธ๊ณ ํ•  ๊ธ€์„ ๋ณต์‚ฌํ•˜์—ฌ ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”", lines=10, visible=True)
     references3 = gr.Textbox(label="์ฐธ๊ณ  ๊ธ€ 3", placeholder="์ฐธ๊ณ ํ•  ๊ธ€์„ ๋ณต์‚ฌํ•˜์—ฌ ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”", lines=10, visible=True)
 
-    # ์ง„ํ–‰ ์ƒํ™ฉ ํ‘œ์‹œ๋ฅผ ์œ„ํ•œ ์ถœ๋ ฅ ํ…์ŠคํŠธ๋ฐ•์Šค
-    progress_output = gr.Textbox(label="์ง„ํ–‰ ์ƒํ™ฉ", lines=2, visible=True)
-
-    # ์ฐธ๊ณ ๊ธ€ ๊ฐ€์ ธ์˜ค๊ธฐ ๋ฒ„ํŠผ
-    fetch_references_btn = gr.Button("์ฐธ๊ณ ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ")
-    fetch_references_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
-
-    # ์ฐธ๊ณ ๊ธ€ ๋‹ค์‹œ ๋„ฃ๊ธฐ ๋ฒ„ํŠผ
-    refill_btn = gr.Button("์ฐธ๊ณ ๊ธ€ ๋‹ค์‹œ ๋„ฃ๊ธฐ")
-    refill_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
-
     gr.Markdown("---\n\n")
 
     gr.Markdown("### 5๋‹จ๊ณ„: ๋ธ”๋กœ๊ทธ ์ œ๋ชฉ์„ ์ž…๋ ฅํ•˜์„ธ์š”", elem_id="step-title")
 