Kims12 committed on
Commit
9ab7afe
·
verified ·
1 Parent(s): 34c33d9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +210 -0
app.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import openai
3
+ import os
4
+ from typing import Optional
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+
8
+ #############################
9
+
10
# Configure the OpenAI API client from the environment.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    # Fail at import time: without a key every later API call would fail.
    # The message reads: "The OpenAI API token (OPENAI_API_KEY) is not set."
    raise ValueError("OpenAI API 토큰(OPENAI_API_KEY)이 설정되지 않았습니다.")
14
+
15
def call_openai_api(
    content: str,
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Send one question to OpenAI's gpt-4o-mini chat model and return the reply.

    Args:
        content: The user message (the question / prompt body).
        system_message: The system prompt sent ahead of the user message.
        max_tokens: Upper bound on the number of tokens in the reply.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.

    Returns:
        The text of the first choice's message. If anything raises, the
        exception is not propagated; instead a string of the form
        "오류가 발생했습니다: <error>" ("An error occurred: ...") is returned.
    """
    try:
        # NOTE(review): `openai.ChatCompletion` is the pre-1.0 SDK interface.
        # With openai>=1.0 this call is expected to raise and end up in the
        # error string below - confirm the pinned SDK version.
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": content},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return response.choices[0].message["content"]
    except Exception as e:
        # Deliberate catch-all: the caller gets a readable message instead of
        # an exception.
        return f"오류가 발생했습니다: {e}"
40
+
41
#############################
# Advanced settings (OpenAI) - defined in code only (must not be exposed in the UI)
#############################

# System prompt sent with every request. It is Korean on purpose: it tells the
# model to answer in Korean and to write an informative blog post that follows
# the listed writing and exclusion rules.
OPENAI_SYSTEM_MESSAGE = """반드시 한글로 답변할 것.
너는 최고의 비서이다.
내가 요구하는 것들을 최대한 자세하고 정확하게 답변하라.
##[기본규칙]
1. 반드시 한국어(한글)로 작성하라.
2. 너는 가장 주목받는 마케터이며 블로그 마케팅 전문가이다.
3. 특히 너는 '정보성(Informative)' 전문 블로그 마케팅 전문가이다.
4. 정보 제공에 초점을 맞추어 작성한다.
##[텍스트 작성 규칙]
1. 소주제를 5개로 구분하여 2000자 이상되도록 작성하라.
2. 전체 맥락을 이해하고 문장의 일관성을 유지하라.
3. 절대로 참고글을 한문장 이상 그대로 출력하지 말 것.
4. 주제와 상황에 맞는 적절한 어휘를 선택하라.
5. 한글 어휘의 난이도는 쉽게 작성하라.
6. 절대 문장의 끝에 '답니다'를 사용하지 말 것.
###[정보성 블로그 작성 규칙]
1. 독자가 얻고자 하는 유용한 정보와 흥미로운 정보를 제공하도록 작성하라.
2. 독자의 공감을 이끌어내고 궁금증을 해결하도록 작성하라.
3. 독자의 관심사를 충족시키도록 작성하라.
4. 독자에게 이득이 되는 정보를 작성하라.
##[제외 규칙]
1. 반드시 비속어 및 욕설(expletive, abusive language, slang)은 제외하라.
2. 반드시 참고글의 링크(URL)는 제외하라.
3. 참고글에서 '링크를 확인해주세요'와 같은 링크 이동의 문구는 제외하라.
4. 참고글에 있는 작성자, 화자, 유튜버, 기자의 이름, 애칭, 닉네임은 반드시 제외하라.
5. 반드시 문장의 끝부분이 어색한 한국어 표현은 제외하라('예요', '답니다', '해요', '해주죠', '됐죠', '됐어요', '고요' 등.)
"""

# Generation parameters forwarded unchanged to the chat-completion call.
OPENAI_MAX_TOKENS = 4000
OPENAI_TEMPERATURE = 0.7
OPENAI_TOP_P = 0.95
76
+
77
#############################
# UI - blog generator
#############################
def blog_generator():
    """Build the blog-generator UI and return its ``gr.Blocks`` container.

    The layout has a tone selector, three reference-text inputs, a read-only
    result box, and a button that sends the inputs to the OpenAI model.
    """
    with gr.Blocks() as blog_tab:
        gr.Markdown("# 블로그 생성기")

        # Tone selector (radio buttons): friendly / general / professional.
        tone_radio = gr.Radio(
            label="말투바꾸기",
            choices=["친근하게", "일반적인", "전문적인"],
            value="일반적인",  # default selection
        )

        # Reference-text inputs (three of them).
        ref1 = gr.Textbox(label="참조글 1")
        ref2 = gr.Textbox(label="참조글 2")
        ref3 = gr.Textbox(label="참조글 3")

        output_box = gr.Textbox(label="결과", lines=20, interactive=False)

        def generate_blog(tone_value: str, ref1_value: str, ref2_value: str, ref3_value: str) -> str:
            """Assemble the prompt from the form values and return the model's reply."""
            # Build the prompt: one labelled line per form field.
            question = (
                f"말투: {tone_value}\n"
                f"참조글1: {ref1_value}\n"
                f"참조글2: {ref2_value}\n"
                f"참조글3: {ref3_value}\n"
            )

            # Call the OpenAI gpt-4o-mini model with the code-defined settings.
            response = call_openai_api(
                content=question,
                system_message=OPENAI_SYSTEM_MESSAGE,
                max_tokens=OPENAI_MAX_TOKENS,
                temperature=OPENAI_TEMPERATURE,
                top_p=OPENAI_TOP_P,
            )
            return response

        generate_button = gr.Button("생성하기")
        generate_button.click(
            fn=generate_blog,
            inputs=[tone_radio, ref1, ref2, ref3],
            outputs=output_box,
        )
    return blog_tab
124
+
125
#############################
# Additional feature - Naver blog scraping
#############################

def convert_to_mobile_url(url):
    """Convert a desktop Naver blog post URL into its mobile equivalent.

    Accepted desktop forms (the scheme is optional):
      * ``https://blog.naver.com/<user_id>/<post_id>``
      * ``https://blog.naver.com/PostView.naver?blogId=<user_id>&logNo=<post_id>``

    Args:
        url: The URL to convert.

    Returns:
        ``https://m.blog.naver.com/<user_id>/<post_id>`` when a user id and a
        post id can be extracted from a ``blog.naver.com`` URL. Any query
        string or fragment on the input is dropped. In every other case
        (already mobile, not a Naver blog host, missing post id, empty input)
        ``url`` is returned unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    if not url or not isinstance(url, str):
        return url

    candidate = url.strip()
    # urlparse only recognises the host when a scheme is present, so supply
    # one for inputs such as "blog.naver.com/user/123".
    if "://" not in candidate:
        candidate = f"https://{candidate}"

    parsed = urlparse(candidate)
    host = (parsed.hostname or "").lower()
    # Compare the actual host rather than searching the whole string, so that
    # a foreign URL that merely contains "blog.naver.com" is left alone.
    if host not in ("blog.naver.com", "www.blog.naver.com"):
        return url

    query = parse_qs(parsed.query)
    segments = [segment for segment in parsed.path.split("/") if segment]

    user_id = post_id = None
    if "blogId" in query and "logNo" in query:
        # Legacy PostView-style URL: the ids live in the query string.
        user_id, post_id = query["blogId"][0], query["logNo"][0]
    elif len(segments) >= 2:
        user_id, post_id = segments[0], segments[1]

    if user_id and post_id:
        return f"https://m.blog.naver.com/{user_id}/{post_id}"
    return url
141
+
142
def scrape_naver_blog(url):
    """Scrape the title and the text-only body of a Naver blog post.

    The URL is first converted to the mobile site form, fetched, and parsed
    with BeautifulSoup.

    Args:
        url: URL of the blog post (desktop or mobile form).

    Returns:
        A string "제목: <title>\\n\\n내용: <content>" ("Title: ... Content: ...").
        Placeholder texts are used when the title or body elements are not
        found. On any error the exception is not raised; a string
        "오류가 발생했습니다: <error>" ("An error occurred: ...") is returned.
    """
    try:
        # Convert to the mobile URL.
        mobile_url = convert_to_mobile_url(url)
        print(f"Converted Mobile URL: {mobile_url}")

        # A timeout keeps an unresponsive server from blocking the UI forever;
        # a timeout error is reported through the except branch below.
        response = requests.get(mobile_url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Scrape the title.
        # NOTE(review): a multi-word class_ string is presumably matched
        # against the exact class attribute value - confirm with the bs4 docs.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"

        # Scrape the body content, one line per text module.
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        if content_elements:
            content = "\n".join(elem.get_text(strip=True) for elem in content_elements)
        else:
            content = "내용을 찾을 수 없음"

        # Print debug messages.
        print(f"Scraped Title: {title}")
        print(f"Scraped Content: {content}")

        # Return the result.
        return f"제목: {title}\n\n내용: {content}"

    except Exception as e:
        # Deliberate catch-all: the caller gets a readable message instead of
        # an exception.
        print(f"Error: {e}")
        return f"오류가 발생했습니다: {e}"
177
+
178
# Gradio interface definition
def run_scraper(url):
    """Gradio click handler: scrape the given Naver blog URL and return the text."""
    return scrape_naver_blog(url)
181
+
182
def naver_blog_scraper():
    """Build the Naver blog scraping UI and return its ``gr.Blocks`` container.

    The layout has a URL input, a read-only result box, and a button that
    runs ``run_scraper`` on the entered URL.
    """
    with gr.Blocks() as scraper_tab:
        gr.Markdown("# 네이버 블로그 스크래핑")

        url_input = gr.Textbox(label="네이버 블로그 URL")
        output_box = gr.Textbox(label="스크래핑 결과", lines=20, interactive=False)
        scrape_button = gr.Button("스크래핑하기")

        scrape_button.click(
            fn=run_scraper,
            inputs=url_input,
            outputs=output_box,
        )
    return scraper_tab
196
+
197
#############################
# Main UI - tab layout
#############################
with gr.Blocks() as demo:
    gr.Markdown("# 통합 Gradio 애플리케이션")

    # Each tab body is created by its builder function inside the tab context.
    with gr.Tab("블로그 생성기"):
        blog_tab = blog_generator()

    with gr.Tab("네이버 블로그 스크래핑"):
        scraper_tab = naver_blog_scraper()

if __name__ == "__main__":
    demo.launch()