Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -21,11 +21,34 @@ def gradio_fetch_and_parse(url):
|
|
21 |
parsed_content = parse_html(html_content)
|
22 |
return parsed_content
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
# Gradio interface configuration
|
25 |
iface = gr.Interface(
|
26 |
-
fn=
|
27 |
inputs=gr.Textbox(label="URL์ ์
๋ ฅํ์ธ์"),
|
28 |
-
outputs=gr.Textbox(label="
|
29 |
)
|
30 |
|
31 |
-
iface.launch()
|
|
|
21 |
parsed_content = parse_html(html_content)
|
22 |
return parsed_content
|
23 |
|
24 |
+
def get_main_content(html_content):
    """Return the text found inside the page's main-content container.

    The markup is parsed with BeautifulSoup's built-in 'html.parser' and
    searched for a <div> whose class is 'main-content'.

    Args:
        html_content: HTML markup to parse.

    Returns:
        The div's text as produced by get_text(strip=True), or '' when no
        such div is found.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    # Example assumption: the article body lives in <div class="main-content">.
    container = document.find('div', class_='main-content')
    if not container:
        return ''
    return container.get_text(strip=True)
|
29 |
+
|
30 |
+
def format_script(text):
    """Format running text as a short script with two sentences per line.

    The text is split on '.', blank fragments are discarded, and at most the
    first ten sentences are kept. Sentences are then grouped in pairs; every
    sentence is written with a closing period, the two sentences of a pair
    are separated by a space, and every line ends with a newline.

    Args:
        text: Plain text to format. May be empty.

    Returns:
        The formatted script, or '' when the text contains no sentences.
    """
    if not text:
        return ''

    # Splitting on '.' is a deliberately simple sentence boundary; it also
    # splits decimals and abbreviations. Dropping blank fragments keeps a
    # trailing period (or an empty input) from producing a stray ". ".
    fragments = (piece.strip() for piece in text.split('.'))
    sentences = [piece for piece in fragments if piece][:10]

    lines = []
    for start in range(0, len(sentences), 2):
        pair = sentences[start:start + 2]
        lines.append(' '.join(sentence + '.' for sentence in pair) + '\n')
    return ''.join(lines)
|
40 |
+
|
41 |
+
# Gradio interface function
|
42 |
+
def gradio_fetch_and_format_script(url):
    """Fetch a page and return its main content formatted as a script.

    Chains three steps: get_url_content(url) supplies the HTML,
    get_main_content() extracts the main text from it, and format_script()
    turns that text into the script that is returned.

    Args:
        url: Address passed through to get_url_content.

    Returns:
        Whatever format_script returns for the extracted text.
    """
    return format_script(get_main_content(get_url_content(url)))
|
46 |
+
|
47 |
# Gradio interface configuration
|
48 |
iface = gr.Interface(
|
49 |
+
fn=gradio_fetch_and_format_script,
|
50 |
inputs=gr.Textbox(label="URL์ ์
๋ ฅํ์ธ์"),
|
51 |
+
outputs=gr.Textbox(label="์์์ฉ ์คํฌ๋ฆฝํธ")
|
52 |
)
|
53 |
|
54 |
+
iface.launch()
|