File size: 2,158 Bytes
c05ccf6
c35600d
1840ee6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef1d2b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1840ee6
8904944
 
1840ee6
8904944
1840ee6
 
8904944
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gradio as gr
import requests
from bs4 import BeautifulSoup

def get_url_content(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The response text when the status code is 200. For any other status,
        or when the request itself fails (invalid URL, connection error,
        timeout), a fixed failure-message string is returned instead.
    """
    failure_message = "URL์—์„œ ์ฝ˜ํ…์ธ ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network-level failures take the same path as a non-200 status so
        # callers always receive a string rather than an exception.
        return failure_message
    if response.status_code == 200:
        return response.text
    return failure_message

def parse_html(html_content):
    """Return ``html_content`` re-serialized by BeautifulSoup's ``prettify``.

    The markup is parsed with the ``html.parser`` backend. No elements are
    selected or filtered (e.g. via ``find_all('p')``); the whole parsed
    document is returned as an indented string.
    """
    return BeautifulSoup(html_content, 'html.parser').prettify()

# Gradio interface function
def gradio_fetch_and_parse(url):
    """Download ``url`` and return its content as prettified HTML text.

    Chains ``get_url_content`` and ``parse_html``; whatever string the fetch
    step returns is passed straight to the parser.
    """
    return parse_html(get_url_content(url))

def get_main_content(html_content):
    """Extract the text of the first ``<div class="main-content">`` element.

    The page body is assumed to live inside that div. Returns the element's
    text with surrounding whitespace stripped from each text fragment, or an
    empty string when no such element is found.
    """
    container = BeautifulSoup(html_content, 'html.parser').find(
        'div', class_='main-content'
    )
    if not container:
        return ''
    return container.get_text(strip=True)

def format_script(text, max_sentences=10, sentences_per_line=2):
    """Format the leading sentences of ``text`` as script lines.

    The text is split on ``'.'``; each fragment is stripped and empty
    fragments (from a trailing period, repeated periods, or blank input) are
    discarded. The first ``max_sentences`` remaining sentences are grouped
    ``sentences_per_line`` at a time, each sentence gets its period back, and
    the groups are joined with newlines.

    Args:
        text: Source text. ``None`` or an empty/blank string yields ``''``.
        max_sentences: Maximum number of sentences to include (default 10).
        sentences_per_line: Sentences placed on each output line (default 2).

    Returns:
        The formatted script, with no trailing newline; ``''`` when the text
        contains no sentences.

    Raises:
        ValueError: If ``max_sentences`` is negative or
            ``sentences_per_line`` is less than 1.
    """
    if max_sentences < 0:
        raise ValueError("max_sentences must be non-negative")
    if sentences_per_line < 1:
        raise ValueError("sentences_per_line must be at least 1")
    if not text:
        return ''

    # NOTE: splitting on '.' is a naive sentence boundary; decimals and
    # abbreviations such as "3.14" are split as well.
    fragments = (part.strip() for part in text.split('.'))
    sentences = [part for part in fragments if part][:max_sentences]

    lines = [
        ' '.join(
            sentence + '.'
            for sentence in sentences[start:start + sentences_per_line]
        )
        for start in range(0, len(sentences), sentences_per_line)
    ]
    return '\n'.join(lines)
    
# Gradio interface function
def gradio_fetch_and_format_script(url):
    """Download ``url`` and turn its main content into a short script.

    Chains ``get_url_content``, ``get_main_content`` and ``format_script``.
    """
    return format_script(get_main_content(get_url_content(url)))

# Gradio interface configuration: one Interface per feature, each taking a
# URL textbox as input and producing a textbox as output.
iface_html = gr.Interface(
    fn=gradio_fetch_and_parse,
    inputs=gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
    outputs=gr.Textbox(label="์Šคํฌ๋žฉ๋œ HTML ์ฝ˜ํ…์ธ ")
)

iface_script = gr.Interface(
    fn=gradio_fetch_and_format_script,
    inputs=gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
    outputs=gr.Textbox(label="์˜์ƒ์šฉ ์Šคํฌ๋ฆฝํŠธ")
)

# Combine both interfaces as tabs and start the app.
# The interface list is passed positionally: gr.TabbedInterface names this
# parameter `interface_list`, so an `interfaces=` keyword raises TypeError.
iface_combined = gr.TabbedInterface(
    [iface_html, iface_script],
    tab_names=["HTML ๋ณด๊ธฐ", "์Šคํฌ๋ฆฝํŠธ ์ƒ์„ฑ"],
)
iface_combined.launch()