File size: 2,533 Bytes
c05ccf6
c35600d
1840ee6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef1d2b4
 
b1ad9d3
76666bd
b1ad9d3
 
 
 
 
9f47898
f31d39f
 
38b8df6
f31d39f
ef1d2b4
38b8df6
9f47898
ef1d2b4
 
 
 
f31d39f
ef1d2b4
f31d39f
 
 
ef1d2b4
f31d39f
ef1d2b4
 
bf141c7
ef1d2b4
 
bf141c7
 
 
 
ef1d2b4
19ef13d
1840ee6
8904944
 
1840ee6
8904944
1840ee6
 
3476805
8904944
3476805
 
 
8904944
 
3476805
8904944
19ef13d
 
9f47898
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gradio as gr
import requests
from bs4 import BeautifulSoup

def get_url_content(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on an unresponsive host.

    Returns:
        The response text when the server answers with status 200. In every
        other case (non-200 status, connection error, timeout, malformed
        URL) a fixed failure message is returned in place of the content,
        so callers cannot tell it apart from a real body by type alone.
    """
    failure_message = "URL์—์„œ ์ฝ˜ํ…์ธ ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š” ๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network failures and invalid URLs take the same path as a bad
        # status code instead of crashing the UI callback.
        return failure_message
    if response.status_code == 200:
        return response.text
    return failure_message

def parse_html(html_content):
    """Return ``html_content`` re-indented by BeautifulSoup's ``prettify``.

    The markup is parsed with the built-in ``html.parser`` backend. No
    elements are filtered; this is the place to narrow the output to
    specific tags (for example ``soup.find_all('p')``) if that is wanted.
    """
    return BeautifulSoup(html_content, 'html.parser').prettify()

# Gradio callback: fetch a page and show its pretty-printed HTML.
def gradio_fetch_and_parse(url):
    """Fetch ``url`` and return the prettified markup of what came back."""
    return parse_html(get_url_content(url))

def get_main_content(html_content):
    """Collect the ``content`` attribute of every ``<meta>`` tag in the page.

    The previous lookup, ``find_all('meta content')``, searched for a tag
    literally named "meta content", which never matches, so the function
    always returned an empty string. ``<meta>`` elements also have no text
    node, so their value has to be read from the ``content`` attribute.

    Args:
        html_content: HTML markup as a string.

    Returns:
        The non-blank ``content`` values, one per line (each followed by a
        newline), or ``''`` when the page has none. The outcome is also
        printed for debugging.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # content=True keeps only <meta> tags that actually carry the attribute.
    meta_tags = soup.find_all('meta', content=True)
    values = (tag['content'].strip() for tag in meta_tags)
    text = ''.join(f"{value}\n" for value in values if value)
    if not text:
        print("๋ณธ๋ฌธ ์ฝ˜ํ…์ธ ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        return ''
    print("์ถ”์ถœ๋œ ํ…์ŠคํŠธ:", text)
    return text



def format_script(text, max_sentences=10):
    """Lay out the leading sentences of ``text`` two per line.

    The text is split on ``'.'``; blank fragments (such as the empty piece
    after a final period) are discarded so they no longer produce stray
    ``'. '`` output. Every kept sentence gets its period back and every
    line ends with a newline, including a last line holding one sentence.

    Args:
        text: Source text to split into sentences.
        max_sentences: Upper bound on the number of sentences used.

    Returns:
        The formatted script, or ``''`` when ``text`` has no sentences.
        The script built so far is printed after each line for debugging.
    """
    fragments = (part.strip() for part in text.split('.'))
    sentences = [fragment for fragment in fragments if fragment]
    sentences = sentences[:max(max_sentences, 0)]

    script = ""
    for start in range(0, len(sentences), 2):
        pair = sentences[start:start + 2]
        script += ' '.join(f"{sentence}." for sentence in pair) + '\n'
        print("ํ˜„์žฌ ์Šคํฌ๋ฆฝํŠธ:", script)  # debug log of the running result
    return script

    
def gradio_fetch_and_format_script(url):
    """Gradio callback: fetch ``url`` and turn its main content into a script.

    Runs the fetch, extract and format steps in order, printing the
    intermediate value after each one for debugging, and returns the
    formatted script.
    """
    print("ํ•จ์ˆ˜ ํ˜ธ์ถœ๋จ:", url)
    page_html = get_url_content(url)
    body_text = get_main_content(page_html)
    print("์ถ”์ถœ๋œ ๋ณธ๋ฌธ:", body_text)
    result = format_script(body_text)
    print("์ƒ์„ฑ๋œ ์Šคํฌ๋ฆฝํŠธ:", result)
    return result


# Tab 1: show the prettified HTML fetched from a URL.
iface_html = gr.Interface(
    fn=gradio_fetch_and_parse,
    inputs=gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
    outputs=gr.Textbox(label="์Šคํฌ๋žฉ๋œ HTML ์ฝ˜ํ…์ธ "),
)

# Tab 2: build a video script from the content fetched from a URL.
iface_script = gr.Interface(
    fn=gradio_fetch_and_format_script,  # callback invoked on submit
    inputs=gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),  # input field
    outputs=gr.Textbox(label="์˜์ƒ์šฉ ์Šคํฌ๋ฆฝํŠธ"),  # output field
)

# Combine both interfaces as tabs and start the app.
iface_combined = gr.TabbedInterface(
    interface_list=[iface_html, iface_script],
    tab_names=["HTML ๋ณด๊ธฐ", "์Šคํฌ๋ฆฝํŠธ ์ƒ์„ฑ"],
)
iface_combined.launch()