seawolf2357 committed on
Commit
ef1d2b4
·
verified ·
1 Parent(s): c05ccf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -3
app.py CHANGED
@@ -21,11 +21,34 @@ def gradio_fetch_and_parse(url):
21
  parsed_content = parse_html(html_content)
22
  return parsed_content
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # Gradio 인터페이스 구성
25
  iface = gr.Interface(
26
- fn=gradio_fetch_and_parse,
27
  inputs=gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
28
- outputs=gr.Textbox(label="웹페이지 콘텐츠")
29
  )
30
 
31
- iface.launch()
 
21
  parsed_content = parse_html(html_content)
22
  return parsed_content
23
 
24
def get_main_content(html_content):
    """Extract the text of the page's main-content container.

    Args:
        html_content: HTML markup of the page. Empty or ``None`` input
            yields an empty string instead of being handed to the parser.

    Returns:
        The text found inside the first ``<div class="main-content">``,
        with text fragments separated by single spaces, or ``''`` when the
        page has no such div.
    """
    if not html_content:
        return ''

    soup = BeautifulSoup(html_content, 'html.parser')
    # Example heuristic: the article body is assumed to live inside
    # <div class="main-content">; pages laid out differently yield ''.
    main_content = soup.find('div', class_='main-content')
    if main_content is None:
        return ''

    # strip=True on its own joins the text of adjacent tags with nothing in
    # between ("Hello<b>world</b>" -> "Helloworld"); a separator keeps words
    # and sentences apart for the sentence splitter downstream.
    return main_content.get_text(separator=' ', strip=True)
29
+
30
def format_script(text, max_sentences=10, sentences_per_line=2):
    """Lay out the leading sentences of *text* as short script lines.

    The text is split on ``'.'``, blank fragments are discarded, and the
    first ``max_sentences`` sentences are grouped ``sentences_per_line`` to a
    line. Every sentence keeps its terminating period.

    Note: splitting on ``'.'`` is deliberately simple, so decimals and
    abbreviations ("3.14", "e.g.") are treated as sentence boundaries.

    Args:
        text: Source text; empty or ``None`` yields ``''``.
        max_sentences: Maximum number of sentences to include.
        sentences_per_line: Number of sentences placed on each output line.

    Returns:
        The script as a single string with lines separated by ``'\\n'``.

    Raises:
        ValueError: If ``sentences_per_line`` is smaller than 1.
    """
    if sentences_per_line < 1:
        raise ValueError("sentences_per_line must be at least 1")
    if not text:
        return ''

    # Split the text into sentences. The fragment after a trailing period
    # (and any whitespace-only fragment) is empty and must be dropped,
    # otherwise it shows up in the output as a stray ". ".
    fragments = (part.strip() for part in text.split('.'))
    sentences = [part + '.' for part in fragments if part][:max_sentences]

    # Group the sentences into script lines.
    lines = [
        ' '.join(sentences[start:start + sentences_per_line])
        for start in range(0, len(sentences), sentences_per_line)
    ]
    return '\n'.join(lines)
40
+
41
# Gradio interface function
def gradio_fetch_and_format_script(url):
    """Turn the page at *url* into a short script.

    Fetches the page with ``get_url_content``, pulls its main text out with
    ``get_main_content`` and returns the result of ``format_script`` on that
    text.
    """
    page_text = get_main_content(get_url_content(url))
    return format_script(page_text)
46
+
47
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
48
  iface = gr.Interface(
49
+ fn=gradio_fetch_and_format_script,
50
  inputs=gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”"),
51
+ outputs=gr.Textbox(label="์˜์ƒ์šฉ ์Šคํฌ๋ฆฝํŠธ")
52
  )
53
 
54
+ iface.launch()