seawolf2357 committed on
Commit
6daf2cb
·
verified ·
1 Parent(s): 76666bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -23,17 +23,20 @@ def gradio_fetch_and_parse(url):
23
 
24
  def get_main_content(html_content):
25
  soup = BeautifulSoup(html_content, 'html.parser')
26
- # <script> 태그를 찾아 본문 콘텐츠 추출
27
- scripts = soup.find_all('meta content')
28
- text = ''
29
- for script in scripts:
30
- if script.string:
31
- text += script.string.strip() + '\n'
32
- if text:
33
- print("추출된 텍스트:", text)
34
- return text
 
 
 
35
  else:
36
- print("본문 콘텐츠를 찾을 수 없습니다.")
37
  return ''
38
 
39
 
 
23
 
24
  def get_main_content(html_content):
25
  soup = BeautifulSoup(html_content, 'html.parser')
26
+ # <meta> 태그의 content 속성을 찾아 추출
27
+ meta_content = []
28
+ for meta_tag in soup.find_all('meta'):
29
+ content = meta_tag.get('content')
30
+ if content:
31
+ meta_content.append(content.strip())
32
+
33
+ extracted_content = '\n'.join(meta_content)
34
+
35
+ if extracted_content:
36
+ print("추출된 메타 콘텐츠:", extracted_content)
37
+ return extracted_content
38
  else:
39
+ print("메타 콘텐츠를 찾을 수 없습니다.")
40
  return ''
41
 
42