File size: 1,116 Bytes
96dd5a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import gradio as gr
import requests
from bs4 import BeautifulSoup
import os
import tempfile
import re

# Text shown in the Gradio page header. The description is intentionally
# almost empty: it consists of a single newline character.
title = "Web Scraper"
description = "\n"


def get_from_url(url, timeout=10):
    """Download *url* and save the page's text to a temporary ``.txt`` file.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the call forever.

    Returns:
        The path of the temporary file containing the extracted text, or
        ``None`` if the URL is blank or fetching, parsing, or writing fails.
        The file is created with ``delete=False``; it is not removed
        automatically and cleanup is left to the caller.
    """
    url = url.strip() if url else ""
    if not url:
        # Nothing to fetch (e.g. an empty line in the input box).
        return None

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        text = soup.get_text(separator="\n")

        # A ".txt" suffix makes the downloaded file recognisable as plain text.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as file:
            file.write(text)
            filename = file.name

        return filename  # Return the temporary file path
    except requests.exceptions.RequestException as e:
        print("Error fetching the URL:", e)
        return None
    except Exception as e:
        # Best-effort boundary: report the problem and signal failure with
        # None rather than propagating the exception to the caller.
        print("Error:", e)
        return None


def extract_text_from_html(urls):
    """Scrape every URL listed in *urls* (one per line) into text files.

    Args:
        urls: String with one URL per line. Blank lines and surrounding
            whitespace (including a trailing carriage return) are ignored.
            ``None`` is treated as empty input.

    Returns:
        A list of the file paths returned by ``get_from_url`` for the URLs
        that were processed successfully, in input order. URLs for which
        ``get_from_url`` returned ``None`` are left out.
    """
    candidates = (line.strip() for line in (urls or "").splitlines())
    results = (get_from_url(url) for url in candidates if url)
    # Drop failed downloads so the output only contains real file paths.
    return [path for path in results if path is not None]


# Build the Gradio UI: one textbox for the URL list as input and one file
# component as output, both wired to extract_text_from_html.
# NOTE(review): the output label reads "Scrapped Text"; "Scraped" may have
# been intended. Left unchanged here because it is a user-facing string.
interface = gr.Interface(
    fn=extract_text_from_html,
    inputs=[gr.Textbox(label="Url input")],
    outputs=[gr.File(label="Scrapped Text")],
    title=title,
    description=description,
)

# Start the app at import time with Gradio's debug mode enabled.
interface.launch(debug=True)