File size: 18,910 Bytes
80fb263
 
0eaed2f
 
4f25906
80fb263
7a18d26
 
 
 
 
 
 
 
 
 
 
 
 
 
aff9dae
80fb263
f7d9daf
80fb263
 
f7d9daf
80fb263
 
 
4f25906
 
 
 
 
 
 
 
 
aff9dae
4f25906
f7d9daf
0eaed2f
4f25906
0eaed2f
 
4e43700
 
 
0eaed2f
4e43700
 
0eaed2f
4e43700
 
0eaed2f
4e43700
 
0eaed2f
4e43700
 
 
 
 
 
 
 
4f25906
 
a682e5d
4f25906
827719f
 
 
 
80fb263
a682e5d
80fb263
 
f7d9daf
48bc3a2
 
 
4f25906
f7d9daf
48bc3a2
f7d9daf
 
 
48bc3a2
66c6476
f7d9daf
66c6476
 
 
 
f7d9daf
4f25906
a682e5d
 
48bc3a2
a682e5d
48bc3a2
aff9dae
d81c533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d06a8cb
 
aff9dae
d06a8cb
 
8b34ee3
 
 
d06a8cb
 
 
 
 
aff9dae
d06a8cb
aff9dae
d06a8cb
 
 
 
aff9dae
d06a8cb
aff9dae
8b34ee3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fe55a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e19a5af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7d9daf
7a18d26
704e96a
 
aff9dae
 
 
 
 
 
 
827719f
aff9dae
 
 
 
 
 
704e96a
aff9dae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d06a8cb
aff9dae
 
 
d06a8cb
aff9dae
d81c533
 
 
 
aff9dae
704e96a
aff9dae
 
704e96a
aff9dae
d81c533
 
 
 
 
 
aff9dae
d06a8cb
 
 
aff9dae
80fb263
8b34ee3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fe55a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e19a5af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7d9daf
48bc3a2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
import asyncio
import tempfile
from urllib.parse import urljoin, urlparse

import gradio as gr
import requests
from bs4 import BeautifulSoup

# HTML and JavaScript for the "Copy Code" button.
# Injected once at the top of the Blocks layout via gr.HTML; copyCode(textareaId)
# looks up the <textarea> inside the Gradio component whose elem_id matches and
# writes its value to the clipboard, alerting on success or failure.
copy_button_html = """
<script>
function copyCode(textareaId) {
    const text = document.querySelector(`#${textareaId} textarea`).value;
    navigator.clipboard.writeText(text).then(() => {
        alert("Text copied to clipboard!");
    }).catch(() => {
        alert("Failed to copy text.");
    });
}
</script>
"""

# Common functions
def is_valid_url(url):
    """Return True if *url* is a well-formed absolute URL.

    Args:
        url: Candidate URL string.

    Returns:
        bool: True when the URL has both a scheme (e.g. "https") and a
        network location (e.g. "example.com"), False otherwise.
    """
    try:
        parts = urlparse(url)
    except ValueError:
        # urlparse raises ValueError on malformed input (e.g. invalid ports);
        # the previous bare `except:` also hid unrelated bugs.
        return False
    return bool(parts.scheme and parts.netloc)

async def fetch_file_content(url):
    """Fetch the text body of a resource (CSS, JS, etc.) from a URL.

    Args:
        url: Absolute URL of the file to download.

    Returns:
        str: The response text, or a fixed failure message on any request error.
    """
    try:
        # requests is synchronous; run it in a worker thread so the asyncio
        # event loop stays responsive.
        response = await asyncio.to_thread(requests.get, url, timeout=5)
        response.raise_for_status()
        return response.text
    except requests.RequestException:
        # Covers connection errors, timeouts, and HTTP 4xx/5xx; the previous
        # bare `except:` also masked programming errors.
        return "Failed to fetch content."

# URL to Text Converter
async def extract_additional_resources(url):
    """Collect CSS/JS/image resource links (max 5 each) referenced by a page.

    Args:
        url: Page URL to scan.

    Returns:
        tuple: (css_links, js_links, img_links, css_content, js_content).
        For a non-HTML response the raw body is returned as the single entry of
        css_content; on any error five empty lists are returned (best-effort).
    """
    try:
        response = await asyncio.to_thread(requests.get, url, timeout=5)
        response.raise_for_status()

        if 'text/html' not in response.headers.get('Content-Type', ''):
            # Not an HTML page: treat the body itself as the fetched content.
            return [], [], [], [response.text], []

        soup = BeautifulSoup(response.text, "html.parser")

        # Resolve relative links against the page URL; cap each list at 5.
        css_links = [urljoin(url, tag["href"])
                     for tag in soup.find_all("link", rel="stylesheet")
                     if "href" in tag.attrs][:5]
        js_links = [urljoin(url, tag["src"])
                    for tag in soup.find_all("script")
                    if "src" in tag.attrs][:5]
        img_links = [urljoin(url, tag["src"])
                     for tag in soup.find_all("img")
                     if "src" in tag.attrs][:5]

        # Download stylesheet and script bodies concurrently.
        css_content = await asyncio.gather(*(fetch_file_content(link) for link in css_links))
        js_content = await asyncio.gather(*(fetch_file_content(link) for link in js_links))

        return css_links, js_links, img_links, css_content, js_content
    except Exception:
        # Best-effort helper: any failure degrades to "no extra resources"
        # (the previously-bound exception variable was unused).
        return [], [], [], [], []

async def convert_to_text(url):
    """Download a page, summarize the response, and save the body to a file.

    Args:
        url: Page URL; a leading "view-source:" prefix is stripped first.

    Returns:
        tuple: (summary, body_text, file_path, css_links, js_links, img_links,
        css_content, js_content). On failure the summary holds the error text
        and the remaining slots are empty.
    """
    # Allow users to paste "view-source:" URLs straight from the browser.
    if url.startswith("view-source:"):
        url = url[len("view-source:"):]

    if not is_valid_url(url):
        return "Error: Please enter a valid URL.", "", None, [], [], [], [], []

    try:
        # Some sites reject requests that lack a browser-like User-Agent.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = await asyncio.to_thread(requests.get, url, headers=headers, timeout=5)
        response.raise_for_status()  # surface HTTP errors (e.g. 404, 500)

        status = f"Request status: {response.status_code}"
        content_length = f"Content size: {len(response.text)} characters"
        results = f"{status}\n{content_length}"

        # Write to a unique temp file: the previous fixed filename
        # ("downloaded_content.txt") was clobbered by concurrent users.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as tmp:
            tmp.write(response.text)
            file_path = tmp.name

        # Extract additional resources referenced by the page.
        css_links, js_links, img_links, css_content, js_content = await extract_additional_resources(url)

        return results, response.text, file_path, css_links, js_links, img_links, css_content, js_content
    except requests.exceptions.RequestException as e:
        return f"Error: {e}", "", None, [], [], [], [], []

# Model to Text Converter
async def fetch_model_info(model_url):
    """Fetch a model's description and installation instructions.

    Supports Hugging Face model pages and GitHub repositories.

    Args:
        model_url: URL of the model repository.

    Returns:
        tuple[str, str]: (description, install_instructions). Unsupported hosts
        yield ("Unsupported repository.", ""); errors yield ("Error: ...", "").
    """
    try:
        if "huggingface.co" in model_url:
            # Fetch the rendered model card page.
            response = await asyncio.to_thread(requests.get, model_url, timeout=5)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            # The model card body is rendered inside a div.prose element.
            prose = soup.find("div", {"class": "prose"})
            description = prose.get_text(strip=True) if prose else "No description available."

            # Hugging Face repo ids include the namespace ("user/model");
            # taking only the last URL segment produced ids that
            # from_pretrained() could not resolve for namespaced models.
            model_name = urlparse(model_url).path.strip("/")
            install_instructions = f"To install this model, run:\n```bash\npip install transformers\n```\nThen load the model in Python:\n```python\nfrom transformers import AutoModel, AutoTokenizer\nmodel = AutoModel.from_pretrained('{model_name}')\ntokenizer = AutoTokenizer.from_pretrained('{model_name}')\n```"

            return description, install_instructions
        elif "github.com" in model_url:
            # Raw README from the default "main" branch.
            readme_url = f"{model_url}/raw/main/README.md"
            response = await asyncio.to_thread(requests.get, readme_url, timeout=5)
            response.raise_for_status()

            description = response.text if response.text else "No description available."
            install_instructions = f"To install this model, clone the repository:\n```bash\ngit clone {model_url}.git\ncd {model_url.split('/')[-1]}\n```"

            return description, install_instructions
        else:
            return "Unsupported repository.", ""
    except Exception as e:
        return f"Error: {e}", ""

async def fetch_model_file_content(model_url, file_path):
    """Fetch one file's raw content from a Hugging Face or GitHub repository.

    Args:
        model_url: Repository URL (Hugging Face model or GitHub repo).
        file_path: Path of the file inside the repository, e.g. "config.json".

    Returns:
        str: The file content, or an "Error: ..." message.
    """
    try:
        # Both hosts expose raw files at {repo}/raw/main/{file_path}.
        if "huggingface.co" in model_url:
            # Strip "/blob/main/" in case the user pasted a file-view URL
            # (str.replace is a no-op when the substring is absent).
            model_url = model_url.replace("/blob/main/", "/")
            full_url = f"{model_url}/raw/main/{file_path}"
        elif "github.com" in model_url:
            full_url = f"{model_url}/raw/main/{file_path}"
        else:
            return "Error: Unsupported repository."

        # Fetch the file content off the event loop.
        response = await asyncio.to_thread(requests.get, full_url, timeout=5)
        response.raise_for_status()
        return response.text
    except Exception as e:
        return f"Error: {e}"

# Space to Text Converter
async def fetch_space_file_content(space_url, file_path):
    """Return the raw text of *file_path* inside a Hugging Face Space.

    Args:
        space_url: Space URL, e.g. "https://huggingface.co/spaces/user/space".
        file_path: File inside the Space repo, e.g. "app.py".

    Returns:
        str: File content on success, otherwise an "Error: ..." message.
    """
    # Only Hugging Face Spaces are supported; they serve raw files under
    # {space_url}/raw/main/{file_path}.
    if "huggingface.co/spaces" not in space_url:
        return "Error: Unsupported repository. Please provide a Hugging Face Space URL."

    try:
        raw_url = f"{space_url}/raw/main/{file_path}"
        response = await asyncio.to_thread(requests.get, raw_url, timeout=5)
        response.raise_for_status()
        return response.text
    except Exception as e:
        return f"Error: {e}"

# CodePen to Text Converter
async def fetch_codepen_project(codepen_url):
    """Scrape the HTML, CSS, and JavaScript panels from a CodePen page.

    Args:
        codepen_url: URL of the pen, e.g. "https://codepen.io/user/pen/id".

    Returns:
        tuple[str, str, str]: (html, css, js) panel contents; empty strings for
        missing panels, or ("Error: ...", "", "") on failure.
    """
    # Reject anything that is not a CodePen URL up front.
    if "codepen.io" not in codepen_url:
        return "Error: Please enter a valid CodePen URL.", "", ""

    try:
        response = await asyncio.to_thread(requests.get, codepen_url, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        def panel_text(panel_id):
            # Each editor panel is served as a <textarea> with a known id.
            node = soup.find("textarea", {"id": panel_id})
            return node.text if node else ""

        return panel_text("html-input"), panel_text("css-input"), panel_text("js-input")
    except Exception as e:
        return f"Error: {e}", "", ""

# Web Data Extractor
async def extract_web_data(url):
    """Extract page metadata: description, first image, colors, fonts, a markup
    snippet, embedded videos, and downloadable file links.

    Args:
        url: Page URL to inspect.

    Returns:
        tuple: (description, image_preview, colors, fonts, similar_code,
        videos, files); on any error ("Error: ...", "", [], [], "", [], []).
    """
    try:
        response = await asyncio.to_thread(requests.get, url, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # <meta name="description"> content if present, else a placeholder.
        meta_tag = soup.find("meta", attrs={"name": "description"})
        description = meta_tag["content"] if meta_tag else "No description available."

        # The first <img> on the page serves as the preview.
        first_img = soup.find("img")
        image_preview = first_img["src"] if first_img else "No image preview available."

        # Hex color tokens from inline <style> blocks (up to 5 unique values).
        color_tokens = []
        for style_tag in soup.find_all("style"):
            color_tokens.extend(tok for tok in style_tag.text.split() if tok.startswith("#"))
        colors = list(set(color_tokens))[:5]

        # Google Fonts stylesheet links (up to 5 unique values).
        font_links = [
            tag["href"]
            for tag in soup.find_all("link", attrs={"href": True})
            if "fonts.googleapis.com" in tag["href"]
        ]
        fonts = list(set(font_links))[:5]

        # A short snippet of the parsed markup.
        similar_code = str(soup)[:1000]

        # Embedded players are <iframe> elements carrying a src attribute.
        videos = [frame["src"] for frame in soup.find_all("iframe") if "src" in frame.attrs]

        # Anchors pointing at common downloadable document types.
        files = [
            anchor["href"]
            for anchor in soup.find_all("a", attrs={"href": True})
            if anchor["href"].endswith((".pdf", ".zip", ".doc", ".docx", ".xls", ".xlsx"))
        ]

        return description, image_preview, colors, fonts, similar_code, videos, files
    except Exception as e:
        return f"Error: {e}", "", [], [], "", [], []

# Create the Gradio interface.
# NOTE: all event handlers are async; Gradio awaits them natively.
with gr.Blocks() as demo:
    gr.HTML(copy_button_html)  # Inject the shared "Copy Code" script once

    with gr.Tabs():
        # Tab 1: URL to Text Converter
        with gr.Tab("URL to Text Converter"):
            gr.Markdown("## URL to Text Converter")
            gr.Markdown("Enter a URL to fetch its text content and download it as a .txt file.")

            with gr.Row():
                url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com or view-source:https://example.com")

            with gr.Row():
                results_output = gr.Textbox(label="Request Results", interactive=False)
                text_output = gr.Textbox(label="Text Content", interactive=True, elem_id="output-text")

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"output-text\")'>Copy Code</button>")  # "Copy Code" button
                file_output = gr.File(label="Download File", visible=False)  # Hidden file download component

            submit_button = gr.Button("Fetch Content")

            # Accordion holding the additional-resource displays. These are the
            # components the click handler writes into.
            with gr.Accordion("Show/Hide Additional Resources", open=False):
                gr.Markdown("### CSS Files")
                css_output = gr.Textbox(label="CSS Files", interactive=False)

                gr.Markdown("### JS Files")
                js_output = gr.Textbox(label="JS Files", interactive=False)

                gr.Markdown("### Images")
                img_output = gr.Textbox(label="Images", interactive=False)

                gr.Markdown("### CSS Content")
                css_content_output = gr.Textbox(label="CSS Content", interactive=True)

                gr.Markdown("### JS Content")
                js_content_output = gr.Textbox(label="JS Content", interactive=True)

            # Wire the click to the accordion components. Previously the outputs
            # were throwaway inline Textboxes, so the accordion never updated.
            submit_button.click(
                fn=convert_to_text,
                inputs=url_input,
                outputs=[
                    results_output, text_output, file_output,
                    css_output, js_output, img_output,
                    css_content_output, js_content_output,
                ],
            )

        # Tab 2: Model to Text Converter
        with gr.Tab("Model to Text Converter"):
            gr.Markdown("## Model to Text Converter")
            gr.Markdown("Enter a link to a model on Hugging Face or GitHub, and specify the file path.")

            with gr.Row():
                model_url_input = gr.Textbox(label="Model URL", placeholder="https://huggingface.co/... or https://github.com/...")
                file_path_input = gr.Textbox(label="File Path", placeholder="e.g., config.json or README.md")

            with gr.Row():
                model_description_output = gr.Textbox(label="Model Description", interactive=False)
                install_instructions_output = gr.Textbox(label="Installation Instructions", interactive=False)

            with gr.Row():
                model_content_output = gr.Textbox(label="File Content", interactive=True, elem_id="model-content-output")

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"model-content-output\")'>Copy Code</button>")  # "Copy Code" button

            # One button drives two independent handlers: model info + file content.
            submit_model_button = gr.Button("Fetch Model Info and File Content")
            submit_model_button.click(
                fn=fetch_model_info,
                inputs=[model_url_input],
                outputs=[model_description_output, install_instructions_output]
            )
            submit_model_button.click(
                fn=fetch_model_file_content,
                inputs=[model_url_input, file_path_input],
                outputs=[model_content_output]
            )

        # Tab 3: Space to Text Converter
        with gr.Tab("Space to Text Converter"):
            gr.Markdown("## Space to Text Converter")
            gr.Markdown("Enter a link to a Hugging Face Space and specify the file path to fetch its content.")

            with gr.Row():
                space_url_input = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/...")
                space_file_path_input = gr.Textbox(label="File Path", placeholder="e.g., app.py or README.md")

            with gr.Row():
                space_content_output = gr.Textbox(label="File Content", interactive=True, elem_id="space-content-output")

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"space-content-output\")'>Copy Code</button>")  # "Copy Code" button

            submit_space_button = gr.Button("Fetch File Content")
            submit_space_button.click(
                fn=fetch_space_file_content,
                inputs=[space_url_input, space_file_path_input],
                outputs=[space_content_output]
            )

        # Tab 4: CodePen to Text Converter
        with gr.Tab("CodePen to Text Converter"):
            gr.Markdown("## CodePen to Text Converter")
            gr.Markdown("Enter a CodePen project URL to fetch its HTML, CSS, and JavaScript content.")

            with gr.Row():
                codepen_url_input = gr.Textbox(label="CodePen URL", placeholder="https://codepen.io/.../pen/...")

            # Renamed from css_output/js_output to avoid shadowing Tab 1's
            # accordion components of the same name.
            with gr.Row():
                codepen_html_output = gr.Textbox(label="HTML Content", interactive=True, elem_id="html-output")
                codepen_css_output = gr.Textbox(label="CSS Content", interactive=True, elem_id="css-output")
                codepen_js_output = gr.Textbox(label="JavaScript Content", interactive=True, elem_id="js-output")

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"html-output\")'>Copy HTML</button>")
                gr.HTML("<button onclick='copyCode(\"css-output\")'>Copy CSS</button>")
                gr.HTML("<button onclick='copyCode(\"js-output\")'>Copy JS</button>")

            submit_codepen_button = gr.Button("Fetch CodePen Content")
            submit_codepen_button.click(
                fn=fetch_codepen_project,
                inputs=[codepen_url_input],
                outputs=[codepen_html_output, codepen_css_output, codepen_js_output]
            )

        # Tab 5: Web Data Extractor
        with gr.Tab("Web Data Extractor"):
            gr.Markdown("## Web Data Extractor")
            gr.Markdown("Enter a URL to extract additional web data like description, image preview, colors, fonts, similar code, videos, and files.")

            with gr.Row():
                web_data_url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")

            with gr.Row():
                description_output = gr.Textbox(label="Site Description", interactive=False)
                image_preview_output = gr.Image(label="Image Preview", interactive=False)

            with gr.Row():
                colors_output = gr.Textbox(label="Colors", interactive=False)
                fonts_output = gr.Textbox(label="Fonts", interactive=False)

            with gr.Row():
                similar_code_output = gr.Textbox(label="Similar Code", interactive=True, elem_id="similar-code-output")

            with gr.Row():
                videos_output = gr.Textbox(label="Videos", interactive=False)
                files_output = gr.Textbox(label="Files", interactive=False)

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"similar-code-output\")'>Copy Code</button>")  # "Copy Code" button

            submit_web_data_button = gr.Button("Extract Web Data")
            submit_web_data_button.click(
                fn=extract_web_data,
                inputs=[web_data_url_input],
                outputs=[description_output, image_preview_output, colors_output, fonts_output, similar_code_output, videos_output, files_output]
            )

# Launch the interface
demo.launch()