Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1 |
# File: main/app.py
|
2 |
-
# Purpose: One Space that offers
|
3 |
# 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
|
4 |
-
# 2)
|
5 |
-
# 3)
|
6 |
-
# 4)
|
7 |
-
# 5) Generate Sitemap — LIMITED: grouped internal/external links with an optional per-domain cap (and a .md download)
|
8 |
|
9 |
from __future__ import annotations
|
10 |
|
@@ -12,14 +11,13 @@ import re
|
|
12 |
import json
|
13 |
import sys
|
14 |
from io import StringIO
|
15 |
-
from typing import List, Dict,
|
16 |
|
17 |
import gradio as gr
|
18 |
import requests
|
19 |
from bs4 import BeautifulSoup
|
20 |
from readability import Document
|
21 |
from urllib.parse import urljoin, urldefrag, urlparse
|
22 |
-
from langchain_community.tools import DuckDuckGoSearchResults
|
23 |
from duckduckgo_search import DDGS
|
24 |
|
25 |
|
@@ -320,53 +318,16 @@ def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
|
|
320 |
return md or "No content could be extracted."
|
321 |
|
322 |
|
323 |
-
#
|
324 |
-
#
|
325 |
-
#
|
326 |
-
|
327 |
-
def Search_Structured( # <-- MCP tool #3 (Structured DDG)
|
328 |
-
input_query: str,
|
329 |
-
max_results: int = 5,
|
330 |
-
) -> List[Dict[Literal["snippet", "title", "link"], str]]:
|
331 |
-
"""
|
332 |
-
Run a DuckDuckGo search and return structured results as a list of dictionaries.
|
333 |
-
(Layman's terms: search DDG and get clean JSON objects.)
|
334 |
-
"""
|
335 |
-
if not input_query or not input_query.strip():
|
336 |
-
return []
|
337 |
-
|
338 |
-
# Create the search tool (LangChain community wrapper)
|
339 |
-
search = DuckDuckGoSearchResults(output_format="list", num_results=max_results)
|
340 |
-
|
341 |
-
# Run the search and return results as a list of dicts
|
342 |
-
results = search.invoke(input_query)
|
343 |
-
return results
|
344 |
-
|
345 |
-
|
346 |
-
# ========================================
|
347 |
-
# Unstructured DDG: raw list into Textbox
|
348 |
-
# ========================================
|
349 |
-
|
350 |
-
def Search_Raw( # <-- MCP tool #4 (Unstructured DDG)
|
351 |
-
query: str,
|
352 |
-
) -> list[dict]:
|
353 |
-
"""
|
354 |
-
Run a DuckDuckGo search using the native `duckduckgo_search` client and return the
|
355 |
-
raw Python list of dictionaries from the library.
|
356 |
-
(Layman's terms: search DDG and show exactly what the library returns.)
|
357 |
-
"""
|
358 |
-
if not query or not query.strip():
|
359 |
-
return []
|
360 |
-
with DDGS() as ddgs:
|
361 |
-
results = ddgs.text(query, max_results=5)
|
362 |
-
return results
|
363 |
|
364 |
|
365 |
# ============================================
|
366 |
# Concise DDG: ultra-succinct JSONL for tokens
|
367 |
# ============================================
|
368 |
|
369 |
-
def
|
370 |
query: str,
|
371 |
max_results: int = 5,
|
372 |
include_snippets: bool = False,
|
@@ -595,7 +556,7 @@ fetch_interface = gr.Interface(
|
|
595 |
|
596 |
# --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
|
597 |
concise_interface = gr.Interface(
|
598 |
-
fn=
|
599 |
inputs=[
|
600 |
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
601 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
@@ -605,7 +566,7 @@ concise_interface = gr.Interface(
|
|
605 |
gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
|
606 |
],
|
607 |
outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
|
608 |
-
title="DuckDuckGo Search
|
609 |
description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
|
610 |
api_description=(
|
611 |
"Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
|
@@ -617,39 +578,7 @@ concise_interface = gr.Interface(
|
|
617 |
submit_btn="Search",
|
618 |
)
|
619 |
|
620 |
-
|
621 |
-
websearch_interface = gr.Interface(
|
622 |
-
fn=Search_Structured, # connect the function to the UI
|
623 |
-
inputs=[
|
624 |
-
gr.Textbox(value="", label="Search query", placeholder="site:example.com interesting topic"),
|
625 |
-
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
626 |
-
],
|
627 |
-
outputs=gr.JSON(label="Search results"),
|
628 |
-
title="DuckDuckGo Search (Structured)",
|
629 |
-
description="Search the web using DuckDuckGo; returns snippet, title, and link.",
|
630 |
-
api_description=(
|
631 |
-
"Run a DuckDuckGo web search and return a list of objects with keys: "
|
632 |
-
"snippet, title, and link. Configure the number of results."
|
633 |
-
),
|
634 |
-
allow_flagging="never",
|
635 |
-
theme="Nymbo/Nymbo_Theme",
|
636 |
-
)
|
637 |
-
|
638 |
-
# --- Unstructured DDG tab (matches your separate app’s output) ---
|
639 |
-
unstructured_interface = gr.Interface(
|
640 |
-
fn=Search_Raw,
|
641 |
-
inputs=gr.Textbox(label="Enter Search Query"),
|
642 |
-
outputs=gr.Textbox(label="Results", interactive=False),
|
643 |
-
title="DuckDuckGo Search (Raw)",
|
644 |
-
description="Returns the raw list of results (list[dict]) shown as text.",
|
645 |
-
api_description=(
|
646 |
-
"Run DuckDuckGo via the native client and return the raw list[dict] as "
|
647 |
-
"provided by duckduckgo_search (fields like title, href/link, body/snippet)."
|
648 |
-
),
|
649 |
-
allow_flagging="never",
|
650 |
-
theme="Nymbo/Nymbo_Theme",
|
651 |
-
submit_btn="Search",
|
652 |
-
)
|
653 |
|
654 |
# --- Generate Sitemap tab (LIMITED, grouped + optional per-domain cap) ---
|
655 |
sitemap_interface = gr.Interface(
|
@@ -692,12 +621,10 @@ code_interface = gr.Interface(
|
|
692 |
|
693 |
# --- Combine all into a single app with tabs ---
|
694 |
demo = gr.TabbedInterface(
|
695 |
-
interface_list=[fetch_interface, concise_interface,
|
696 |
tab_names=[
|
697 |
"Fetch Webpage",
|
698 |
-
"DuckDuckGo Search
|
699 |
-
"DuckDuckGo Search (Structured)",
|
700 |
-
"DuckDuckGo Search (Raw)",
|
701 |
"Generate Sitemap",
|
702 |
"Python Code Executor",
|
703 |
],
|
|
|
1 |
# File: main/app.py
|
2 |
+
# Purpose: One Space that offers four tools/tabs:
|
3 |
# 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
|
4 |
+
# 2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
|
5 |
+
# 3) Generate Sitemap — grouped internal/external links with an optional per-domain cap
|
6 |
+
# 4) Python Code Executor — run Python code and capture stdout/errors
|
|
|
7 |
|
8 |
from __future__ import annotations
|
9 |
|
|
|
11 |
import json
|
12 |
import sys
|
13 |
from io import StringIO
|
14 |
+
from typing import List, Dict, Tuple
|
15 |
|
16 |
import gradio as gr
|
17 |
import requests
|
18 |
from bs4 import BeautifulSoup
|
19 |
from readability import Document
|
20 |
from urllib.parse import urljoin, urldefrag, urlparse
|
|
|
21 |
from duckduckgo_search import DDGS
|
22 |
|
23 |
|
|
|
318 |
return md or "No content could be extracted."
|
319 |
|
320 |
|
321 |
+
# ===============================
|
322 |
+
# DuckDuckGo Search (JSONL lines)
|
323 |
+
# ===============================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
|
325 |
|
326 |
# ============================================
|
327 |
# Concise DDG: ultra-succinct JSONL for tokens
|
328 |
# ============================================
|
329 |
|
330 |
+
def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
|
331 |
query: str,
|
332 |
max_results: int = 5,
|
333 |
include_snippets: bool = False,
|
|
|
556 |
|
557 |
# --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
|
558 |
concise_interface = gr.Interface(
|
559 |
+
fn=Search_DuckDuckGo,
|
560 |
inputs=[
|
561 |
gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
|
562 |
gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
|
|
|
566 |
gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
|
567 |
],
|
568 |
outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
|
569 |
+
title="DuckDuckGo Search",
|
570 |
description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
|
571 |
api_description=(
|
572 |
"Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
|
|
|
578 |
submit_btn="Search",
|
579 |
)
|
580 |
|
581 |
+
## Removed Structured and Raw tabs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
582 |
|
583 |
# --- Generate Sitemap tab (LIMITED, grouped + optional per-domain cap) ---
|
584 |
sitemap_interface = gr.Interface(
|
|
|
621 |
|
622 |
# --- Combine all into a single app with tabs ---
|
623 |
demo = gr.TabbedInterface(
|
624 |
+
interface_list=[fetch_interface, concise_interface, sitemap_interface, code_interface],
|
625 |
tab_names=[
|
626 |
"Fetch Webpage",
|
627 |
+
"DuckDuckGo Search",
|
|
|
|
|
628 |
"Generate Sitemap",
|
629 |
"Python Code Executor",
|
630 |
],
|