Nymbo committed on
Commit
adf895d
·
verified ·
1 Parent(s): c730636

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -87
app.py CHANGED
@@ -1,10 +1,9 @@
1
  # File: main/app.py
2
- # Purpose: One Space that offers five tools/tabs:
3
  # 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
4
- # 2) DDG (Concise) — ultra-succinct DuckDuckGo search that emits JSONL with short keys to minimize tokens
5
- # 3) Websearch — structured DuckDuckGo search via LangChain tool (JSON)
6
- # 4) Unstructured DDG — raw DuckDuckGo list[dict] rendered into a Textbox
7
- # 5) Generate Sitemap — LIMITED: grouped internal/external links with an optional per-domain cap (and a .md download)
8
 
9
  from __future__ import annotations
10
 
@@ -12,14 +11,13 @@ import re
12
  import json
13
  import sys
14
  from io import StringIO
15
- from typing import List, Dict, Literal, Tuple
16
 
17
  import gradio as gr
18
  import requests
19
  from bs4 import BeautifulSoup
20
  from readability import Document
21
  from urllib.parse import urljoin, urldefrag, urlparse
22
- from langchain_community.tools import DuckDuckGoSearchResults
23
  from duckduckgo_search import DDGS
24
 
25
 
@@ -320,53 +318,16 @@ def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
320
  return md or "No content could be extracted."
321
 
322
 
323
- # ==========================
324
- # Websearch: DuckDuckGo tool
325
- # ==========================
326
-
327
- def Search_Structured( # <-- MCP tool #3 (Structured DDG)
328
- input_query: str,
329
- max_results: int = 5,
330
- ) -> List[Dict[Literal["snippet", "title", "link"], str]]:
331
- """
332
- Run a DuckDuckGo search and return structured results as a list of dictionaries.
333
- (Layman's terms: search DDG and get clean JSON objects.)
334
- """
335
- if not input_query or not input_query.strip():
336
- return []
337
-
338
- # Create the search tool (LangChain community wrapper)
339
- search = DuckDuckGoSearchResults(output_format="list", num_results=max_results)
340
-
341
- # Run the search and return results as a list of dicts
342
- results = search.invoke(input_query)
343
- return results
344
-
345
-
346
- # ========================================
347
- # Unstructured DDG: raw list into Textbox
348
- # ========================================
349
-
350
- def Search_Raw( # <-- MCP tool #4 (Unstructured DDG)
351
- query: str,
352
- ) -> list[dict]:
353
- """
354
- Run a DuckDuckGo search using the native `duckduckgo_search` client and return the
355
- raw Python list of dictionaries from the library.
356
- (Layman's terms: search DDG and show exactly what the library returns.)
357
- """
358
- if not query or not query.strip():
359
- return []
360
- with DDGS() as ddgs:
361
- results = ddgs.text(query, max_results=5)
362
- return results
363
 
364
 
365
  # ============================================
366
  # Concise DDG: ultra-succinct JSONL for tokens
367
  # ============================================
368
 
369
- def Search_Concise( # <-- MCP tool #2 (Concise DDG)
370
  query: str,
371
  max_results: int = 5,
372
  include_snippets: bool = False,
@@ -595,7 +556,7 @@ fetch_interface = gr.Interface(
595
 
596
  # --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
597
  concise_interface = gr.Interface(
598
- fn=Search_Concise,
599
  inputs=[
600
  gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
601
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
@@ -605,7 +566,7 @@ concise_interface = gr.Interface(
605
  gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
606
  ],
607
  outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
608
- title="DuckDuckGo Search (Concise)",
609
  description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
610
  api_description=(
611
  "Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
@@ -617,39 +578,7 @@ concise_interface = gr.Interface(
617
  submit_btn="Search",
618
  )
619
 
620
- # --- Websearch tab (structured DDG via LangChain) ---
621
- websearch_interface = gr.Interface(
622
- fn=Search_Structured, # connect the function to the UI
623
- inputs=[
624
- gr.Textbox(value="", label="Search query", placeholder="site:example.com interesting topic"),
625
- gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
626
- ],
627
- outputs=gr.JSON(label="Search results"),
628
- title="DuckDuckGo Search (Structured)",
629
- description="Search the web using DuckDuckGo; returns snippet, title, and link.",
630
- api_description=(
631
- "Run a DuckDuckGo web search and return a list of objects with keys: "
632
- "snippet, title, and link. Configure the number of results."
633
- ),
634
- allow_flagging="never",
635
- theme="Nymbo/Nymbo_Theme",
636
- )
637
-
638
- # --- Unstructured DDG tab (matches your separate app's output) ---
639
- unstructured_interface = gr.Interface(
640
- fn=Search_Raw,
641
- inputs=gr.Textbox(label="Enter Search Query"),
642
- outputs=gr.Textbox(label="Results", interactive=False),
643
- title="DuckDuckGo Search (Raw)",
644
- description="Returns the raw list of results (list[dict]) shown as text.",
645
- api_description=(
646
- "Run DuckDuckGo via the native client and return the raw list[dict] as "
647
- "provided by duckduckgo_search (fields like title, href/link, body/snippet)."
648
- ),
649
- allow_flagging="never",
650
- theme="Nymbo/Nymbo_Theme",
651
- submit_btn="Search",
652
- )
653
 
654
  # --- Generate Sitemap tab (LIMITED, grouped + optional per-domain cap) ---
655
  sitemap_interface = gr.Interface(
@@ -692,12 +621,10 @@ code_interface = gr.Interface(
692
 
693
  # --- Combine all into a single app with tabs ---
694
  demo = gr.TabbedInterface(
695
- interface_list=[fetch_interface, concise_interface, websearch_interface, unstructured_interface, sitemap_interface, code_interface],
696
  tab_names=[
697
  "Fetch Webpage",
698
- "DuckDuckGo Search (Concise)",
699
- "DuckDuckGo Search (Structured)",
700
- "DuckDuckGo Search (Raw)",
701
  "Generate Sitemap",
702
  "Python Code Executor",
703
  ],
 
1
  # File: main/app.py
2
+ # Purpose: One Space that offers four tools/tabs:
3
  # 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
4
+ # 2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
5
+ # 3) Generate Sitemap — grouped internal/external links with an optional per-domain cap
6
+ # 4) Python Code Executor — run Python code and capture stdout/errors
 
7
 
8
  from __future__ import annotations
9
 
 
11
  import json
12
  import sys
13
  from io import StringIO
14
+ from typing import List, Dict, Tuple
15
 
16
  import gradio as gr
17
  import requests
18
  from bs4 import BeautifulSoup
19
  from readability import Document
20
  from urllib.parse import urljoin, urldefrag, urlparse
 
21
  from duckduckgo_search import DDGS
22
 
23
 
 
318
  return md or "No content could be extracted."
319
 
320
 
321
+ # ===============================
322
+ # DuckDuckGo Search (JSONL lines)
323
+ # ===============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
 
326
  # ============================================
327
  # Concise DDG: ultra-succinct JSONL for tokens
328
  # ============================================
329
 
330
+ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
331
  query: str,
332
  max_results: int = 5,
333
  include_snippets: bool = False,
 
556
 
557
  # --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
558
  concise_interface = gr.Interface(
559
+ fn=Search_DuckDuckGo,
560
  inputs=[
561
  gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
562
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
 
566
  gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
567
  ],
568
  outputs=gr.Textbox(label="Results (JSONL)", interactive=False),
569
+ title="DuckDuckGo Search",
570
  description="Emits JSONL with short keys (t,u[,s]). Defaults avoid snippets and duplicate domains.",
571
  api_description=(
572
  "Run a DuckDuckGo search and return newline-delimited JSON with short keys: "
 
578
  submit_btn="Search",
579
  )
580
 
581
+ ## Removed Structured and Raw tabs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
 
583
  # --- Generate Sitemap tab (LIMITED, grouped + optional per-domain cap) ---
584
  sitemap_interface = gr.Interface(
 
621
 
622
  # --- Combine all into a single app with tabs ---
623
  demo = gr.TabbedInterface(
624
+ interface_list=[fetch_interface, concise_interface, sitemap_interface, code_interface],
625
  tab_names=[
626
  "Fetch Webpage",
627
+ "DuckDuckGo Search",
 
 
628
  "Generate Sitemap",
629
  "Python Code Executor",
630
  ],