Shreyas094 committed on
Commit
8b21f9c
·
verified ·
1 Parent(s): 60fab24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -107
app.py CHANGED
@@ -1,119 +1,107 @@
1
  import gradio as gr
2
  import requests
3
- import json
4
- import random
5
- import logging
6
- import time
7
- from functools import lru_cache
8
 
9
- # Configure logging
10
- logging.basicConfig(
11
- level=logging.INFO,
12
- format='%(asctime)s [%(levelname)s] %(message)s',
13
- handlers=[
14
- logging.FileHandler("searxng_search.log"),
15
- logging.StreamHandler()
16
- ]
17
- )
18
 
19
- # List of SearXNG instances to try
20
- SEARXNG_INSTANCES = [
21
- "https://northboot.xyz",
22
- "https://search.hbubli.cc",
23
- "https://searx.tiekoetter.com",
24
- "https://search.bus-hit.me",
25
- # Add more instances here
26
- ]
27
-
28
- def search_news(query, num_results=10, max_retries=3):
29
- # Shuffle the list of instances to distribute load
30
- random.shuffle(SEARXNG_INSTANCES)
31
-
32
- headers = {
33
- "User-Agent": "SearXNG-NewsSearch/1.0"
 
34
  }
35
-
36
- for searxng_url in SEARXNG_INSTANCES:
37
- params = {
38
- "q": query,
39
- "categories": "news",
40
- "format": "json",
41
- "language": "en",
42
- "page": 1,
43
- "engines": "google_news,bing_news,yahoo_news",
44
- "count": num_results
45
- # Omitted 'time_range'
46
- }
47
-
48
- retry_count = 0
49
- backoff_time = 1 # Start with 1 second
50
 
51
- while retry_count <= max_retries:
52
- try:
53
- response = requests.get(
54
- f"{searxng_url}/search",
55
- params=params,
56
- headers=headers,
57
- timeout=10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  )
59
- if response.status_code == 429:
60
- # If rate limited, wait and retry
61
- logging.warning(f"Rate limited by {searxng_url}. Retrying in {backoff_time} seconds...")
62
- time.sleep(backoff_time)
63
- retry_count += 1
64
- backoff_time *= 2 # Exponential backoff
65
- continue
66
- response.raise_for_status()
67
- results = response.json()
68
- news_items = results.get("news", [])
69
- if news_items:
70
- logging.info(f"Success from instance: {searxng_url}")
71
- return news_items, None
72
- else:
73
- logging.info(f"No results from {searxng_url}")
74
- break # Move to the next instance
75
- except requests.RequestException as e:
76
- logging.warning(f"Instance {searxng_url} failed: {e}")
77
- break # Move to the next instance if a non-rate limit error occurs
78
-
79
- return [], "Unable to fetch results from any SearXNG instance. Please try again later."
80
 
81
- @lru_cache(maxsize=128)
82
- def cached_search_news(query, num_results=10):
83
- return search_news(query, num_results)
84
 
85
- def format_news_markdown(news_items, error=None):
86
- if error:
87
- return f"**Error:** {error}"
88
-
89
- if not news_items:
90
- return "**No news items found.**"
91
-
92
- formatted_results = ""
93
- for i, item in enumerate(news_items, 1):
94
- title = item.get('title', 'No Title')
95
- url = item.get('url', '#')
96
- published_date = item.get('published', 'N/A')
97
- content = item.get('content', 'N/A')
98
-
99
- formatted_results += f"### {i}. [{title}]({url})\n"
100
- formatted_results += f"**Published:** {published_date}\n\n"
101
- formatted_results += f"{content[:150]}...\n\n"
102
- return formatted_results
103
 
104
- def gradio_search_news(query, num_results):
105
- news_items, error = cached_search_news(query, int(num_results))
106
- return format_news_markdown(news_items, error)
107
 
108
- iface = gr.Interface(
109
- fn=gradio_search_news,
110
- inputs=[
111
- gr.Textbox(label="Enter a news topic to search for", placeholder="e.g., Artificial Intelligence"),
112
- gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of results")
113
- ],
114
- outputs=gr.Markdown(label="Search Results"),
115
- title="SearXNG News Search",
116
- description="Search for news articles using the SearXNG metasearch engine. If one instance fails, it will try others."
117
- )
118
 
119
- iface.launch()
 
 
1
  import gradio as gr
2
  import requests
 
 
 
 
 
3
 
4
+ def search_searx(query, instance_url='https://searx.org', lang='en_US', categories='general'):
5
+ """
6
+ Perform a search using the Searx API.
 
 
 
 
 
 
7
 
8
+ :param query: The search query string.
9
+ :param instance_url: The URL of the Searx instance.
10
+ :param lang: Language for search results (e.g., 'en_US').
11
+ :param categories: Categories to search in (e.g., 'general', 'images', 'videos').
12
+ :return: A list of formatted search results or an error message.
13
+ """
14
+ search_endpoint = f"{instance_url}/search"
15
+ params = {
16
+ 'q': query,
17
+ 'format': 'json',
18
+ 'language': lang,
19
+ 'categories': categories,
20
+ 'pageno': 1, # Page number
21
+ 'time_range': '', # Time range filter
22
+ 'engines': '', # Specify search engines, comma-separated
23
+ 'safesearch': '0' # Safe search (0: off, 1: moderate, 2: strict)
24
  }
25
+
26
+ try:
27
+ response = requests.get(search_endpoint, params=params, timeout=10)
28
+ response.raise_for_status() # Raise an error for bad status codes
29
+ data = response.json()
 
 
 
 
 
 
 
 
 
 
30
 
31
+ if 'results' not in data or not data['results']:
32
+ return "No results found."
33
+
34
+ formatted_results = ""
35
+ for idx, result in enumerate(data['results'], start=1):
36
+ title = result.get('title', 'No Title')
37
+ url = result.get('url', 'No URL')
38
+ snippet = result.get('content', 'No Description')
39
+ formatted_results += f"**{idx}. {title}**\n{url}\n{snippet}\n\n"
40
+
41
+ return formatted_results
42
+
43
+ except requests.exceptions.RequestException as e:
44
+ return f"An error occurred while searching: {e}"
45
+
46
+ def create_gradio_interface():
47
+ """
48
+ Creates and returns the Gradio interface.
49
+ """
50
+ with gr.Blocks() as demo:
51
+ gr.Markdown("# 🕵️‍♂️ Private Search with Searx and Gradio")
52
+ gr.Markdown(
53
+ "This application allows you to perform private searches using the [Searx](https://searx.org/) metasearch engine."
54
+ )
55
+
56
+ with gr.Row():
57
+ with gr.Column():
58
+ query = gr.Textbox(
59
+ label="Search Query",
60
+ placeholder="Enter your search query here...",
61
+ lines=1
62
  )
63
+ instance_url = gr.Textbox(
64
+ label="Searx Instance URL",
65
+ value="https://searx.org",
66
+ placeholder="https://searx.instance.url",
67
+ lines=1
68
+ )
69
+ lang = gr.Textbox(
70
+ label="Language",
71
+ value="en_US",
72
+ placeholder="e.g., en_US",
73
+ lines=1
74
+ )
75
+ categories = gr.Textbox(
76
+ label="Categories",
77
+ value="general",
78
+ placeholder="e.g., general, images, videos",
79
+ lines=1
80
+ )
81
+ search_button = gr.Button("Search")
 
 
82
 
83
+ with gr.Column():
84
+ results = gr.Markdown("### Search Results will appear here...")
 
85
 
86
+ def perform_search(q, url, language, cats):
87
+ return search_searx(q, instance_url=url, lang=language, categories=cats)
88
+
89
+ search_button.click(
90
+ perform_search,
91
+ inputs=[query, instance_url, lang, categories],
92
+ outputs=results
93
+ )
94
+
95
+ gr.Markdown(
96
+ """
97
+ ---
98
+ **Note:** This application uses the Searx metasearch engine to fetch results from multiple sources while preserving your privacy.
99
+ """
100
+ )
 
 
 
101
 
102
+ return demo
 
 
103
 
104
+ iface = create_gradio_interface()
 
 
 
 
 
 
 
 
 
105
 
106
+ if __name__ == "__main__":
107
+ iface.launch()