Spaces:
Running
Running
Łukasz Mariański
neon_arch
committed on
✨ Option to use a proxy for outgoing `upstream search engine` requests (#573)
Browse files
* Add option to use a proxy for outgoing (search engine) requests.
* Enable socks feature in reqwest
* Fix formatting
* add proxy feature
* Update src/config/parser.rs
Co-authored-by: neon_arch <[email protected]>
* Update websurfx/config.lua
Co-authored-by: neon_arch <[email protected]>
* Update Cargo.toml
Co-authored-by: neon_arch <[email protected]>
* fix
* Update Cargo.toml
Co-authored-by: neon_arch <[email protected]>
---------
Co-authored-by: neon_arch <[email protected]>
- Cargo.toml +2 -1
- src/config/parser.rs +12 -0
- src/results/aggregator.rs +10 -4
- websurfx/config.lua +2 -0
Cargo.toml
CHANGED
|
@@ -17,7 +17,8 @@ reqwest = { version = "0.12.5", default-features = false, features = [
|
|
| 17 |
"rustls-tls",
|
| 18 |
"brotli",
|
| 19 |
"gzip",
|
| 20 |
-
"http2"
|
|
|
|
| 21 |
] }
|
| 22 |
tokio = { version = "1.32.0", features = [
|
| 23 |
"rt-multi-thread",
|
|
|
|
| 17 |
"rustls-tls",
|
| 18 |
"brotli",
|
| 19 |
"gzip",
|
| 20 |
+
"http2",
|
| 21 |
+
"socks",
|
| 22 |
] }
|
| 23 |
tokio = { version = "1.32.0", features = [
|
| 24 |
"rt-multi-thread",
|
src/config/parser.rs
CHANGED
|
@@ -6,6 +6,7 @@ use crate::handler::{file_path, FileType};
|
|
| 6 |
use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
|
| 7 |
use log::LevelFilter;
|
| 8 |
use mlua::Lua;
|
|
|
|
| 9 |
use std::{collections::HashMap, fs, thread::available_parallelism};
|
| 10 |
|
| 11 |
/// A named struct which stores the parsed config file options.
|
|
@@ -48,6 +49,8 @@ pub struct Config {
|
|
| 48 |
pub tcp_connection_keep_alive: u8,
|
| 49 |
/// It stores the pool idle connection timeout in seconds.
|
| 50 |
pub pool_idle_connection_timeout: u8,
|
|
|
|
|
|
|
| 51 |
/// It stores the number of https connections to keep in the pool.
|
| 52 |
pub number_of_https_connections: u8,
|
| 53 |
}
|
|
@@ -120,6 +123,14 @@ impl Config {
|
|
| 120 |
_ => parsed_cet,
|
| 121 |
};
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
Ok(Config {
|
| 124 |
port: globals.get::<_, u16>("port")?,
|
| 125 |
binding_ip: globals.get::<_, String>("binding_ip")?,
|
|
@@ -151,6 +162,7 @@ impl Config {
|
|
| 151 |
safe_search,
|
| 152 |
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
|
| 153 |
cache_expiry_time,
|
|
|
|
| 154 |
})
|
| 155 |
}
|
| 156 |
}
|
|
|
|
| 6 |
use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
|
| 7 |
use log::LevelFilter;
|
| 8 |
use mlua::Lua;
|
| 9 |
+
use reqwest::Proxy;
|
| 10 |
use std::{collections::HashMap, fs, thread::available_parallelism};
|
| 11 |
|
| 12 |
/// A named struct which stores the parsed config file options.
|
|
|
|
| 49 |
pub tcp_connection_keep_alive: u8,
|
| 50 |
/// It stores the pool idle connection timeout in seconds.
|
| 51 |
pub pool_idle_connection_timeout: u8,
|
| 52 |
+
/// Url of the proxy to use for outgoing requests.
|
| 53 |
+
pub proxy: Option<Proxy>,
|
| 54 |
/// It stores the number of https connections to keep in the pool.
|
| 55 |
pub number_of_https_connections: u8,
|
| 56 |
}
|
|
|
|
| 123 |
_ => parsed_cet,
|
| 124 |
};
|
| 125 |
|
| 126 |
+
let proxy_opt = globals.get::<_, Option<String>>("proxy")?;
|
| 127 |
+
let proxy = proxy_opt.and_then(|proxy_str| {
|
| 128 |
+
Proxy::all(proxy_str).ok().and_then(|_| {
|
| 129 |
+
log::error!("Invalid proxy url, defaulting to no proxy.");
|
| 130 |
+
None
|
| 131 |
+
})
|
| 132 |
+
});
|
| 133 |
+
|
| 134 |
Ok(Config {
|
| 135 |
port: globals.get::<_, u16>("port")?,
|
| 136 |
binding_ip: globals.get::<_, String>("binding_ip")?,
|
|
|
|
| 162 |
safe_search,
|
| 163 |
#[cfg(any(feature = "redis-cache", feature = "memory-cache"))]
|
| 164 |
cache_expiry_time,
|
| 165 |
+
proxy,
|
| 166 |
})
|
| 167 |
}
|
| 168 |
}
|
src/results/aggregator.rs
CHANGED
|
@@ -75,7 +75,7 @@ pub async fn aggregate(
|
|
| 75 |
safe_search: u8,
|
| 76 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
| 77 |
let client = CLIENT.get_or_init(|| {
|
| 78 |
-
ClientBuilder::new()
|
| 79 |
.timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
| 80 |
.pool_idle_timeout(Duration::from_secs(
|
| 81 |
config.pool_idle_connection_timeout as u64,
|
|
@@ -86,9 +86,13 @@ pub async fn aggregate(
|
|
| 86 |
.https_only(true)
|
| 87 |
.gzip(true)
|
| 88 |
.brotli(true)
|
| 89 |
-
.http2_adaptive_window(config.adaptive_window)
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
});
|
| 93 |
|
| 94 |
let user_agent: &str = random_user_agent();
|
|
@@ -242,6 +246,7 @@ pub async fn filter_with_lists(
|
|
| 242 |
|
| 243 |
Ok(())
|
| 244 |
}
|
|
|
|
| 245 |
/// Sorts SearchResults by relevance score.
|
| 246 |
/// <br> sort_unstable is used as its faster,stability is not an issue on our side.
|
| 247 |
/// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
|
|
@@ -257,6 +262,7 @@ fn sort_search_results(results: &mut [SearchResult]) {
|
|
| 257 |
.unwrap_or(Ordering::Less)
|
| 258 |
})
|
| 259 |
}
|
|
|
|
| 260 |
#[cfg(test)]
|
| 261 |
mod tests {
|
| 262 |
use super::*;
|
|
|
|
| 75 |
safe_search: u8,
|
| 76 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
| 77 |
let client = CLIENT.get_or_init(|| {
|
| 78 |
+
let mut cb = ClientBuilder::new()
|
| 79 |
.timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
|
| 80 |
.pool_idle_timeout(Duration::from_secs(
|
| 81 |
config.pool_idle_connection_timeout as u64,
|
|
|
|
| 86 |
.https_only(true)
|
| 87 |
.gzip(true)
|
| 88 |
.brotli(true)
|
| 89 |
+
.http2_adaptive_window(config.adaptive_window);
|
| 90 |
+
|
| 91 |
+
if config.proxy.is_some() {
|
| 92 |
+
cb = cb.proxy(config.proxy.clone().unwrap());
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
cb.build().unwrap()
|
| 96 |
});
|
| 97 |
|
| 98 |
let user_agent: &str = random_user_agent();
|
|
|
|
| 246 |
|
| 247 |
Ok(())
|
| 248 |
}
|
| 249 |
+
|
| 250 |
/// Sorts SearchResults by relevance score.
|
| 251 |
/// <br> sort_unstable is used as its faster,stability is not an issue on our side.
|
| 252 |
/// For reasons why, check out [`this`](https://rust-lang.github.io/rfcs/1884-unstable-sort.html)
|
|
|
|
| 262 |
.unwrap_or(Ordering::Less)
|
| 263 |
})
|
| 264 |
}
|
| 265 |
+
|
| 266 |
#[cfg(test)]
|
| 267 |
mod tests {
|
| 268 |
use super::*;
|
websurfx/config.lua
CHANGED
|
@@ -75,3 +75,5 @@ upstream_search_engines = {
|
|
| 75 |
Mojeek = false,
|
| 76 |
Bing = false,
|
| 77 |
} -- select the upstream search engines from which the results should be fetched.
|
|
|
|
|
|
|
|
|
| 75 |
Mojeek = false,
|
| 76 |
Bing = false,
|
| 77 |
} -- select the upstream search engines from which the results should be fetched.
|
| 78 |
+
|
| 79 |
+
proxy = nil -- Proxy to send outgoing requests through. Set to nil to disable.
|