ddotthomas committed
:sparkles: config option to configure the https adaptive window size for requests (#529)
* Added config option to enable the reqwest client adaptive window
* Change adaptive window config name
Co-authored-by: neon_arch <[email protected]>
* Modified documentation
* Trimmed down aggregate parameters
---------
Co-authored-by: neon_arch <[email protected]>
- src/config/parser.rs +4 -0
- src/results/aggregator.rs +5 -5
- src/server/routes/search.rs +1 -3
- websurfx/config.lua +2 -0
src/config/parser.rs
CHANGED
@@ -30,6 +30,8 @@ pub struct Config {
     pub logging: bool,
     /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
+    /// It toggles whether to use adaptive HTTP windows
+    pub adaptive_window: bool,
     /// It stores all the engine names that were enabled by the user.
     pub upstream_search_engines: HashMap<String, bool>,
     /// It stores the time (secs) which controls the server request timeout.
@@ -68,6 +70,7 @@ impl Config {
 
         let debug: bool = globals.get::<_, bool>("debug")?;
         let logging: bool = globals.get::<_, bool>("logging")?;
+        let adaptive_window: bool = globals.get::<_, bool>("adaptive_window")?;
 
         if !logging_initialized {
             set_logging_level(debug, logging);
@@ -125,6 +128,7 @@ impl Config {
             },
             logging,
             debug,
+            adaptive_window,
             upstream_search_engines: globals
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?,
             request_timeout: globals.get::<_, u8>("request_timeout")?,
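The parser reads each option straight off the Lua globals table. Below is a minimal standalone sketch of that pattern, assuming the mlua crate (whose 0.9-era `get::<_, bool>` turbofish matches the calls above); the inline Lua string is a stand-in for loading websurfx/config.lua from disk:

use mlua::Lua;

fn main() -> mlua::Result<()> {
    let lua = Lua::new();
    // Stand-in for evaluating websurfx/config.lua; only the one option is set here.
    lua.load("adaptive_window = true").exec()?;

    // Same access pattern as the parser above: fetch a typed global, erroring
    // out if the option is missing or has the wrong type.
    let adaptive_window: bool = lua.globals().get::<_, bool>("adaptive_window")?;
    println!("adaptive_window = {adaptive_window}");
    Ok(())
}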
src/results/aggregator.rs
CHANGED
@@ -2,6 +2,7 @@
 //! search engines and then removes duplicate results.
 
 use super::user_agent::random_user_agent;
+use crate::config::parser::Config;
 use crate::handler::{file_path, FileType};
 use crate::models::{
     aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
@@ -66,18 +67,17 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
 pub async fn aggregate(
     query: &str,
     page: u32,
-    random_delay: bool,
-    debug: bool,
+    config: &Config,
     upstream_search_engines: &[EngineHandler],
-    request_timeout: u8,
     safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let client = CLIENT.get_or_init(|| {
         ClientBuilder::new()
-            .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
+            .timeout(Duration::from_secs(config.request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
             .https_only(true)
             .gzip(true)
             .brotli(true)
+            .http2_adaptive_window(config.adaptive_window)
             .build()
             .unwrap()
     });
@@ -85,7 +85,7 @@ pub async fn aggregate(
     let user_agent: &str = random_user_agent();
 
     // Add a random delay before making the request.
-    if random_delay || !debug {
+    if config.aggregator.random_delay || !config.debug {
         let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.subsec_nanos() as f32;
         let delay = ((nanos / 1_0000_0000 as f32).floor() as u64) + 1;
         tokio::time::sleep(Duration::from_secs(delay)).await;
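For reference, reqwest's `ClientBuilder::http2_adaptive_window` toggles dynamic sizing of the HTTP/2 receive window. A minimal standalone sketch of the client construction above, assuming reqwest is built with its `gzip` and `brotli` features enabled; the literal values are placeholders for the corresponding Config fields:

use std::time::Duration;
use reqwest::ClientBuilder;

fn main() -> Result<(), reqwest::Error> {
    let request_timeout: u8 = 60;     // placeholder for config.request_timeout
    let adaptive_window: bool = true; // placeholder for config.adaptive_window

    let _client = ClientBuilder::new()
        .timeout(Duration::from_secs(request_timeout as u64))
        .https_only(true)
        .gzip(true)
        .brotli(true)
        // When enabled, the HTTP/2 flow-control window is resized based on
        // observed bandwidth instead of staying at a fixed size.
        .http2_adaptive_window(adaptive_window)
        .build()?;

    Ok(())
}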
src/server/routes/search.rs
CHANGED
@@ -209,14 +209,12 @@ async fn results(
         aggregate(
             query,
             page,
-            config.aggregator.random_delay,
-            config.debug,
+            config,
             &search_settings
                 .engines
                 .iter()
                 .filter_map(|engine| EngineHandler::new(engine).ok())
                 .collect::<Vec<EngineHandler>>(),
-            config.request_timeout,
             safe_search_level,
         )
         .await?
websurfx/config.lua
CHANGED
@@ -14,6 +14,8 @@ rate_limiter = {
     number_of_requests = 20, -- The number of request that are allowed within a provided time limit.
     time_limit = 3, -- The time limit in which the quantity of requests that should be accepted.
 }
+-- Set whether the server will use an adaptive/dynamic HTTPS window size, see https://httpwg.org/specs/rfc9113.html#fc-principles
+https_adaptive_window_size = false
 
 -- ### Search ###
 -- Filter results based on different levels. The levels provided are: