neon_arch
committed on
Commit
·
13ce420
1
Parent(s):
2885f23
⚙️ refactor: add several minor optimizations (#180)(#178)
Browse files
- src/results/aggregator.rs +45 -46
src/results/aggregator.rs
CHANGED
|
@@ -64,14 +64,14 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
|
|
| 64 |
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
|
| 65 |
/// containing appropriate values.
|
| 66 |
pub async fn aggregate(
|
| 67 |
-
query:
|
| 68 |
page: u32,
|
| 69 |
random_delay: bool,
|
| 70 |
debug: bool,
|
| 71 |
-
upstream_search_engines:
|
| 72 |
request_timeout: u8,
|
| 73 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
| 74 |
-
let user_agent:
|
| 75 |
|
| 76 |
// Add a random delay before making the request.
|
| 77 |
if random_delay || !debug {
|
|
@@ -80,19 +80,18 @@ pub async fn aggregate(
|
|
| 80 |
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
| 81 |
}
|
| 82 |
|
| 83 |
-
let mut names: Vec<&str> =
|
| 84 |
|
| 85 |
// create tasks for upstream result fetching
|
| 86 |
let mut tasks: FutureVec = FutureVec::new();
|
| 87 |
|
| 88 |
for engine_handler in upstream_search_engines {
|
| 89 |
-
let (name, search_engine) = engine_handler.into_name_engine();
|
| 90 |
names.push(name);
|
| 91 |
-
let query: String = query.
|
| 92 |
-
let user_agent: String = user_agent.clone();
|
| 93 |
tasks.push(tokio::spawn(async move {
|
| 94 |
search_engine
|
| 95 |
-
.results(query, page, user_agent
|
| 96 |
.await
|
| 97 |
}));
|
| 98 |
}
|
|
@@ -110,7 +109,7 @@ pub async fn aggregate(
|
|
| 110 |
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
| 111 |
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
| 112 |
|
| 113 |
-
let mut handle_error = |error: Report<EngineError>, engine_name:
|
| 114 |
log::error!("Engine Error: {:?}", error);
|
| 115 |
engine_errors_info.push(EngineErrorInfo::new(
|
| 116 |
error.downcast_ref::<EngineError>().unwrap(),
|
|
@@ -120,7 +119,7 @@ pub async fn aggregate(
|
|
| 120 |
|
| 121 |
for _ in 0..responses.len() {
|
| 122 |
let response = responses.pop().unwrap();
|
| 123 |
-
let engine = names.pop().unwrap()
|
| 124 |
|
| 125 |
if result_map.is_empty() {
|
| 126 |
match response {
|
|
@@ -128,7 +127,7 @@ pub async fn aggregate(
|
|
| 128 |
result_map = results.clone();
|
| 129 |
}
|
| 130 |
Err(error) => {
|
| 131 |
-
handle_error(error, engine);
|
| 132 |
}
|
| 133 |
}
|
| 134 |
continue;
|
|
@@ -140,13 +139,13 @@ pub async fn aggregate(
|
|
| 140 |
result_map
|
| 141 |
.entry(key)
|
| 142 |
.and_modify(|result| {
|
| 143 |
-
result.add_engines(engine
|
| 144 |
})
|
| 145 |
.or_insert_with(|| -> SearchResult { value });
|
| 146 |
});
|
| 147 |
}
|
| 148 |
Err(error) => {
|
| 149 |
-
handle_error(error, engine);
|
| 150 |
}
|
| 151 |
}
|
| 152 |
}
|
|
@@ -155,24 +154,20 @@ pub async fn aggregate(
|
|
| 155 |
filter_with_lists(
|
| 156 |
&mut result_map,
|
| 157 |
&mut blacklist_map,
|
| 158 |
-
|
| 159 |
)?;
|
| 160 |
|
| 161 |
filter_with_lists(
|
| 162 |
&mut blacklist_map,
|
| 163 |
&mut result_map,
|
| 164 |
-
|
| 165 |
)?;
|
| 166 |
|
| 167 |
drop(blacklist_map);
|
| 168 |
|
| 169 |
let results: Vec<SearchResult> = result_map.into_values().collect();
|
| 170 |
|
| 171 |
-
Ok(SearchResults::new(
|
| 172 |
-
results,
|
| 173 |
-
query.to_string(),
|
| 174 |
-
engine_errors_info,
|
| 175 |
-
))
|
| 176 |
}
|
| 177 |
|
| 178 |
/// Filters a map of search results using a list of regex patterns.
|
|
@@ -203,7 +198,10 @@ pub fn filter_with_lists(
|
|
| 203 |
|| re.is_match(&search_result.description.to_lowercase())
|
| 204 |
{
|
| 205 |
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
| 206 |
-
resultant_map.insert(
|
|
|
|
|
|
|
|
|
|
| 207 |
}
|
| 208 |
}
|
| 209 |
}
|
|
@@ -214,6 +212,7 @@ pub fn filter_with_lists(
|
|
| 214 |
#[cfg(test)]
|
| 215 |
mod tests {
|
| 216 |
use super::*;
|
|
|
|
| 217 |
use std::collections::HashMap;
|
| 218 |
use std::io::Write;
|
| 219 |
use tempfile::NamedTempFile;
|
|
@@ -223,22 +222,22 @@ mod tests {
|
|
| 223 |
// Create a map of search results to filter
|
| 224 |
let mut map_to_be_filtered = HashMap::new();
|
| 225 |
map_to_be_filtered.insert(
|
| 226 |
-
"https://www.example.com".
|
| 227 |
SearchResult {
|
| 228 |
-
title: "Example Domain".
|
| 229 |
-
url: "https://www.example.com".
|
| 230 |
description: "This domain is for use in illustrative examples in documents."
|
| 231 |
-
.
|
| 232 |
-
engine:
|
| 233 |
},
|
| 234 |
);
|
| 235 |
map_to_be_filtered.insert(
|
| 236 |
-
"https://www.rust-lang.org/".
|
| 237 |
SearchResult {
|
| 238 |
-
title: "Rust Programming Language".
|
| 239 |
-
url: "https://www.rust-lang.org/".
|
| 240 |
-
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".
|
| 241 |
-
engine:
|
| 242 |
},
|
| 243 |
);
|
| 244 |
|
|
@@ -267,22 +266,22 @@ mod tests {
|
|
| 267 |
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
| 268 |
let mut map_to_be_filtered = HashMap::new();
|
| 269 |
map_to_be_filtered.insert(
|
| 270 |
-
"https://www.example.com".
|
| 271 |
SearchResult {
|
| 272 |
-
title: "Example Domain".
|
| 273 |
-
url: "https://www.example.com".
|
| 274 |
description: "This domain is for use in illustrative examples in documents."
|
| 275 |
-
.
|
| 276 |
-
engine:
|
| 277 |
},
|
| 278 |
);
|
| 279 |
map_to_be_filtered.insert(
|
| 280 |
-
"https://www.rust-lang.org/".
|
| 281 |
SearchResult {
|
| 282 |
-
title: "Rust Programming Language".
|
| 283 |
-
url: "https://www.rust-lang.org/".
|
| 284 |
-
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".
|
| 285 |
-
engine:
|
| 286 |
},
|
| 287 |
);
|
| 288 |
|
|
@@ -327,13 +326,13 @@ mod tests {
|
|
| 327 |
fn test_filter_with_lists_invalid_regex() {
|
| 328 |
let mut map_to_be_filtered = HashMap::new();
|
| 329 |
map_to_be_filtered.insert(
|
| 330 |
-
"https://www.example.com".
|
| 331 |
SearchResult {
|
| 332 |
-
title: "Example Domain".
|
| 333 |
-
url: "https://www.example.com".
|
| 334 |
description: "This domain is for use in illustrative examples in documents."
|
| 335 |
-
.
|
| 336 |
-
engine:
|
| 337 |
},
|
| 338 |
);
|
| 339 |
|
|
|
|
| 64 |
/// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
|
| 65 |
/// containing appropriate values.
|
| 66 |
pub async fn aggregate(
|
| 67 |
+
query: &str,
|
| 68 |
page: u32,
|
| 69 |
random_delay: bool,
|
| 70 |
debug: bool,
|
| 71 |
+
upstream_search_engines: &[EngineHandler],
|
| 72 |
request_timeout: u8,
|
| 73 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
| 74 |
+
let user_agent: &str = random_user_agent();
|
| 75 |
|
| 76 |
// Add a random delay before making the request.
|
| 77 |
if random_delay || !debug {
|
|
|
|
| 80 |
tokio::time::sleep(Duration::from_secs(delay_secs)).await;
|
| 81 |
}
|
| 82 |
|
| 83 |
+
let mut names: Vec<&str> = Vec::with_capacity(0);
|
| 84 |
|
| 85 |
// create tasks for upstream result fetching
|
| 86 |
let mut tasks: FutureVec = FutureVec::new();
|
| 87 |
|
| 88 |
for engine_handler in upstream_search_engines {
|
| 89 |
+
let (name, search_engine) = engine_handler.to_owned().into_name_engine();
|
| 90 |
names.push(name);
|
| 91 |
+
let query: String = query.to_owned();
|
|
|
|
| 92 |
tasks.push(tokio::spawn(async move {
|
| 93 |
search_engine
|
| 94 |
+
.results(&query, page, user_agent, request_timeout)
|
| 95 |
.await
|
| 96 |
}));
|
| 97 |
}
|
|
|
|
| 109 |
let mut result_map: HashMap<String, SearchResult> = HashMap::new();
|
| 110 |
let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();
|
| 111 |
|
| 112 |
+
let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
|
| 113 |
log::error!("Engine Error: {:?}", error);
|
| 114 |
engine_errors_info.push(EngineErrorInfo::new(
|
| 115 |
error.downcast_ref::<EngineError>().unwrap(),
|
|
|
|
| 119 |
|
| 120 |
for _ in 0..responses.len() {
|
| 121 |
let response = responses.pop().unwrap();
|
| 122 |
+
let engine = names.pop().unwrap();
|
| 123 |
|
| 124 |
if result_map.is_empty() {
|
| 125 |
match response {
|
|
|
|
| 127 |
result_map = results.clone();
|
| 128 |
}
|
| 129 |
Err(error) => {
|
| 130 |
+
handle_error(&error, engine);
|
| 131 |
}
|
| 132 |
}
|
| 133 |
continue;
|
|
|
|
| 139 |
result_map
|
| 140 |
.entry(key)
|
| 141 |
.and_modify(|result| {
|
| 142 |
+
result.add_engines(engine);
|
| 143 |
})
|
| 144 |
.or_insert_with(|| -> SearchResult { value });
|
| 145 |
});
|
| 146 |
}
|
| 147 |
Err(error) => {
|
| 148 |
+
handle_error(&error, engine);
|
| 149 |
}
|
| 150 |
}
|
| 151 |
}
|
|
|
|
| 154 |
filter_with_lists(
|
| 155 |
&mut result_map,
|
| 156 |
&mut blacklist_map,
|
| 157 |
+
file_path(FileType::BlockList)?,
|
| 158 |
)?;
|
| 159 |
|
| 160 |
filter_with_lists(
|
| 161 |
&mut blacklist_map,
|
| 162 |
&mut result_map,
|
| 163 |
+
file_path(FileType::AllowList)?,
|
| 164 |
)?;
|
| 165 |
|
| 166 |
drop(blacklist_map);
|
| 167 |
|
| 168 |
let results: Vec<SearchResult> = result_map.into_values().collect();
|
| 169 |
|
| 170 |
+
Ok(SearchResults::new(results, query, &engine_errors_info))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
}
|
| 172 |
|
| 173 |
/// Filters a map of search results using a list of regex patterns.
|
|
|
|
| 198 |
|| re.is_match(&search_result.description.to_lowercase())
|
| 199 |
{
|
| 200 |
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
| 201 |
+
resultant_map.insert(
|
| 202 |
+
url.to_owned(),
|
| 203 |
+
map_to_be_filtered.remove(&url.to_owned()).unwrap(),
|
| 204 |
+
);
|
| 205 |
}
|
| 206 |
}
|
| 207 |
}
|
|
|
|
| 212 |
#[cfg(test)]
|
| 213 |
mod tests {
|
| 214 |
use super::*;
|
| 215 |
+
use smallvec::smallvec;
|
| 216 |
use std::collections::HashMap;
|
| 217 |
use std::io::Write;
|
| 218 |
use tempfile::NamedTempFile;
|
|
|
|
| 222 |
// Create a map of search results to filter
|
| 223 |
let mut map_to_be_filtered = HashMap::new();
|
| 224 |
map_to_be_filtered.insert(
|
| 225 |
+
"https://www.example.com".to_owned(),
|
| 226 |
SearchResult {
|
| 227 |
+
title: "Example Domain".to_owned(),
|
| 228 |
+
url: "https://www.example.com".to_owned(),
|
| 229 |
description: "This domain is for use in illustrative examples in documents."
|
| 230 |
+
.to_owned(),
|
| 231 |
+
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
| 232 |
},
|
| 233 |
);
|
| 234 |
map_to_be_filtered.insert(
|
| 235 |
+
"https://www.rust-lang.org/".to_owned(),
|
| 236 |
SearchResult {
|
| 237 |
+
title: "Rust Programming Language".to_owned(),
|
| 238 |
+
url: "https://www.rust-lang.org/".to_owned(),
|
| 239 |
+
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
| 240 |
+
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
| 241 |
},
|
| 242 |
);
|
| 243 |
|
|
|
|
| 266 |
fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
|
| 267 |
let mut map_to_be_filtered = HashMap::new();
|
| 268 |
map_to_be_filtered.insert(
|
| 269 |
+
"https://www.example.com".to_owned(),
|
| 270 |
SearchResult {
|
| 271 |
+
title: "Example Domain".to_owned(),
|
| 272 |
+
url: "https://www.example.com".to_owned(),
|
| 273 |
description: "This domain is for use in illustrative examples in documents."
|
| 274 |
+
.to_owned(),
|
| 275 |
+
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
| 276 |
},
|
| 277 |
);
|
| 278 |
map_to_be_filtered.insert(
|
| 279 |
+
"https://www.rust-lang.org/".to_owned(),
|
| 280 |
SearchResult {
|
| 281 |
+
title: "Rust Programming Language".to_owned(),
|
| 282 |
+
url: "https://www.rust-lang.org/".to_owned(),
|
| 283 |
+
description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
|
| 284 |
+
engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
|
| 285 |
},
|
| 286 |
);
|
| 287 |
|
|
|
|
| 326 |
fn test_filter_with_lists_invalid_regex() {
|
| 327 |
let mut map_to_be_filtered = HashMap::new();
|
| 328 |
map_to_be_filtered.insert(
|
| 329 |
+
"https://www.example.com".to_owned(),
|
| 330 |
SearchResult {
|
| 331 |
+
title: "Example Domain".to_owned(),
|
| 332 |
+
url: "https://www.example.com".to_owned(),
|
| 333 |
description: "This domain is for use in illustrative examples in documents."
|
| 334 |
+
.to_owned(),
|
| 335 |
+
engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
|
| 336 |
},
|
| 337 |
);
|
| 338 |
|