Spaces:
Running
Running
add some comments to filter_with_lists and add a basic test
Browse files- Cargo.lock +1 -0
- Cargo.toml +1 -0
- src/results/aggregator.rs +65 -1
Cargo.lock
CHANGED
|
@@ -3566,6 +3566,7 @@ dependencies = [
|
|
| 3566 |
"scraper",
|
| 3567 |
"serde",
|
| 3568 |
"serde_json",
|
|
|
|
| 3569 |
"tokio 1.32.0",
|
| 3570 |
]
|
| 3571 |
|
|
|
|
| 3566 |
"scraper",
|
| 3567 |
"serde",
|
| 3568 |
"serde_json",
|
| 3569 |
+
"tempfile",
|
| 3570 |
"tokio 1.32.0",
|
| 3571 |
]
|
| 3572 |
|
Cargo.toml
CHANGED
|
@@ -27,6 +27,7 @@ once_cell = {version="1.18.0"}
|
|
| 27 |
error-stack = {version="0.3.1"}
|
| 28 |
async-trait = {version="0.1.73"}
|
| 29 |
regex = {version="1.9.3", features=["perf"]}
|
|
|
|
| 30 |
|
| 31 |
[dev-dependencies]
|
| 32 |
rusty-hook = "^0.11.2"
|
|
|
|
| 27 |
error-stack = {version="0.3.1"}
|
| 28 |
async-trait = {version="0.1.73"}
|
| 29 |
regex = {version="1.9.3", features=["perf"]}
|
| 30 |
+
tempfile = "3.8.0"
|
| 31 |
|
| 32 |
[dev-dependencies]
|
| 33 |
rusty-hook = "^0.11.2"
|
src/results/aggregator.rs
CHANGED
|
@@ -175,22 +175,86 @@ pub async fn aggregate(
|
|
| 175 |
))
|
| 176 |
}
|
| 177 |
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
| 180 |
resultant_map: &mut HashMap<String, SearchResult>,
|
| 181 |
file_path: &str,
|
| 182 |
) -> Result<(), Box<dyn std::error::Error>> {
|
| 183 |
let mut reader = BufReader::new(File::open(file_path)?);
|
|
|
|
| 184 |
for line in reader.by_ref().lines() {
|
| 185 |
let re = Regex::new(&line?)?;
|
|
|
|
|
|
|
| 186 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
| 187 |
if re.is_match(&url.to_lowercase())
|
| 188 |
|| re.is_match(&search_result.title.to_lowercase())
|
| 189 |
|| re.is_match(&search_result.description.to_lowercase())
|
| 190 |
{
|
|
|
|
| 191 |
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
| 192 |
}
|
| 193 |
}
|
| 194 |
}
|
|
|
|
| 195 |
Ok(())
|
| 196 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
))
|
| 176 |
}
|
| 177 |
|
| 178 |
+
/// Filters a map of search results using a list of regex patterns.
|
| 179 |
+
///
|
| 180 |
+
/// # Arguments
|
| 181 |
+
///
|
| 182 |
+
/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from.
|
| 183 |
+
/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
|
| 184 |
+
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
|
| 185 |
+
///
|
| 186 |
+
/// # Errors
|
| 187 |
+
///
|
| 188 |
+
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
|
| 189 |
+
pub fn filter_with_lists(
|
| 190 |
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
| 191 |
resultant_map: &mut HashMap<String, SearchResult>,
|
| 192 |
file_path: &str,
|
| 193 |
) -> Result<(), Box<dyn std::error::Error>> {
|
| 194 |
let mut reader = BufReader::new(File::open(file_path)?);
|
| 195 |
+
|
| 196 |
for line in reader.by_ref().lines() {
|
| 197 |
let re = Regex::new(&line?)?;
|
| 198 |
+
|
| 199 |
+
// Iterate over each search result in the map and check if it matches the regex pattern
|
| 200 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
| 201 |
if re.is_match(&url.to_lowercase())
|
| 202 |
|| re.is_match(&search_result.title.to_lowercase())
|
| 203 |
|| re.is_match(&search_result.description.to_lowercase())
|
| 204 |
{
|
| 205 |
+
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
| 206 |
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
| 207 |
}
|
| 208 |
}
|
| 209 |
}
|
| 210 |
+
|
| 211 |
Ok(())
|
| 212 |
}
|
| 213 |
+
|
| 214 |
+
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Checks that results matching the patterns of a list file are moved out of the input map
    /// and into the resultant map.
    #[test]
    fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
        const EXAMPLE_URL: &str = "https://www.example.com";
        const RUST_URL: &str = "https://www.rust-lang.org/";

        // Build the input map, keyed by each result's own URL.
        let fixtures = vec![
            SearchResult {
                title: "Example Domain".to_string(),
                url: EXAMPLE_URL.to_string(),
                description: "This domain is for use in illustrative examples in documents."
                    .to_string(),
                engine: vec!["Google".to_string(), "Bing".to_string()],
            },
            SearchResult {
                title: "Rust Programming Language".to_string(),
                url: RUST_URL.to_string(),
                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
                engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
            },
        ];
        let mut map_to_be_filtered: HashMap<String, SearchResult> = fixtures
            .into_iter()
            .map(|result| (result.url.clone(), result))
            .collect();

        // Write one regex pattern per line into a temporary list file.
        let mut file = NamedTempFile::new()?;
        for pattern in ["example", "rust"] {
            writeln!(file, "{}", pattern)?;
        }
        file.flush()?;

        let mut resultant_map = HashMap::new();
        let list_path = file.path().to_str().unwrap();
        filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, list_path)?;

        // Both results must have been moved, leaving the input map empty.
        assert_eq!(resultant_map.len(), 2);
        assert!(resultant_map.contains_key(EXAMPLE_URL));
        assert!(resultant_map.contains_key(RUST_URL));
        assert_eq!(map_to_be_filtered.len(), 0);

        Ok(())
    }
}
|