Spaces:
Running
Running
Merge branch 'rolling' into feat-rate-limiter-for-websurfx
Browse files- Cargo.lock +6 -6
- Cargo.toml +2 -2
- README.md +1 -1
- public/images/barricade.png +0 -0
- public/images/filter.png +0 -0
- public/static/themes/simple.css +29 -0
- public/templates/search.html +62 -30
- src/engines/duckduckgo.rs +1 -0
- src/engines/engine_models.rs +1 -0
- src/engines/searx.rs +11 -2
- src/results/aggregation_models.rs +33 -4
- src/results/aggregator.rs +23 -14
- src/server/routes.rs +73 -15
- websurfx/config.lua +11 -0
Cargo.lock
CHANGED
|
@@ -544,18 +544,18 @@ dependencies = [
|
|
| 544 |
|
| 545 |
[[package]]
|
| 546 |
name = "clap"
|
| 547 |
-
version = "4.4.
|
| 548 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 549 |
-
checksum = "
|
| 550 |
dependencies = [
|
| 551 |
"clap_builder",
|
| 552 |
]
|
| 553 |
|
| 554 |
[[package]]
|
| 555 |
name = "clap_builder"
|
| 556 |
-
version = "4.4.
|
| 557 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 558 |
-
checksum = "
|
| 559 |
dependencies = [
|
| 560 |
"anstyle",
|
| 561 |
"clap_lex",
|
|
@@ -2764,9 +2764,9 @@ dependencies = [
|
|
| 2764 |
|
| 2765 |
[[package]]
|
| 2766 |
name = "rustix"
|
| 2767 |
-
version = "0.38.
|
| 2768 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2769 |
-
checksum = "
|
| 2770 |
dependencies = [
|
| 2771 |
"bitflags 2.4.0",
|
| 2772 |
"errno",
|
|
|
|
| 544 |
|
| 545 |
[[package]]
|
| 546 |
name = "clap"
|
| 547 |
+
version = "4.4.2"
|
| 548 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 549 |
+
checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
|
| 550 |
dependencies = [
|
| 551 |
"clap_builder",
|
| 552 |
]
|
| 553 |
|
| 554 |
[[package]]
|
| 555 |
name = "clap_builder"
|
| 556 |
+
version = "4.4.2"
|
| 557 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 558 |
+
checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
|
| 559 |
dependencies = [
|
| 560 |
"anstyle",
|
| 561 |
"clap_lex",
|
|
|
|
| 2764 |
|
| 2765 |
[[package]]
|
| 2766 |
name = "rustix"
|
| 2767 |
+
version = "0.38.11"
|
| 2768 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
| 2769 |
+
checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453"
|
| 2770 |
dependencies = [
|
| 2771 |
"bitflags 2.4.0",
|
| 2772 |
"errno",
|
Cargo.toml
CHANGED
|
@@ -20,8 +20,8 @@ serde_json = {version="1.0.105"}
|
|
| 20 |
fake-useragent = {version="0.1.3"}
|
| 21 |
env_logger = {version="0.10.0"}
|
| 22 |
log = {version="0.4.20"}
|
| 23 |
-
|
| 24 |
-
redis = {version="0.23.3"}
|
| 25 |
md5 = {version="0.7.0"}
|
| 26 |
rand={version="0.8.5"}
|
| 27 |
once_cell = {version="1.18.0"}
|
|
|
|
| 20 |
fake-useragent = {version="0.1.3"}
|
| 21 |
env_logger = {version="0.10.0"}
|
| 22 |
log = {version="0.4.20"}
|
| 23 |
+
mlua = {version="0.8.10", features=["luajit"]}
|
| 24 |
+
redis = {version="0.23.3", features=["tokio-comp","connection-manager"]}
|
| 25 |
md5 = {version="0.7.0"}
|
| 26 |
rand={version="0.8.5"}
|
| 27 |
once_cell = {version="1.18.0"}
|
README.md
CHANGED
|
@@ -51,7 +51,7 @@
|
|
| 51 |
- **Getting Started**
|
| 52 |
- [🔭 Preview](#preview-)
|
| 53 |
- [🚀 Features](#features-)
|
| 54 |
-
- [🛠️ Installation and Testing](#installation-and-testing
|
| 55 |
- [🔧 Configuration](#configuration-)
|
| 56 |
- **Feature Overview**
|
| 57 |
- [🎨 Theming](#theming-)
|
|
|
|
| 51 |
- **Getting Started**
|
| 52 |
- [🔭 Preview](#preview-)
|
| 53 |
- [🚀 Features](#features-)
|
| 54 |
+
- [🛠️ Installation and Testing](#installation-and-testing-%EF%B8%8F)
|
| 55 |
- [🔧 Configuration](#configuration-)
|
| 56 |
- **Feature Overview**
|
| 57 |
- [🎨 Theming](#theming-)
|
public/images/barricade.png
ADDED
|
public/images/filter.png
ADDED
|
public/static/themes/simple.css
CHANGED
|
@@ -132,6 +132,35 @@ body {
|
|
| 132 |
width: 1.2rem;
|
| 133 |
height: 1.2rem;
|
| 134 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
/* styles for the footer and header */
|
| 137 |
|
|
|
|
| 132 |
width: 1.2rem;
|
| 133 |
height: 1.2rem;
|
| 134 |
}
|
| 135 |
+
.results .result_disallowed,
|
| 136 |
+
.results .result_filtered {
|
| 137 |
+
display: flex;
|
| 138 |
+
justify-content: center;
|
| 139 |
+
align-items: center;
|
| 140 |
+
gap: 10rem;
|
| 141 |
+
font-size: 2rem;
|
| 142 |
+
color: var(--foreground-color);
|
| 143 |
+
margin: 0rem 7rem;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.results .result_disallowed .user_query,
|
| 147 |
+
.results .result_filtered .user_query {
|
| 148 |
+
color: var(--background-color);
|
| 149 |
+
font-weight: 300;
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
.results .result_disallowed img,
|
| 153 |
+
.results .result_filtered img {
|
| 154 |
+
width: 30rem;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.results .result_disallowed div,
|
| 158 |
+
.results .result_filtered div {
|
| 159 |
+
display: flex;
|
| 160 |
+
flex-direction: column;
|
| 161 |
+
gap: 1rem;
|
| 162 |
+
line-break: strict;
|
| 163 |
+
}
|
| 164 |
|
| 165 |
/* styles for the footer and header */
|
| 166 |
|
public/templates/search.html
CHANGED
|
@@ -1,37 +1,69 @@
|
|
| 1 |
{{>header this.style}}
|
| 2 |
<main class="results">
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
</div>
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
<li>Make sure that all words are spelled correctly.</li>
|
| 22 |
-
<li>Try different keywords.</li>
|
| 23 |
-
<li>Try more general keywords.</li>
|
| 24 |
-
</ul>
|
| 25 |
-
<img src="./images/no_results.gif" alt="Man fishing gif" />
|
| 26 |
</div>
|
| 27 |
-
{{/if}}
|
| 28 |
-
</div>
|
| 29 |
-
<div class="page_navigation">
|
| 30 |
-
<button type="button" onclick="navigate_backward()">
|
| 31 |
-
← previous
|
| 32 |
-
</button>
|
| 33 |
-
<button type="button" onclick="navigate_forward()">next →</button>
|
| 34 |
-
</div>
|
| 35 |
</main>
|
| 36 |
<script src="static/index.js"></script>
|
| 37 |
<script src="static/pagination.js"></script>
|
|
|
|
| 1 |
{{>header this.style}}
|
| 2 |
<main class="results">
|
| 3 |
+
{{>search_bar this}}
|
| 4 |
+
<div class="results_aggregated">
|
| 5 |
+
{{#if results}} {{#each results}}
|
| 6 |
+
<div class="result">
|
| 7 |
+
<h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
|
| 8 |
+
<small>{{{this.url}}}</small>
|
| 9 |
+
<p>{{{this.description}}}</p>
|
| 10 |
+
<div class="upstream_engines">
|
| 11 |
+
{{#each engine}}
|
| 12 |
+
<span>{{{this}}}</span>
|
| 13 |
+
{{/each}}
|
| 14 |
+
</div>
|
| 15 |
+
</div>
|
| 16 |
+
{{/each}} {{else}} {{#if disallowed}}
|
| 17 |
+
<div class="result_disallowed">
|
| 18 |
+
<div class="description">
|
| 19 |
+
<p>
|
| 20 |
+
Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
|
| 21 |
+
has been disallowed.
|
| 22 |
+
</p>
|
| 23 |
+
<p class="description_paragraph">Dear user,</p>
|
| 24 |
+
<p class="description_paragraph">
|
| 25 |
+
The query - <span class="user_query">{{{this.pageQuery}}}</span> - has
|
| 26 |
+
been blacklisted via server configuration and hence disallowed by the
|
| 27 |
+
server. Henceforth no results could be displayed for your query.
|
| 28 |
+
</p>
|
| 29 |
+
</div>
|
| 30 |
+
<img src="./images/barricade.png" alt="Image of a Barricade" />
|
| 31 |
+
</div>
|
| 32 |
+
{{else}} {{#if filtered}}
|
| 33 |
+
<div class="result_filtered">
|
| 34 |
+
<div class="description">
|
| 35 |
+
<p>
|
| 36 |
+
Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
|
| 37 |
+
has been filtered.
|
| 38 |
+
</p>
|
| 39 |
+
<p class="description_paragraph">Dear user,</p>
|
| 40 |
+
<p class="description_paragraph">
|
| 41 |
+
All the search results contain results that has been configured to be
|
| 42 |
+
filtered out via server configuration and henceforth has been
|
| 43 |
+
completely filtered out.
|
| 44 |
+
</p>
|
| 45 |
+
</div>
|
| 46 |
+
<img src="./images/filter.png" alt="Image of a paper inside a funnel" />
|
| 47 |
+
</div>
|
| 48 |
+
{{else}}
|
| 49 |
+
<div class="result_not_found">
|
| 50 |
+
<p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
|
| 51 |
+
<p class="suggestions">Suggestions:</p>
|
| 52 |
+
<ul>
|
| 53 |
+
<li>Make sure that all words are spelled correctly.</li>
|
| 54 |
+
<li>Try different keywords.</li>
|
| 55 |
+
<li>Try more general keywords.</li>
|
| 56 |
+
</ul>
|
| 57 |
+
<img src="./images/no_results.gif" alt="Man fishing gif" />
|
| 58 |
+
</div>
|
| 59 |
+
{{/if}} {{/if}} {{/if}}
|
| 60 |
</div>
|
| 61 |
+
<div class="page_navigation">
|
| 62 |
+
<button type="button" onclick="navigate_backward()">
|
| 63 |
+
← previous
|
| 64 |
+
</button>
|
| 65 |
+
<button type="button" onclick="navigate_forward()">next →</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
</main>
|
| 68 |
<script src="static/index.js"></script>
|
| 69 |
<script src="static/pagination.js"></script>
|
src/engines/duckduckgo.rs
CHANGED
|
@@ -43,6 +43,7 @@ impl SearchEngine for DuckDuckGo {
|
|
| 43 |
page: u32,
|
| 44 |
user_agent: &str,
|
| 45 |
request_timeout: u8,
|
|
|
|
| 46 |
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
| 47 |
// Page number can be missing or empty string and so appropriate handling is required
|
| 48 |
// so that upstream server receives valid page number.
|
|
|
|
| 43 |
page: u32,
|
| 44 |
user_agent: &str,
|
| 45 |
request_timeout: u8,
|
| 46 |
+
_safe_search: u8,
|
| 47 |
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
| 48 |
// Page number can be missing or empty string and so appropriate handling is required
|
| 49 |
// so that upstream server receives valid page number.
|
src/engines/engine_models.rs
CHANGED
|
@@ -71,6 +71,7 @@ pub trait SearchEngine: Sync + Send {
|
|
| 71 |
page: u32,
|
| 72 |
user_agent: &str,
|
| 73 |
request_timeout: u8,
|
|
|
|
| 74 |
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
| 75 |
}
|
| 76 |
|
|
|
|
| 71 |
page: u32,
|
| 72 |
user_agent: &str,
|
| 73 |
request_timeout: u8,
|
| 74 |
+
safe_search: u8,
|
| 75 |
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
| 76 |
}
|
| 77 |
|
src/engines/searx.rs
CHANGED
|
@@ -42,12 +42,21 @@ impl SearchEngine for Searx {
|
|
| 42 |
page: u32,
|
| 43 |
user_agent: &str,
|
| 44 |
request_timeout: u8,
|
|
|
|
| 45 |
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
| 46 |
// Page number can be missing or empty string and so appropriate handling is required
|
| 47 |
// so that upstream server receives valid page number.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
let url: String = match page {
|
| 49 |
-
0 | 1 =>
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
};
|
| 52 |
|
| 53 |
// initializing headers and adding appropriate headers.
|
|
|
|
| 42 |
page: u32,
|
| 43 |
user_agent: &str,
|
| 44 |
request_timeout: u8,
|
| 45 |
+
mut safe_search: u8,
|
| 46 |
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
| 47 |
// Page number can be missing or empty string and so appropriate handling is required
|
| 48 |
// so that upstream server receives valid page number.
|
| 49 |
+
if safe_search == 3 {
|
| 50 |
+
safe_search = 2;
|
| 51 |
+
};
|
| 52 |
+
|
| 53 |
let url: String = match page {
|
| 54 |
+
0 | 1 => {
|
| 55 |
+
format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}")
|
| 56 |
+
}
|
| 57 |
+
_ => format!(
|
| 58 |
+
"https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}"
|
| 59 |
+
),
|
| 60 |
};
|
| 61 |
|
| 62 |
// initializing headers and adding appropriate headers.
|
src/results/aggregation_models.rs
CHANGED
|
@@ -102,13 +102,15 @@ impl EngineErrorInfo {
|
|
| 102 |
/// and the type of error that caused it.
|
| 103 |
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
|
| 104 |
/// given search query.
|
| 105 |
-
#[derive(Serialize, Deserialize)]
|
| 106 |
#[serde(rename_all = "camelCase")]
|
| 107 |
pub struct SearchResults {
|
| 108 |
pub results: Vec<SearchResult>,
|
| 109 |
pub page_query: String,
|
| 110 |
pub style: Style,
|
| 111 |
-
pub engine_errors_info:
|
|
|
|
|
|
|
| 112 |
}
|
| 113 |
|
| 114 |
impl SearchResults {
|
|
@@ -122,6 +124,7 @@ impl SearchResults {
|
|
| 122 |
/// the search url.
|
| 123 |
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
|
| 124 |
/// given search query.
|
|
|
|
| 125 |
pub fn new(
|
| 126 |
results: Vec<SearchResult>,
|
| 127 |
page_query: &str,
|
|
@@ -131,12 +134,38 @@ impl SearchResults {
|
|
| 131 |
results,
|
| 132 |
page_query: page_query.to_owned(),
|
| 133 |
style: Style::default(),
|
| 134 |
-
engine_errors_info:
|
|
|
|
|
|
|
| 135 |
}
|
| 136 |
}
|
| 137 |
|
| 138 |
/// A setter function to add website style to the return search results.
|
| 139 |
pub fn add_style(&mut self, style: &Style) {
|
| 140 |
-
self.style = style.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
}
|
| 142 |
}
|
|
|
|
| 102 |
/// and the type of error that caused it.
|
| 103 |
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
|
| 104 |
/// given search query.
|
| 105 |
+
#[derive(Serialize, Deserialize, Default)]
|
| 106 |
#[serde(rename_all = "camelCase")]
|
| 107 |
pub struct SearchResults {
|
| 108 |
pub results: Vec<SearchResult>,
|
| 109 |
pub page_query: String,
|
| 110 |
pub style: Style,
|
| 111 |
+
pub engine_errors_info: Vec<EngineErrorInfo>,
|
| 112 |
+
pub disallowed: bool,
|
| 113 |
+
pub filtered: bool,
|
| 114 |
}
|
| 115 |
|
| 116 |
impl SearchResults {
|
|
|
|
| 124 |
/// the search url.
|
| 125 |
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
|
| 126 |
/// given search query.
|
| 127 |
+
/// * ``
|
| 128 |
pub fn new(
|
| 129 |
results: Vec<SearchResult>,
|
| 130 |
page_query: &str,
|
|
|
|
| 134 |
results,
|
| 135 |
page_query: page_query.to_owned(),
|
| 136 |
style: Style::default(),
|
| 137 |
+
engine_errors_info: engine_errors_info.to_owned(),
|
| 138 |
+
disallowed: Default::default(),
|
| 139 |
+
filtered: Default::default(),
|
| 140 |
}
|
| 141 |
}
|
| 142 |
|
| 143 |
/// A setter function to add website style to the return search results.
|
| 144 |
pub fn add_style(&mut self, style: &Style) {
|
| 145 |
+
self.style = style.clone();
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
/// A setter function that sets disallowed to true.
|
| 149 |
+
pub fn set_disallowed(&mut self) {
|
| 150 |
+
self.disallowed = true;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
/// A setter function to set the current page search query.
|
| 154 |
+
pub fn set_page_query(&mut self, page: &str) {
|
| 155 |
+
self.page_query = page.to_owned();
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
/// A setter function that sets the filtered to true.
|
| 159 |
+
pub fn set_filtered(&mut self) {
|
| 160 |
+
self.filtered = true;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
/// A getter function that gets the value of `engine_errors_info`.
|
| 164 |
+
pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
|
| 165 |
+
std::mem::take(&mut self.engine_errors_info)
|
| 166 |
+
}
|
| 167 |
+
/// A getter function that gets the value of `results`.
|
| 168 |
+
pub fn results(&mut self) -> Vec<SearchResult> {
|
| 169 |
+
self.results.clone()
|
| 170 |
}
|
| 171 |
}
|
src/results/aggregator.rs
CHANGED
|
@@ -70,6 +70,7 @@ pub async fn aggregate(
|
|
| 70 |
debug: bool,
|
| 71 |
upstream_search_engines: &[EngineHandler],
|
| 72 |
request_timeout: u8,
|
|
|
|
| 73 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
| 74 |
let user_agent: &str = random_user_agent();
|
| 75 |
|
|
@@ -91,7 +92,13 @@ pub async fn aggregate(
|
|
| 91 |
let query: String = query.to_owned();
|
| 92 |
tasks.push(tokio::spawn(async move {
|
| 93 |
search_engine
|
| 94 |
-
.results(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
.await
|
| 96 |
}));
|
| 97 |
}
|
|
@@ -150,20 +157,22 @@ pub async fn aggregate(
|
|
| 150 |
}
|
| 151 |
}
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
|
| 166 |
-
|
|
|
|
| 167 |
|
| 168 |
let results: Vec<SearchResult> = result_map.into_values().collect();
|
| 169 |
|
|
@@ -189,7 +198,7 @@ pub fn filter_with_lists(
|
|
| 189 |
let mut reader = BufReader::new(File::open(file_path)?);
|
| 190 |
|
| 191 |
for line in reader.by_ref().lines() {
|
| 192 |
-
let re = Regex::new(
|
| 193 |
|
| 194 |
// Iterate over each search result in the map and check if it matches the regex pattern
|
| 195 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
|
|
|
| 70 |
debug: bool,
|
| 71 |
upstream_search_engines: &[EngineHandler],
|
| 72 |
request_timeout: u8,
|
| 73 |
+
safe_search: u8,
|
| 74 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
| 75 |
let user_agent: &str = random_user_agent();
|
| 76 |
|
|
|
|
| 92 |
let query: String = query.to_owned();
|
| 93 |
tasks.push(tokio::spawn(async move {
|
| 94 |
search_engine
|
| 95 |
+
.results(
|
| 96 |
+
&query,
|
| 97 |
+
page,
|
| 98 |
+
user_agent.clone(),
|
| 99 |
+
request_timeout,
|
| 100 |
+
safe_search,
|
| 101 |
+
)
|
| 102 |
.await
|
| 103 |
}));
|
| 104 |
}
|
|
|
|
| 157 |
}
|
| 158 |
}
|
| 159 |
|
| 160 |
+
if safe_search >= 3 {
|
| 161 |
+
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
| 162 |
+
filter_with_lists(
|
| 163 |
+
&mut result_map,
|
| 164 |
+
&mut blacklist_map,
|
| 165 |
+
file_path(FileType::BlockList)?,
|
| 166 |
+
)?;
|
| 167 |
|
| 168 |
+
filter_with_lists(
|
| 169 |
+
&mut blacklist_map,
|
| 170 |
+
&mut result_map,
|
| 171 |
+
file_path(FileType::AllowList)?,
|
| 172 |
+
)?;
|
| 173 |
|
| 174 |
+
drop(blacklist_map);
|
| 175 |
+
}
|
| 176 |
|
| 177 |
let results: Vec<SearchResult> = result_map.into_values().collect();
|
| 178 |
|
|
|
|
| 198 |
let mut reader = BufReader::new(File::open(file_path)?);
|
| 199 |
|
| 200 |
for line in reader.by_ref().lines() {
|
| 201 |
+
let re = Regex::new(line?.trim())?;
|
| 202 |
|
| 203 |
// Iterate over each search result in the map and check if it matches the regex pattern
|
| 204 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
src/server/routes.rs
CHANGED
|
@@ -2,7 +2,10 @@
|
|
| 2 |
//! meta search engine website and provide appropriate response to each route/page
|
| 3 |
//! when requested.
|
| 4 |
|
| 5 |
-
use std::
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
use crate::{
|
| 8 |
cache::cacher::RedisCache,
|
|
@@ -13,12 +16,13 @@ use crate::{
|
|
| 13 |
};
|
| 14 |
use actix_web::{get, web, HttpRequest, HttpResponse};
|
| 15 |
use handlebars::Handlebars;
|
|
|
|
| 16 |
use serde::Deserialize;
|
| 17 |
use tokio::join;
|
| 18 |
|
| 19 |
// ---- Constants ----
|
| 20 |
/// Initialize redis cache connection once and store it on the heap.
|
| 21 |
-
|
| 22 |
|
| 23 |
/// A named struct which deserializes all the user provided search parameters and stores them.
|
| 24 |
///
|
|
@@ -32,6 +36,7 @@ const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::Once
|
|
| 32 |
struct SearchParams {
|
| 33 |
q: Option<String>,
|
| 34 |
page: Option<u32>,
|
|
|
|
| 35 |
}
|
| 36 |
|
| 37 |
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
|
@@ -105,42 +110,58 @@ pub async fn search(
|
|
| 105 |
None => 1,
|
| 106 |
};
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
let (_, results, _) = join!(
|
| 109 |
results(
|
| 110 |
format!(
|
| 111 |
-
"http://{}:{}/search?q={}&page={}",
|
| 112 |
config.binding_ip,
|
| 113 |
config.port,
|
| 114 |
query,
|
| 115 |
-
page - 1
|
|
|
|
| 116 |
),
|
| 117 |
&config,
|
| 118 |
query,
|
| 119 |
page - 1,
|
| 120 |
-
|
|
|
|
| 121 |
),
|
| 122 |
results(
|
| 123 |
format!(
|
| 124 |
-
"http://{}:{}/search?q={}&page={}",
|
| 125 |
-
config.binding_ip, config.port, query, page
|
| 126 |
),
|
| 127 |
&config,
|
| 128 |
query,
|
| 129 |
page,
|
| 130 |
-
|
|
|
|
| 131 |
),
|
| 132 |
results(
|
| 133 |
format!(
|
| 134 |
-
"http://{}:{}/search?q={}&page={}",
|
| 135 |
config.binding_ip,
|
| 136 |
config.port,
|
| 137 |
query,
|
| 138 |
-
page + 1
|
|
|
|
| 139 |
),
|
| 140 |
&config,
|
| 141 |
query,
|
| 142 |
page + 1,
|
| 143 |
-
|
|
|
|
| 144 |
)
|
| 145 |
);
|
| 146 |
|
|
@@ -160,9 +181,10 @@ async fn results(
|
|
| 160 |
config: &Config,
|
| 161 |
query: &str,
|
| 162 |
page: u32,
|
| 163 |
-
req:
|
|
|
|
| 164 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
| 165 |
-
let redis_cache: RedisCache = REDIS_CACHE
|
| 166 |
.get_or_init(async {
|
| 167 |
// Initialize redis cache connection pool only once and store it in the heap.
|
| 168 |
RedisCache::new(&config.redis_url, 5).await.unwrap()
|
|
@@ -178,6 +200,23 @@ async fn results(
|
|
| 178 |
match cached_results_json {
|
| 179 |
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
|
| 180 |
Err(_) => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
// check if the cookie value is empty or not if it is empty then use the
|
| 182 |
// default selected upstream search engines from the config file otherwise
|
| 183 |
// parse the non-empty cookie and grab the user selected engines from the
|
|
@@ -199,6 +238,7 @@ async fn results(
|
|
| 199 |
config.debug,
|
| 200 |
&engines,
|
| 201 |
config.request_timeout,
|
|
|
|
| 202 |
)
|
| 203 |
.await?
|
| 204 |
}
|
|
@@ -210,14 +250,16 @@ async fn results(
|
|
| 210 |
config.debug,
|
| 211 |
&config.upstream_search_engines,
|
| 212 |
config.request_timeout,
|
|
|
|
| 213 |
)
|
| 214 |
.await?
|
| 215 |
}
|
| 216 |
};
|
| 217 |
-
|
|
|
|
|
|
|
| 218 |
results.add_style(&config.style);
|
| 219 |
redis_cache
|
| 220 |
-
.clone()
|
| 221 |
.cache_results(&serde_json::to_string(&results)?, &url)
|
| 222 |
.await?;
|
| 223 |
Ok(results)
|
|
@@ -225,6 +267,22 @@ async fn results(
|
|
| 225 |
}
|
| 226 |
}
|
| 227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
| 229 |
#[get("/robots.txt")]
|
| 230 |
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
|
|
|
| 2 |
//! meta search engine website and provide appropriate response to each route/page
|
| 3 |
//! when requested.
|
| 4 |
|
| 5 |
+
use std::{
|
| 6 |
+
fs::{read_to_string, File},
|
| 7 |
+
io::{BufRead, BufReader, Read},
|
| 8 |
+
};
|
| 9 |
|
| 10 |
use crate::{
|
| 11 |
cache::cacher::RedisCache,
|
|
|
|
| 16 |
};
|
| 17 |
use actix_web::{get, web, HttpRequest, HttpResponse};
|
| 18 |
use handlebars::Handlebars;
|
| 19 |
+
use regex::Regex;
|
| 20 |
use serde::Deserialize;
|
| 21 |
use tokio::join;
|
| 22 |
|
| 23 |
// ---- Constants ----
|
| 24 |
/// Initialize redis cache connection once and store it on the heap.
|
| 25 |
+
static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
|
| 26 |
|
| 27 |
/// A named struct which deserializes all the user provided search parameters and stores them.
|
| 28 |
///
|
|
|
|
| 36 |
struct SearchParams {
|
| 37 |
q: Option<String>,
|
| 38 |
page: Option<u32>,
|
| 39 |
+
safesearch: Option<u8>,
|
| 40 |
}
|
| 41 |
|
| 42 |
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
|
|
|
| 110 |
None => 1,
|
| 111 |
};
|
| 112 |
|
| 113 |
+
let safe_search: u8 = match config.safe_search {
|
| 114 |
+
3..=4 => config.safe_search,
|
| 115 |
+
_ => match ¶ms.safesearch {
|
| 116 |
+
Some(safesearch) => match safesearch {
|
| 117 |
+
0..=2 => *safesearch,
|
| 118 |
+
_ => 1,
|
| 119 |
+
},
|
| 120 |
+
None => config.safe_search,
|
| 121 |
+
},
|
| 122 |
+
};
|
| 123 |
+
|
| 124 |
let (_, results, _) = join!(
|
| 125 |
results(
|
| 126 |
format!(
|
| 127 |
+
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
| 128 |
config.binding_ip,
|
| 129 |
config.port,
|
| 130 |
query,
|
| 131 |
+
page - 1,
|
| 132 |
+
safe_search
|
| 133 |
),
|
| 134 |
&config,
|
| 135 |
query,
|
| 136 |
page - 1,
|
| 137 |
+
req.clone(),
|
| 138 |
+
safe_search
|
| 139 |
),
|
| 140 |
results(
|
| 141 |
format!(
|
| 142 |
+
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
| 143 |
+
config.binding_ip, config.port, query, page, safe_search
|
| 144 |
),
|
| 145 |
&config,
|
| 146 |
query,
|
| 147 |
page,
|
| 148 |
+
req.clone(),
|
| 149 |
+
safe_search
|
| 150 |
),
|
| 151 |
results(
|
| 152 |
format!(
|
| 153 |
+
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
| 154 |
config.binding_ip,
|
| 155 |
config.port,
|
| 156 |
query,
|
| 157 |
+
page + 1,
|
| 158 |
+
safe_search
|
| 159 |
),
|
| 160 |
&config,
|
| 161 |
query,
|
| 162 |
page + 1,
|
| 163 |
+
req.clone(),
|
| 164 |
+
safe_search
|
| 165 |
)
|
| 166 |
);
|
| 167 |
|
|
|
|
| 181 |
config: &Config,
|
| 182 |
query: &str,
|
| 183 |
page: u32,
|
| 184 |
+
req: HttpRequest,
|
| 185 |
+
safe_search: u8,
|
| 186 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
| 187 |
+
let mut redis_cache: RedisCache = REDIS_CACHE
|
| 188 |
.get_or_init(async {
|
| 189 |
// Initialize redis cache connection pool only once and store it in the heap.
|
| 190 |
RedisCache::new(&config.redis_url, 5).await.unwrap()
|
|
|
|
| 200 |
match cached_results_json {
|
| 201 |
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
|
| 202 |
Err(_) => {
|
| 203 |
+
if safe_search == 4 {
|
| 204 |
+
let mut results: SearchResults = SearchResults::default();
|
| 205 |
+
let mut _flag: bool =
|
| 206 |
+
is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
|
| 207 |
+
_flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
|
| 208 |
+
|
| 209 |
+
if _flag {
|
| 210 |
+
results.set_disallowed();
|
| 211 |
+
results.add_style(&config.style);
|
| 212 |
+
results.set_page_query(query);
|
| 213 |
+
redis_cache
|
| 214 |
+
.cache_results(&serde_json::to_string(&results)?, &url)
|
| 215 |
+
.await?;
|
| 216 |
+
return Ok(results);
|
| 217 |
+
}
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
// check if the cookie value is empty or not if it is empty then use the
|
| 221 |
// default selected upstream search engines from the config file otherwise
|
| 222 |
// parse the non-empty cookie and grab the user selected engines from the
|
|
|
|
| 238 |
config.debug,
|
| 239 |
&engines,
|
| 240 |
config.request_timeout,
|
| 241 |
+
safe_search,
|
| 242 |
)
|
| 243 |
.await?
|
| 244 |
}
|
|
|
|
| 250 |
config.debug,
|
| 251 |
&config.upstream_search_engines,
|
| 252 |
config.request_timeout,
|
| 253 |
+
safe_search,
|
| 254 |
)
|
| 255 |
.await?
|
| 256 |
}
|
| 257 |
};
|
| 258 |
+
if results.engine_errors_info().is_empty() && results.results().is_empty() {
|
| 259 |
+
results.set_filtered();
|
| 260 |
+
}
|
| 261 |
results.add_style(&config.style);
|
| 262 |
redis_cache
|
|
|
|
| 263 |
.cache_results(&serde_json::to_string(&results)?, &url)
|
| 264 |
.await?;
|
| 265 |
Ok(results)
|
|
|
|
| 267 |
}
|
| 268 |
}
|
| 269 |
|
| 270 |
+
fn is_match_from_filter_list(
|
| 271 |
+
file_path: &str,
|
| 272 |
+
query: &str,
|
| 273 |
+
) -> Result<bool, Box<dyn std::error::Error>> {
|
| 274 |
+
let mut flag = false;
|
| 275 |
+
let mut reader = BufReader::new(File::open(file_path)?);
|
| 276 |
+
for line in reader.by_ref().lines() {
|
| 277 |
+
let re = Regex::new(&line?)?;
|
| 278 |
+
if re.is_match(query) {
|
| 279 |
+
flag = true;
|
| 280 |
+
break;
|
| 281 |
+
}
|
| 282 |
+
}
|
| 283 |
+
Ok(flag)
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
| 287 |
#[get("/robots.txt")]
|
| 288 |
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
websurfx/config.lua
CHANGED
|
@@ -15,6 +15,17 @@ rate_limiter = {
|
|
| 15 |
time_limit = 3, -- The time limit in which the quantity of requests should be accepted.
|
| 16 |
}
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
-- ### Website ###
|
| 19 |
-- The different colorschemes provided are:
|
| 20 |
-- {{
|
|
|
|
| 15 |
time_limit = 3, -- The time limit in which the quantity of requests should be accepted.
|
| 16 |
}
|
| 17 |
|
| 18 |
+
-- ### Search ###
|
| 19 |
+
-- Filter results based on different levels. The levels provided are:
|
| 20 |
+
-- {{
|
| 21 |
+
-- 0 - None
|
| 22 |
+
-- 1 - Low
|
| 23 |
+
-- 2 - Moderate
|
| 24 |
+
-- 3 - High
|
| 25 |
+
-- 4 - Aggressive
|
| 26 |
+
-- }}
|
| 27 |
+
safe_search = 2
|
| 28 |
+
|
| 29 |
-- ### Website ###
|
| 30 |
-- The different colorschemes provided are:
|
| 31 |
-- {{
|