import streamlit as st
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
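
# Environment note (assumed, not pinned): the imports above come from the PyPI packages
# streamlit, requests, beautifulsoup4, transformers, and sentence-transformers; the two
# model pipelines additionally require torch to be installed.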


class URLValidator:
    def __init__(self):
        # Embedding model for query/page relevance scoring.
        self.similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
        # Fake-news classifier; rate_url_validity below still uses a fixed
        # fact_check_score rather than calling this pipeline.
        self.fake_news_classifier = pipeline("text-classification", model="mrm8488/bert-tiny-finetuned-fake-news-detection")
        # Sentiment model used as a rough proxy for biased tone.
        self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")

    def fetch_page_content(self, url):
        """Fetch the page and return the concatenated text of its <p> tags."""
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            return " ".join([p.text for p in soup.find_all("p")])
        except requests.RequestException:
            # Network errors, timeouts, and non-2xx responses all yield empty content.
            return ""

    def compute_similarity_score(self, user_query, content):
        """Cosine similarity between query and page embeddings, scaled to 0-100."""
        if not content:
            return 0
        return int(util.pytorch_cos_sim(
            self.similarity_model.encode(user_query),
            self.similarity_model.encode(content)
        ).item() * 100)

    def detect_bias(self, content):
        if not content:
            return 50
        label = self.sentiment_analyzer(content[:512])[0]["label"]
        # cardiffnlp/twitter-roberta-base-sentiment reports LABEL_0/1/2 for
        # negative/neutral/positive, so both label schemes are handled here.
        if label in ("LABEL_2", "POSITIVE"):
            return 100
        return 50 if label in ("LABEL_1", "NEUTRAL") else 30

    def get_star_rating(self, score: float):
        stars = max(1, min(5, round(score / 20)))
        return stars, "⭐" * stars

    def generate_explanation(self, domain_trust, similarity_score, fact_check_score, bias_score, citation_score, final_score):
        reasons = []
        if domain_trust < 50:
            reasons.append("The source has low domain authority.")
        if similarity_score < 50:
            reasons.append("The content is not highly relevant to your query.")
        if fact_check_score < 50:
            reasons.append("Limited fact-checking verification found.")
        if bias_score < 50:
            reasons.append("Potential bias detected in the content.")
        if citation_score < 30:
            reasons.append("Few citations found for this content.")
        return " ".join(reasons) if reasons else "This source is highly credible and relevant."

    def rate_url_validity(self, user_query, url):
        content = self.fetch_page_content(url)
        similarity_score = self.compute_similarity_score(user_query, content)
        bias_score = self.detect_bias(content)

        # Domain trust, fact-check, and citation scores are fixed constants in this
        # version; no live checks are performed for them yet.
        domain_trust = 60
        fact_check_score = 70
        citation_score = 50

        # Weighted blend: domain trust and relevance dominate (30% each), followed by
        # fact-checking (20%), bias (10%), and citations (10%).
        final_score = (0.3 * domain_trust) + (0.3 * similarity_score) + (0.2 * fact_check_score) + (0.1 * bias_score) + (0.1 * citation_score)

        stars, icon = self.get_star_rating(final_score)
        explanation = self.generate_explanation(domain_trust, similarity_score, fact_check_score, bias_score, citation_score, final_score)

        return {
            "Final Score": f"{final_score:.2f}%",
            "Star Rating": icon,
            "Explanation": explanation,
        }

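
# Quick sanity check outside the Streamlit UI (illustrative query/URL, not part of the app):
#   URLValidator().rate_url_validity("open source licenses", "https://example.com")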
validator = URLValidator()

st.set_page_config(page_title="URL Credibility Checker", layout="centered")
st.title("🔍 URL Credibility Checker")
st.markdown("**Analyze the credibility of a website based on its content, trustworthiness, and relevance.**")

url = st.text_input("🔗 Enter URL")
query = st.text_input("🔍 Enter Search Query")
if st.button("β
Check Credibility"): |
|
if url and query: |
|
with st.spinner("Analyzing credibility..."): |
|
result = validator.rate_url_validity(query, url) |
|
st.success("Analysis Complete!") |
|
st.metric(label="Final Score", value=result["Final Score"]) |
|
st.write("**Star Rating:**", result["Star Rating"]) |
|
st.markdown(f"**π Explanation:** {result['Explanation']}") |
|
else: |
|
st.warning("β οΈ Please enter both a URL and a search query.") |
|
|
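
# To launch the app locally (assuming this file is saved as app.py; the filename is illustrative):
#   streamlit run app.py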