# NOTE(review): the original paste began with "Spaces: / Build error / Build error" —
# Hugging Face Spaces build-log residue, not source code. Removed so the file parses.
# Dependencies (install before running):
#   pip install --upgrade GoogleNews streamlit scikit-learn pandas numpy nltk

# Standard library.
import re
import string
import time

# Third-party.
import numpy as np
import pandas as pd
import sklearn
import streamlit as st
from GoogleNews import GoogleNews
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
# NOTE(review): cosine_similarity was imported twice in the original; kept once.
from sklearn.metrics.pairwise import cosine_similarity
# Google News client configured for Arabic-language results.
# NOTE(review): the original first created a bare GoogleNews() and immediately
# overwrote it with the Arabic client — the dead assignment is removed here.
googlenews = GoogleNews(lang='ar')
googlenews.clear()  # drop any results cached from a previous run

# Application header shown at the top of the page.
st.write("""
Arabic Fake News Detection System
A system designed as a part of master project
done by Reem AlFouzan
Supervised by : Dr, Abdulla al mutairi
""")
#df = pd.read_csv('News.csv')
text_input = st.text_input(''' **Enter the text** ''')

# Only search once the user has typed something (st.text_input returns "").
if text_input:
    # Wrap the query in a single-cell DataFrame; the disabled pipeline below
    # indexes it as a frame, so the shape is kept for compatibility.
    inputt = pd.DataFrame([text_input])
    query = inputt.iloc[0, 0]

    # Query Google News (Arabic client configured above) for matching articles.
    # NOTE(review): search() and get_news() both issue a query — presumably one
    # is redundant; kept both to preserve the original behavior. TODO confirm.
    googlenews.search(query)
    googlenews.get_news(query)
    result_0 = googlenews.page_at(1)
    total = len(result_0)

    # Show the raw matches and how many were found.
    # (Both lines were mislabelled "Credibility rate" in the original:
    # result_0 is the article list and total is a count, not a rate.)
    st.markdown(f"Search results : { result_0 }")
    st.markdown(f"Number of results : { total }")
# NOTE(review): disabled TF-IDF cosine-similarity pipeline (commented out in the
# original; ` | |` paste artifacts stripped, content otherwise preserved verbatim).
# time.sleep(100)
# if len(result_0) == 0:
# desc_1 = ['لا يوجد نتائج للخبر ']
# link_1 = ['لا يوجد مصدر']
# if len(result_0) != 0:
# desc_1 = googlenews.get_texts()
# link_1 = googlenews.get_links()
# for i in list(range(2, 70)):
# result = googlenews.page_at(i)
# desc = googlenews.get_texts()
# link = googlenews.get_links()
# desc_1 = desc_1 + desc
# link_1 = link_1 + link
# column_names = ["text", 'link']
# df = pd.DataFrame(columns = column_names)
# df['text'] = desc_1
# df['link'] = link_1
# for letter in '#.][!XR':
# df['text'] = df['text'].astype(str).str.replace(letter,'')
# inputt[0] = inputt[0].astype(str).str.replace(letter,'')
# arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ'''
# english_punctuations = string.punctuation
# punctuations_list = arabic_punctuations + english_punctuations
# def remove_punctuations(text):
# translator = str.maketrans('', '', punctuations_list)
# return text.translate(translator)
# def normalize_arabic(text):
# text = re.sub("[إأآا]", "ا", text)
# text = re.sub("ى", "ي", text)
# text = re.sub("ة", "ه", text)
# text = re.sub("گ", "ك", text)
# return text
# def remove_repeating_char(text):
# return re.sub(r'(.)\1+', r'\1', text)
# def processPost(text):
# #Replace @username with empty string
# text = re.sub('@[^\s]+', ' ', text)
# #Convert www.* or https?://* to " "
# text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))',' ',text)
# #Replace #word with word
# text = re.sub(r'#([^\s]+)', r'\1', text)
# # remove punctuations
# text= remove_punctuations(text)
# # normalize the text
# text= normalize_arabic(text)
# # remove repeated letters
# text=remove_repeating_char(text)
# return text
# df['text'] = df['text'].apply(lambda x: processPost(x))
# inputt[0] = inputt[0].apply(lambda x: processPost(x))
# st.markdown(f"my input is : { inputt.iloc[0,0] }")
# #input=input.apply(lambda x: processPost(x))
# vectorizer = TfidfVectorizer()
# vectors = vectorizer.fit_transform(df['text'])
# text_tfidf = pd.DataFrame(vectors.toarray())
# traninput = vectorizer.transform(inputt[0])
# traninput = traninput.toarray()
# cosine_sim = cosine_similarity(traninput,text_tfidf)
# top = np.max(cosine_sim)
# if top >= .85 :
# prediction = 'الخبر صحيح'
# elif (top < .85) and (top >= .6) :
# prediction = 'الخبر مظلل '
# elif top < .6 :
# prediction = 'الخبر كاذب '
# st.markdown(f"most similar news is: { df['text'].iloc[np.argmax(np.array(cosine_sim[0]))] }")
# st.markdown(f"Source url : {df['link'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
# st.markdown(f"Credibility rate : { np.max(cosine_sim)}")
# st.markdown(f"system prediction: { prediction}")
# df.to_csv('Students.csv', sep ='\t')
# Sidebar: links to trusted Arabic news outlets.
st.sidebar.markdown('مواقع اخباريه معتمده ')
trusted_sources = [
    ("العربية", "https://www.alarabiya.net/"),
    ("الجزيرة نت", "https://www.aljazeera.net/news/"),
    ("وكالة الانباء الكويتية", "https://www.kuna.net.kw/Default.aspx?language=ar"),
]
for label, url in trusted_sources:
    st.sidebar.markdown(f"[{label}]({url})")
#st.markdown('test')