# Arabic fake-news detection demo (Streamlit app).
#
# Takes a news snippet typed by the user, looks it up through the GoogleNews
# client configured for Arabic, and shows what came back. The TF-IDF /
# cosine-similarity scoring stage is currently disabled and kept below as
# commented-out code.
#
# Setup:
#   pip install GoogleNews
#   pip install --upgrade GoogleNews

import re
import string
import time

import numpy as np
import pandas as pd
import sklearn
import streamlit as st
from GoogleNews import GoogleNews
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# One Arabic-language client is enough; clear() drops any results left over
# from a previous query before the current one is issued.
googlenews = GoogleNews(lang='ar')
googlenews.clear()

st.write("""
Arabic Fake News Detection System
A system designed as a part of master project done by Reem AlFouzan
Supervised by : Dr, Abdulla al mutairi
""")

#df = pd.read_csv('News.csv')

text_input = st.text_input(''' **Enter the text** ''')

# Skip the lookup for empty or whitespace-only input so that a blank query
# never triggers network searches.
if text_input.strip():
    # Kept as a one-cell DataFrame because the disabled pipeline below
    # cleans and vectorises the query through inputt[0].
    inputt = pd.DataFrame([text_input])
    query = inputt.iloc[0, 0]

    googlenews.search(query)
    googlenews.get_news(query)
    result_0 = googlenews.page_at(1)
    total = len(result_0)

    # NOTE(review): both lines are labelled "Credibility rate" but print the
    # raw page-1 results and their count; this looks like leftover debug
    # output - confirm the intended labels before release.
    st.markdown(f"Credibility rate : { result_0 }")
    st.markdown(f"Credibility rate : { total }")

    # ------------------------------------------------------------------
    # Disabled similarity pipeline (kept for reference).
    # ------------------------------------------------------------------
    # time.sleep(100)
    # if len(result_0) == 0:
    #     desc_1 = ['لا يوجد نتائج للخبر ']
    #     link_1 = ['لا يوجد مصدر']
    # if len(result_0) != 0:
    #     desc_1 = googlenews.get_texts()
    #     link_1 = googlenews.get_links()
    #     for i in list(range(2, 70)):
    #         result = googlenews.page_at(i)
    #         desc = googlenews.get_texts()
    #         link = googlenews.get_links()
    #         desc_1 = desc_1 + desc
    #         link_1 = link_1 + link
    # column_names = ["text", 'link']
    # df = pd.DataFrame(columns = column_names)
    # df['text'] = desc_1
    # df['link'] = link_1
    # for letter in '#.][!XR':
    #     df['text'] = df['text'].astype(str).str.replace(letter,'')
    #     inputt[0] = inputt[0].astype(str).str.replace(letter,'')
    # arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ'''
    # english_punctuations = string.punctuation
    # punctuations_list = arabic_punctuations + english_punctuations
    # def remove_punctuations(text):
    #     translator = str.maketrans('', '', punctuations_list)
    #     return text.translate(translator)
    # def normalize_arabic(text):
    #     text = re.sub("[إأآا]", "ا", text)
    #     text = re.sub("ى", "ي", text)
    #     text = re.sub("ة", "ه", text)
    #     text = re.sub("گ", "ك", text)
    #     return text
    # def remove_repeating_char(text):
    #     return re.sub(r'(.)\1+', r'\1', text)
    # def processPost(text):
    #     #Replace @username with empty string
    #     text = re.sub('@[^\s]+', ' ', text)
    #     #Convert www.* or https?://* to " "
    #     text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))',' ',text)
    #     #Replace #word with word
    #     text = re.sub(r'#([^\s]+)', r'\1', text)
    #     # remove punctuations
    #     text= remove_punctuations(text)
    #     # normalize the text
    #     text= normalize_arabic(text)
    #     # remove repeated letters
    #     text=remove_repeating_char(text)
    #     return text
    # df['text'] = df['text'].apply(lambda x: processPost(x))
    # inputt[0] = inputt[0].apply(lambda x: processPost(x))
    # st.markdown(f"my input is : { inputt.iloc[0,0] }")
    # #input=input.apply(lambda x: processPost(x))
    # vectorizer = TfidfVectorizer()
    # vectors = vectorizer.fit_transform(df['text'])
    # text_tfidf = pd.DataFrame(vectors.toarray())
    # traninput = vectorizer.transform(inputt[0])
    # traninput = traninput.toarray()
    # cosine_sim = cosine_similarity(traninput,text_tfidf)
    # top = np.max(cosine_sim)
    # if top >= .85 :
    #     prediction = 'الخبر صحيح'
    # elif (top < .85) and (top >= .6) :
    #     prediction = 'الخبر مظلل '
    # elif top < .6 :
    #     prediction = 'الخبر كاذب '
    # st.markdown(f"most similar news is: { df['text'].iloc[np.argmax(np.array(cosine_sim[0]))] }")
    # st.markdown(f"Source url : {df['link'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
    # st.markdown(f"Credibility rate : { np.max(cosine_sim)}")
    # st.markdown(f"system prediction: { prediction}")
    # df.to_csv('Students.csv', sep ='\t')

# Sidebar: links to news outlets the app lists as accredited.
st.sidebar.markdown('مواقع اخباريه معتمده ')
st.sidebar.markdown("[العربية](https://www.alarabiya.net/)")
st.sidebar.markdown("[الجزيرة نت](https://www.aljazeera.net/news/)")
st.sidebar.markdown("[وكالة الانباء الكويتية](https://www.kuna.net.kw/Default.aspx?language=ar)")
#st.markdown('test')