Create app.py
app.py
ADDED
@@ -0,0 +1,150 @@
+# pip install GoogleNews
+# pip install --upgrade GoogleNews
+
+import streamlit as st
+from GoogleNews import GoogleNews
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import pandas as pd
+import numpy as np
+import string
+import re
+from nltk.corpus import stopwords
+import sklearn
+import time
+
+
+
+# Initialise an Arabic-language Google News client and clear any cached results.
+googlenews = GoogleNews(lang='ar')
+googlenews.clear()
+
+
+
+st.write("""
+Arabic Fake News Detection System
+A system designed as part of a master's project
+by Reem AlFouzan
+Supervised by: Dr. Abdulla al mutairi
+""")
+#df = pd.read_csv('News.csv')
+text_input = st.text_input('''**Enter the text**''')
+if len(text_input) != 0:
+    inputt = pd.DataFrame([text_input])
+
+    # Query Google News for the entered claim.
+    googlenews.search(inputt.iloc[0, 0])
+    googlenews.get_news(inputt.iloc[0, 0])
+
+    result_0 = googlenews.page_at(1)
+    total = len(result_0)
+    st.markdown(f"Search results: {result_0}")
+    st.markdown(f"Number of results: {total}")
+    # time.sleep(100)
+    # if len(result_0) == 0:
+    #     desc_1 = ['لا يوجد نتائج للخبر ']  # "No results for this story"
+    #     link_1 = ['لا يوجد مصدر']  # "No source"
+    # if len(result_0) != 0:
+    #     desc_1 = googlenews.get_texts()
+    #     link_1 = googlenews.get_links()
+    #     for i in list(range(2, 70)):
+    #         result = googlenews.page_at(i)
+    #         desc = googlenews.get_texts()
+    #         link = googlenews.get_links()
+    #         desc_1 = desc_1 + desc
+    #         link_1 = link_1 + link
+
+    # column_names = ["text", 'link']
+    # df = pd.DataFrame(columns=column_names)
+    # df['text'] = desc_1
+    # df['link'] = link_1
+
+    # for letter in '#.][!XR':
+    #     df['text'] = df['text'].astype(str).str.replace(letter, '')
+    #     inputt[0] = inputt[0].astype(str).str.replace(letter, '')
+
+    # arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ'''
+    # english_punctuations = string.punctuation
+    # punctuations_list = arabic_punctuations + english_punctuations
+
+    # def remove_punctuations(text):
+    #     translator = str.maketrans('', '', punctuations_list)
+    #     return text.translate(translator)
+
+    # def normalize_arabic(text):
+    #     text = re.sub("[إأآا]", "ا", text)  # unify alef variants
+    #     text = re.sub("ى", "ي", text)  # alef maqsura -> ya
+    #     text = re.sub("ة", "ه", text)  # ta marbuta -> ha
+    #     text = re.sub("گ", "ك", text)  # gaf -> kaf
+    #     return text
+
+    # def remove_repeating_char(text):
+    #     return re.sub(r'(.)\1+', r'\1', text)
+
+    # def processPost(text):
+    #     # Replace @username with empty string
+    #     text = re.sub('@[^\s]+', ' ', text)
+    #     # Convert www.* or https?://* to " "
+    #     text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))', ' ', text)
+    #     # Replace #word with word
+    #     text = re.sub(r'#([^\s]+)', r'\1', text)
+    #     # Remove punctuation
+    #     text = remove_punctuations(text)
+    #     # Normalize the text
+    #     text = normalize_arabic(text)
+    #     # Remove repeated letters
+    #     text = remove_repeating_char(text)
+    #     return text
+
+    # df['text'] = df['text'].apply(lambda x: processPost(x))
+    # inputt[0] = inputt[0].apply(lambda x: processPost(x))
+
+    # st.markdown(f"my input is : {inputt.iloc[0, 0]}")
+    # #input=input.apply(lambda x: processPost(x))
+
+    # vectorizer = TfidfVectorizer()
+    # vectors = vectorizer.fit_transform(df['text'])
+    # text_tfidf = pd.DataFrame(vectors.toarray())
+
+    # traninput = vectorizer.transform(inputt[0])
+    # traninput = traninput.toarray()
+    # cosine_sim = cosine_similarity(traninput, text_tfidf)
+    # top = np.max(cosine_sim)
+
+    # if top >= .85:
+    #     prediction = 'الخبر صحيح'  # "The story is true"
+    # elif (top < .85) and (top >= .6):
+    #     prediction = 'الخبر مضلل '  # "The story is misleading"
+    # elif top < .6:
+    #     prediction = 'الخبر كاذب '  # "The story is fake"
+
+    # st.markdown(f"most similar news is: {df['text'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
+    # st.markdown(f"Source url: {df['link'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
+    # st.markdown(f"Credibility rate: {np.max(cosine_sim)}")
+    # st.markdown(f"system prediction: {prediction}")
+    # df.to_csv('Students.csv', sep='\t')
+
+
+st.sidebar.markdown('مواقع اخباريه معتمده ')  # "Accredited news sites"
+st.sidebar.markdown("[العربية](https://www.alarabiya.net/)")
+st.sidebar.markdown("[الجزيرة نت](https://www.aljazeera.net/news/)")
+st.sidebar.markdown("[وكالة الانباء الكويتية](https://www.kuna.net.kw/Default.aspx?language=ar)")
+
+#st.markdown('test')
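
The scoring logic in the file is still commented out. For reference, below is a minimal, self-contained sketch of what that section computes: TF-IDF vectors over the retrieved article texts, cosine similarity against the input claim, and the same 0.85 / 0.6 thresholds used in `app.py` to label a story true, misleading, or fake. The toy English corpus and the `query` string are purely illustrative stand-ins for live GoogleNews results, not part of the app.

```python
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Stand-in for article texts fetched via googlenews.get_texts() (assumption:
# in the real app these would be preprocessed Arabic headlines).
corpus = [
    "government announces new economic reform plan",
    "national team wins the regional football cup",
    "health ministry launches vaccination campaign",
]
query = "economic reform plan announced by the government"

vectorizer = TfidfVectorizer()
corpus_tfidf = vectorizer.fit_transform(corpus)  # fit on retrieved texts
query_tfidf = vectorizer.transform([query])      # project the claim into the same space

scores = cosine_similarity(query_tfidf, corpus_tfidf)[0]  # one score per article
best = int(np.argmax(scores))
top = float(scores[best])

# Same thresholds as the commented-out block in app.py.
if top >= 0.85:
    verdict = "real"        # 'الخبر صحيح'
elif top >= 0.6:
    verdict = "misleading"  # 'الخبر مضلل'
else:
    verdict = "fake"        # 'الخبر كاذب'

print(f"closest article: {corpus[best]}")
print(f"credibility score: {top:.2f} -> {verdict}")
```

Note the design choice this mirrors: the vectorizer is fitted on the retrieved articles and the claim is only transformed, so a claim sharing no vocabulary with any result scores near zero and falls into the "fake" bucket.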