Create app.py
app.py
ADDED
@@ -0,0 +1,150 @@
+# pip install GoogleNews
+# pip install --upgrade GoogleNews
+
+import streamlit as st
+from GoogleNews import GoogleNews
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import pandas as pd
+import numpy as np
+import string
+import re
+from nltk.corpus import stopwords
+import sklearn
+import time
+
+
+
+# Initialise an Arabic-language Google News client and clear any cached results.
+googlenews = GoogleNews(lang='ar')
+googlenews.clear()
+
+
+
+st.write("""
+Arabic Fake News Detection System
+A system designed as part of a master's project
+by Reem AlFouzan
+Supervised by: Dr. Abdulla al mutairi
+""")
+#df = pd.read_csv('News.csv')
+text_input = st.text_input('''**Enter the text**''')
+if len(text_input) != 0:
+    inputt = pd.DataFrame([text_input])
+
+    # Query Google News for the entered claim.
+    googlenews.search(inputt.iloc[0, 0])
+    googlenews.get_news(inputt.iloc[0, 0])
+
+    result_0 = googlenews.page_at(1)
+    total = len(result_0)
+    st.markdown(f"Search results: {result_0}")
+    st.markdown(f"Number of results: {total}")
+    # time.sleep(100)
+    # if len(result_0) == 0:
+    #     desc_1 = ['لا يوجد نتائج للخبر ']  # "No results for this story"
+    #     link_1 = ['لا يوجد مصدر']  # "No source"
+    # if len(result_0) != 0:
+    #     desc_1 = googlenews.get_texts()
+    #     link_1 = googlenews.get_links()
+    #     for i in list(range(2, 70)):
+    #         result = googlenews.page_at(i)
+    #         desc = googlenews.get_texts()
+    #         link = googlenews.get_links()
+    #         desc_1 = desc_1 + desc
+    #         link_1 = link_1 + link
+
+    # column_names = ["text", 'link']
+    # df = pd.DataFrame(columns=column_names)
+    # df['text'] = desc_1
+    # df['link'] = link_1
+
+    # for letter in '#.][!XR':
+    #     df['text'] = df['text'].astype(str).str.replace(letter, '')
+    #     inputt[0] = inputt[0].astype(str).str.replace(letter, '')
+
+    # arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ'''
+    # english_punctuations = string.punctuation
+    # punctuations_list = arabic_punctuations + english_punctuations
+
+    # def remove_punctuations(text):
+    #     translator = str.maketrans('', '', punctuations_list)
+    #     return text.translate(translator)
+
+    # def normalize_arabic(text):
+    #     text = re.sub("[إأآا]", "ا", text)  # unify alef variants
+    #     text = re.sub("ى", "ي", text)  # alef maqsura -> ya
+    #     text = re.sub("ة", "ه", text)  # ta marbuta -> ha
+    #     text = re.sub("گ", "ك", text)  # gaf -> kaf
+    #     return text
+
+    # def remove_repeating_char(text):
+    #     return re.sub(r'(.)\1+', r'\1', text)
+
+    # def processPost(text):
+    #     # Replace @username with empty string
+    #     text = re.sub('@[^\s]+', ' ', text)
+    #     # Convert www.* or https?://* to " "
+    #     text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))', ' ', text)
+    #     # Replace #word with word
+    #     text = re.sub(r'#([^\s]+)', r'\1', text)
+    #     # Remove punctuation
+    #     text = remove_punctuations(text)
+    #     # Normalize the text
+    #     text = normalize_arabic(text)
+    #     # Remove repeated letters
+    #     text = remove_repeating_char(text)
+    #     return text
+
+    # df['text'] = df['text'].apply(lambda x: processPost(x))
+    # inputt[0] = inputt[0].apply(lambda x: processPost(x))
+
+    # st.markdown(f"my input is : {inputt.iloc[0, 0]}")
+    # #input=input.apply(lambda x: processPost(x))
+
+    # vectorizer = TfidfVectorizer()
+    # vectors = vectorizer.fit_transform(df['text'])
+    # text_tfidf = pd.DataFrame(vectors.toarray())
+
+    # traninput = vectorizer.transform(inputt[0])
+    # traninput = traninput.toarray()
+    # cosine_sim = cosine_similarity(traninput, text_tfidf)
+    # top = np.max(cosine_sim)
+
+    # if top >= .85:
+    #     prediction = 'الخبر صحيح'  # "The story is true"
+    # elif (top < .85) and (top >= .6):
+    #     prediction = 'الخبر مضلل '  # "The story is misleading"
+    # elif top < .6:
+    #     prediction = 'الخبر كاذب '  # "The story is fake"
+
+    # st.markdown(f"most similar news is: {df['text'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
+    # st.markdown(f"Source url: {df['link'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
+    # st.markdown(f"Credibility rate: {np.max(cosine_sim)}")
+    # st.markdown(f"system prediction: {prediction}")
+    # df.to_csv('Students.csv', sep='\t')
+
+
+st.sidebar.markdown('مواقع اخباريه معتمده ')  # "Accredited news sites"
+st.sidebar.markdown("[العربية](https://www.alarabiya.net/)")
+st.sidebar.markdown("[الجزيرة نت](https://www.aljazeera.net/news/)")
+st.sidebar.markdown("[وكالة الانباء الكويتية](https://www.kuna.net.kw/Default.aspx?language=ar)")
+
+#st.markdown('test')
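
The scoring logic in the file is still commented out. For reference, below is a minimal, self-contained sketch of what that section computes: TF-IDF vectors over the retrieved article texts, cosine similarity against the input claim, and the same 0.85 / 0.6 thresholds used in `app.py` to label a story true, misleading, or fake. The toy English corpus and the `query` string are purely illustrative stand-ins for live GoogleNews results, not part of the app.

```python
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Stand-in for article texts fetched via googlenews.get_texts() (assumption:
# in the real app these would be preprocessed Arabic headlines).
corpus = [
    "government announces new economic reform plan",
    "national team wins the regional football cup",
    "health ministry launches vaccination campaign",
]
query = "economic reform plan announced by the government"

vectorizer = TfidfVectorizer()
corpus_tfidf = vectorizer.fit_transform(corpus)  # fit on retrieved texts
query_tfidf = vectorizer.transform([query])      # project the claim into the same space

scores = cosine_similarity(query_tfidf, corpus_tfidf)[0]  # one score per article
best = int(np.argmax(scores))
top = float(scores[best])

# Same thresholds as the commented-out block in app.py.
if top >= 0.85:
    verdict = "real"        # 'الخبر صحيح'
elif top >= 0.6:
    verdict = "misleading"  # 'الخبر مضلل'
else:
    verdict = "fake"        # 'الخبر كاذب'

print(f"closest article: {corpus[best]}")
print(f"credibility score: {top:.2f} -> {verdict}")
```

Note the design choice this mirrors: the vectorizer is fitted on the retrieved articles and the claim is only transformed, so a claim sharing no vocabulary with any result scores near zero and falls into the "fake" bucket.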