AVAIYA committed
Commit c5d9a7c · 1 Parent(s): ae738e6

Create app.py

Files changed (1)
  1. app.py +150 -0
app.py ADDED
@@ -0,0 +1,150 @@
+ #pip install GoogleNews
+ #pip install --upgrade GoogleNews
+
+ import streamlit as st
+ from GoogleNews import GoogleNews
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ import pandas as pd
+ import numpy as np
+ import string
+ import re
+ from nltk.corpus import stopwords
+ import sklearn
+ import time
+
+
18
+ googlenews = GoogleNews()
19
+ googlenews = GoogleNews(lang='ar')
20
+ googlenews.clear()
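+ # Hedged aside: the GoogleNews client can also restrict results to recent
+ # coverage, e.g. googlenews.set_period('7d'); whether that suits this app is
+ # an open choice, and the call should be verified against the installed version.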
21
+
22
+
23
+
24
+ st.write("""
25
+ Arabic Fake News Detection System
26
+ A system designed as a part of master project
27
+ done by Reem AlFouzan
28
+ Supervised by : Dr, Abdulla al mutairi
29
+ """)
+ #df = pd.read_csv('News.csv')
+ text_input = st.text_input(''' **Enter the text** ''')
+ if len(text_input) != 0:
+     # Wrap the input in a DataFrame so the same preprocessing can later be applied to it
+     inputt = pd.DataFrame([text_input])
+
+     # Query Google News for the entered text (both calls populate the client's result list)
+     googlenews.search(inputt.iloc[0, 0])
+     googlenews.get_news(inputt.iloc[0, 0])
+
+     result_0 = googlenews.page_at(1)
+     total = len(result_0)
+     st.markdown(f"Search results : { result_0 }")
+     st.markdown(f"Number of results : { total }")
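+     # A minimal guard (sketch, mirroring the commented-out logic below): if
+     # page 1 comes back empty, Google News found no coverage for the claim,
+     # so the similarity pipeline has nothing to compare against.
+     # if total == 0:
+     #     st.markdown('لا يوجد نتائج للخبر')  # "No results found for this news item"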
+
+ # time.sleep(100)
+ # if len(result_0) == 0:
+ #     desc_1 = ['لا يوجد نتائج للخبر ']   # "No results found for this news item"
+ #     link_1 = ['لا يوجد مصدر']            # "No source"
+ # if len(result_0) != 0:
+ #     desc_1 = googlenews.get_texts()
+ #     link_1 = googlenews.get_links()
+ #     for i in list(range(2, 70)):
+ #         result = googlenews.page_at(i)
+ #         desc = googlenews.get_texts()
+ #         link = googlenews.get_links()
+ #         desc_1 = desc_1 + desc
+ #         link_1 = link_1 + link
+
+ # column_names = ["text", 'link']
+ # df = pd.DataFrame(columns = column_names)
+
+ # df['text'] = desc_1
+ # df['link'] = link_1
+
+ # Strip noise characters literally (regex=False: '.', '[' and ']' are regex
+ # metacharacters, so a regex-based replace would corrupt the text)
+ # for letter in '#.][!XR':
+ #     df['text'] = df['text'].astype(str).str.replace(letter, '', regex=False)
+ #     inputt[0] = inputt[0].astype(str).str.replace(letter, '', regex=False)
+
+ # arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ'''
+ # english_punctuations = string.punctuation
+ # punctuations_list = arabic_punctuations + english_punctuations
+
+ # def remove_punctuations(text):
+ #     translator = str.maketrans('', '', punctuations_list)
+ #     return text.translate(translator)
+
+ # def normalize_arabic(text):
+ #     text = re.sub("[إأآا]", "ا", text)   # unify alef variants
+ #     text = re.sub("ى", "ي", text)        # alef maqsura -> ya
+ #     text = re.sub("ة", "ه", text)        # ta marbuta -> ha
+ #     text = re.sub("گ", "ك", text)        # Persian gaf -> kaf
+ #     return text
+
+
+ # def remove_repeating_char(text):
+ #     return re.sub(r'(.)\1+', r'\1', text)
+
+ # def processPost(text):
+
+ #     # Replace @username with empty string
+ #     text = re.sub(r'@[^\s]+', ' ', text)
+
+ #     # Convert www.* or https?://* to " "
+ #     text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', ' ', text)
+
+ #     # Replace #word with word
+ #     text = re.sub(r'#([^\s]+)', r'\1', text)
+
+ #     # remove punctuations
+ #     text = remove_punctuations(text)
+
+ #     # normalize the text
+ #     text = normalize_arabic(text)
+
+ #     # remove repeated letters
+ #     text = remove_repeating_char(text)
+
+ #     return text
+
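+ # A worked example of the pipeline above (sketch; assumes the functions are
+ # uncommented, and the sample string is an illustrative assumption):
+ #   processPost('@user انظرووووا هذا #الخبرة!! www.example.com')
+ #   -> mention and URL dropped, '#الخبرة!!' keeps only the word, punctuation
+ #      stripped, ta marbuta normalized, repeated letters collapsed:
+ #   roughly 'انظروا هذا الخبره'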
+
+ # df['text'] = df['text'].apply(lambda x: processPost(x))
+ # inputt[0] = inputt[0].apply(lambda x: processPost(x))
+
+ # st.markdown(f"my input is : { inputt.iloc[0,0] }")
+ # #input=input.apply(lambda x: processPost(x))
+
+
+ # vectorizer = TfidfVectorizer()
+ # vectors = vectorizer.fit_transform(df['text'])
+
+ # text_tfidf = pd.DataFrame(vectors.toarray())
+
+ # traninput = vectorizer.transform(inputt[0])
+ # traninput = traninput.toarray()
+ # cosine_sim = cosine_similarity(traninput, text_tfidf)
+ # top = np.max(cosine_sim)
+
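+ # A self-contained sketch of the similarity step above, runnable on its own
+ # if uncommented (the toy strings are illustrative assumptions, not app data):
+ #   corpus = ['الخبر الاول عن الطقس', 'خبر ثاني عن الرياضه']
+ #   v = TfidfVectorizer()
+ #   m = v.fit_transform(corpus)         # one TF-IDF row per retrieved article
+ #   q = v.transform(['خبر عن الطقس'])   # embed the claim in the same space
+ #   sims = cosine_similarity(q, m)      # shape (1, len(corpus)), values in [0, 1]
+ #   best = np.max(sims)                 # the score compared against the thresholds below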
+
+ # if top >= .85:
+ #     prediction = 'الخبر صحيح'   # "The news is true"
+ # elif (top < .85) and (top >= .6):
+ #     prediction = 'الخبر مضلل'   # "The news is misleading"
+ # elif top < .6:
+ #     prediction = 'الخبر كاذب'   # "The news is fake"
+
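+ # Worked example of the thresholds above: a best cosine score of 0.91 maps to
+ # 'الخبر صحيح' (true), 0.70 to 'الخبر مضلل' (misleading), and 0.40 to
+ # 'الخبر كاذب' (fake).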
+
+ # st.markdown(f"most similar news is: { df['text'].iloc[np.argmax(np.array(cosine_sim[0]))] }")
+ # st.markdown(f"Source url : {df['link'].iloc[np.argmax(np.array(cosine_sim[0]))]}")
+ # st.markdown(f"Credibility rate : { np.max(cosine_sim)}")
+ # st.markdown(f"system prediction: { prediction}")
+ # df.to_csv('Students.csv', sep ='\t')
+
+
+ # Sidebar: links to trusted Arabic news outlets
+ st.sidebar.markdown('مواقع اخباريه معتمده ')  # "Trusted news sites"
+ st.sidebar.markdown("[العربية](https://www.alarabiya.net/)")  # Al Arabiya
+ st.sidebar.markdown("[الجزيرة نت](https://www.aljazeera.net/news/)")  # Al Jazeera Net
+ st.sidebar.markdown("[وكالة الانباء الكويتية](https://www.kuna.net.kw/Default.aspx?language=ar)")  # Kuwait News Agency (KUNA)
+
+ #st.markdown('test')