Spaces:
Sleeping
Sleeping
Commit
·
25b96ed
1
Parent(s):
41b2a44
Update preprocessing.py
Browse files
- preprocessing.py +1 -20
preprocessing.py
CHANGED
@@ -46,26 +46,7 @@ def undo_contractions(phrase):
|
|
46 |
phrase = re.sub(r"[\'’]m", " am", phrase)
|
47 |
return phrase
|
48 |
|
49 |
-
emoji_regex = re.compile("["
|
50 |
-
u"\U0001F600-\U0001F64F" # emoticons
|
51 |
-
u"\U0001F300-\U0001F5FF" # symbols & pictographs
|
52 |
-
u"\U0001F680-\U0001F6FF" # transport & map symbols
|
53 |
-
u"\U0001F1E0-\U0001F1FF" # flags (iOS)
|
54 |
-
u"\U00002500-\U00002BEF" # chinese char
|
55 |
-
u"\U00002702-\U000027B0"
|
56 |
-
u"\U00002702-\U000027B0"
|
57 |
-
u"\U000024C2-\U0001F251"
|
58 |
-
u"\U0001f926-\U0001f937"
|
59 |
-
u"\U00010000-\U0010ffff"
|
60 |
-
u"\u2640-\u2642"
|
61 |
-
u"\u2600-\u2B55"
|
62 |
-
u"\u200d"
|
63 |
-
u"\u23cf"
|
64 |
-
u"\u23e9"
|
65 |
-
u"\u231a"
|
66 |
-
u"\ufe0f" # dingbats
|
67 |
-
u"\u3030"
|
68 |
-
"]+", re.UNICODE)
|
69 |
|
70 |
def preprocess_reviews(reviews):
|
71 |
reviews['text'] = reviews['title'] + ' . ' + reviews['review']
|
|
|
46 |
phrase = re.sub(r"[\'’]m", " am", phrase)
|
47 |
return phrase
|
48 |
|
49 |
+
emoji_regex = ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
def preprocess_reviews(reviews):
|
52 |
reviews['text'] = reviews['title'] + ' . ' + reviews['review']
|