analytics-jiten committed on
Commit
25b96ed
·
1 Parent(s): 41b2a44

Update preprocessing.py

Browse files
Files changed (1) hide show
  1. preprocessing.py +1 -20
preprocessing.py CHANGED
@@ -46,26 +46,7 @@ def undo_contractions(phrase):
46
  phrase = re.sub(r"[\'’]m", " am", phrase)
47
  return phrase
48
 
49
- emoji_regex = re.compile("["
50
- u"\U0001F600-\U0001F64F" # emoticons
51
- u"\U0001F300-\U0001F5FF" # symbols & pictographs
52
- u"\U0001F680-\U0001F6FF" # transport & map symbols
53
- u"\U0001F1E0-\U0001F1FF" # flags (iOS)
54
- u"\U00002500-\U00002BEF" # chinese char
55
- u"\U00002702-\U000027B0"
56
- u"\U00002702-\U000027B0"
57
- u"\U000024C2-\U0001F251"
58
- u"\U0001f926-\U0001f937"
59
- u"\U00010000-\U0010ffff"
60
- u"\u2640-\u2642"
61
- u"\u2600-\u2B55"
62
- u"\u200d"
63
- u"\u23cf"
64
- u"\u23e9"
65
- u"\u231a"
66
- u"\ufe0f" # dingbats
67
- u"\u3030"
68
- "]+", re.UNICODE)
69
 
70
  def preprocess_reviews(reviews):
71
  reviews['text'] = reviews['title'] + ' . ' + reviews['review']
 
46
  phrase = re.sub(r"[\'’]m", " am", phrase)
47
  return phrase
48
 
49
+ emoji_regex = ''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  def preprocess_reviews(reviews):
52
  reviews['text'] = reviews['title'] + ' . ' + reviews['review']