jrocha committed on
Commit
0133578
·
verified ·
1 Parent(s): 6acead2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -17,6 +17,7 @@ import pandas as pd
17
  from sklearn.feature_extraction.text import TfidfVectorizer
18
  from sklearn.metrics.pairwise import cosine_similarity
19
  from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 
20
 
21
  """# data - text"""
22
 
@@ -30,10 +31,10 @@ def remove_symbols(text):
30
  cleaned_text = "".join([char for char in text if char not in remove_chars])
31
 
32
  # Remove non-ASCII characters
33
- pattern_ascii = r'[^\x00-\x7F]' # Matches any character outside the ASCII range
34
- filtered_text = re.sub(pattern_ascii, '', cleaned_text)
35
 
36
- return filtered_text
37
 
38
  def context_func(message):
39
  # Create a TF-IDF vectorizer
 
17
  from sklearn.feature_extraction.text import TfidfVectorizer
18
  from sklearn.metrics.pairwise import cosine_similarity
19
  from transformers import AutoTokenizer, AutoModelForQuestionAnswering
20
+ #import re
21
 
22
  """# data - text"""
23
 
 
31
  cleaned_text = "".join([char for char in text if char not in remove_chars])
32
 
33
  # Remove non-ASCII characters
34
+ #pattern_ascii = r'[^\x00-\x7F]' # Matches any character outside the ASCII range
35
+ #filtered_text = re.sub(pattern_ascii, '', cleaned_text)
36
 
37
+ return cleaned_text
38
 
39
  def context_func(message):
40
  # Create a TF-IDF vectorizer