camparchimedes committed on
Commit
7ec9f42
·
verified ·
1 Parent(s): 3698f30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -21,9 +21,11 @@ from PIL import Image
21
  # Suppress warnings
22
  warnings.filterwarnings("ignore")
23
 
24
- # NLTK dependencies
25
  nltk.download('punkt', quiet=True)
26
  nltk.download('stopwords', quiet=True)
 
 
27
 
28
  # Convert m4a audio to wav format
29
  def convert_to_wav(audio_file):
@@ -73,7 +75,7 @@ def clean_text(text):
73
  return text
74
 
75
  def preprocess_text(text):
76
- words = word_tokenize(text)
77
  stop_words = set(stopwords.words('norwegian'))
78
  words = [word for word in words if word.lower() not in stop_words]
79
  return ' '.join(words)
 
21
  # Suppress warnings
22
  warnings.filterwarnings("ignore")
23
 
24
+ # Ascertain that NLTK dependencies for Norwegian are downloaded
25
  nltk.download('punkt', quiet=True)
26
  nltk.download('stopwords', quiet=True)
27
+ stop_words = set(stopwords.words('norwegian'))
28
+
29
 
30
  # Convert m4a audio to wav format
31
  def convert_to_wav(audio_file):
 
75
  return text
76
 
77
  def preprocess_text(text):
78
+ words = word_tokenize(text, language="norwegian")
79
  stop_words = set(stopwords.words('norwegian'))
80
  words = [word for word in words if word.lower() not in stop_words]
81
  return ' '.join(words)