Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -21,9 +21,11 @@ from PIL import Image
|
|
21 |
# Suppress warnings
|
22 |
warnings.filterwarnings("ignore")
|
23 |
|
24 |
-
# NLTK dependencies
|
25 |
nltk.download('punkt', quiet=True)
|
26 |
nltk.download('stopwords', quiet=True)
|
|
|
|
|
27 |
|
28 |
# Convert m4a audio to wav format
|
29 |
def convert_to_wav(audio_file):
|
@@ -73,7 +75,7 @@ def clean_text(text):
|
|
73 |
return text
|
74 |
|
75 |
def preprocess_text(text):
|
76 |
-
words = word_tokenize(text)
|
77 |
stop_words = set(stopwords.words('norwegian'))
|
78 |
words = [word for word in words if word.lower() not in stop_words]
|
79 |
return ' '.join(words)
|
|
|
21 |
# Suppress warnings
|
22 |
warnings.filterwarnings("ignore")
|
23 |
|
24 |
+
# Ensure the NLTK resources used for Norwegian text (punkt, stopwords) are downloaded
|
25 |
nltk.download('punkt', quiet=True)
|
26 |
nltk.download('stopwords', quiet=True)
|
27 |
+
stop_words = set(stopwords.words('norwegian'))
|
28 |
+
|
29 |
|
30 |
# Convert m4a audio to wav format
|
31 |
def convert_to_wav(audio_file):
|
|
|
75 |
return text
|
76 |
|
77 |
def preprocess_text(text):
|
78 |
+
words = word_tokenize(text, language="norwegian")
|
79 |
stop_words = set(stopwords.words('norwegian'))
|
80 |
words = [word for word in words if word.lower() not in stop_words]
|
81 |
return ' '.join(words)
|