Spaces:
Sleeping
Sleeping
File size: 1,738 Bytes
e66f533 c3375eb e66f533 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import os
import nltk
import logging
# Module-level logger, registered under the explicit name
# 'gradio_app.data_handler'; used by the functions in this module to report
# progress and errors.
logger = logging.getLogger('gradio_app.data_handler')
def _nltk_resource_is_installed(resource):
    """Return True if *resource* can be located on NLTK's data path.

    NLTK stores packages under category directories, and the category is
    not known from the package id alone, so every plausible location is
    probed in turn.
    """
    # 'sentiment' is the category used for vader_lexicon.
    categories = ('tokenizers', 'corpora', 'taggers', 'sentiment')
    candidates = [f'{category}/{resource}' for category in categories]
    candidates.append(resource)
    # The downloader may leave a package zipped instead of unpacking it;
    # NLTK can still load from the archive, so its presence counts too.
    candidates.extend(f'{category}/{resource}.zip' for category in categories)

    for location in candidates:
        try:
            nltk.data.find(location)
        except LookupError:
            continue
        return True
    return False


def download_nltk_resources():
    """Download required NLTK resources if not already downloaded.

    Ensures ``~/nltk_data`` exists and is registered on ``nltk.data.path``,
    then checks each required resource and downloads the ones that cannot
    be found.

    This is a best-effort routine: it never raises. Problems with a single
    resource are logged and the remaining resources are still processed;
    any other failure (for example, the data directory cannot be created)
    is logged as well.

    Returns:
        None
    """
    try:
        # Create nltk_data directory in the user's home directory if needed.
        nltk_data_path = os.path.expanduser("~/nltk_data")
        os.makedirs(nltk_data_path, exist_ok=True)

        # Register the directory once; repeated calls must not pile up
        # duplicate entries on NLTK's search path.
        if nltk_data_path not in nltk.data.path:
            nltk.data.path.append(nltk_data_path)

        resources = ['punkt', 'punkt_tab', 'wordnet', 'stopwords', 'vader_lexicon']
        for resource in resources:
            try:
                if _nltk_resource_is_installed(resource):
                    logger.info(f"Resource {resource} already downloaded")
                    continue

                logger.info(f"Downloading {resource}...")
                # Download explicitly into the directory registered above.
                # The downloader signals failure through its return value
                # rather than by raising, so it has to be checked.
                downloaded = nltk.download(
                    resource, download_dir=nltk_data_path, quiet=True
                )
                if not downloaded:
                    logger.error(f"Failed to download resource {resource}")
            except Exception as e:
                # Keep going: one broken resource must not block the others.
                logger.error(f"Error with resource {resource}: {e}")

        logger.info("NLTK resources check completed")
    except Exception as e:
        logger.error(f"Error downloading NLTK resources: {e}")