# Spaces:
# Sleeping
# Sleeping
import logging
import os

import nltk
# Module-level logger. The name is given explicitly (rather than __name__) so
# that records are emitted under the 'gradio_app' logger hierarchy.
logger = logging.getLogger('gradio_app.data_handler')
# Category directories (relative to an NLTK data root) probed when checking
# whether a resource is installed. 'sentiment' is needed for 'vader_lexicon'.
_NLTK_RESOURCE_CATEGORIES = ('tokenizers', 'corpora', 'taggers', 'sentiment')


def _nltk_resource_installed(resource):
    """Return True if *resource* is found in any known NLTK data location."""
    candidates = [
        f'{category}/{resource}' for category in _NLTK_RESOURCE_CATEGORIES
    ]
    # Last resort: the bare resource name directly under a data root.
    candidates.append(f'{resource}')
    for location in candidates:
        try:
            nltk.data.find(location)
        except LookupError:
            continue
        return True
    return False


def download_nltk_resources():
    """Download required NLTK resources if not already downloaded.

    Ensures ``~/nltk_data`` exists and is on NLTK's search path, then checks
    each required resource ('punkt', 'punkt_tab', 'wordnet', 'stopwords',
    'vader_lexicon') and downloads the ones that cannot be found.

    This is best-effort: a failure for one resource is logged and the
    remaining resources are still processed. No exception is propagated to
    the caller, and the function returns ``None``.
    """
    try:
        # Create nltk_data directory in the user's home directory if needed.
        nltk_data_path = os.path.expanduser("~/nltk_data")
        os.makedirs(nltk_data_path, exist_ok=True)

        # Register the directory only once, so repeated calls do not keep
        # growing NLTK's search path with duplicate entries.
        if nltk_data_path not in nltk.data.path:
            nltk.data.path.append(nltk_data_path)

        resources = ['punkt', 'punkt_tab', 'wordnet', 'stopwords', 'vader_lexicon']
        for resource in resources:
            try:
                if _nltk_resource_installed(resource):
                    logger.info("Resource %s already downloaded", resource)
                    continue

                logger.info("Downloading %s...", resource)
                # nltk.download reports failure through its boolean return
                # value instead of raising, so check it explicitly.
                if not nltk.download(resource, quiet=True):
                    logger.error(
                        "Error with resource %s: download failed", resource
                    )
            except Exception as e:
                # Keep going: one bad resource must not block the others.
                logger.error("Error with resource %s: %s", resource, e)

        logger.info("NLTK resources check completed")
    except Exception as e:
        # Setup failures (e.g. the data directory cannot be created) are
        # logged rather than raised so that application start-up continues.
        logger.error("Error downloading NLTK resources: %s", e)