import logging
import os

import nltk

# Set up logging
logger = logging.getLogger('gradio_app.data_handler')

# NLTK packages that download_nltk_resources() makes sure are installed.
_REQUIRED_RESOURCES = ('punkt', 'punkt_tab', 'wordnet', 'stopwords', 'vader_lexicon')

# Sub-directories of an NLTK data directory that are probed for each package.
# 'sentiment' is needed for vader_lexicon; without it that package is never
# detected as installed and a download is attempted on every call.
_RESOURCE_CATEGORIES = ('tokenizers', 'corpora', 'taggers', 'sentiment')


def _is_resource_available(resource):
    """Return True if *resource* is found under any probed NLTK data location."""
    candidates = [f'{category}/{resource}' for category in _RESOURCE_CATEGORIES]
    # Last resort: the bare package name, relative to the data directory root.
    candidates.append(resource)
    for location in candidates:
        try:
            nltk.data.find(location)
        except LookupError:
            continue
        return True
    return False


def download_nltk_resources():
    """Download the required NLTK resources that are not already installed.

    Ensures ``~/nltk_data`` exists and is on NLTK's search path, then checks
    each package in ``_REQUIRED_RESOURCES`` and downloads the ones that cannot
    be found.

    This is a best-effort operation: every failure is logged through the
    module logger and nothing is raised to the caller. A problem with one
    resource does not prevent the remaining resources from being processed.

    Returns:
        None.
    """
    try:
        # Create nltk_data directory in the user's home directory if it doesn't exist
        nltk_data_path = os.path.expanduser("~/nltk_data")
        os.makedirs(nltk_data_path, exist_ok=True)

        # Register the directory only once so that repeated calls do not keep
        # growing nltk.data.path with duplicate entries.
        if nltk_data_path not in nltk.data.path:
            nltk.data.path.append(nltk_data_path)

        for resource in _REQUIRED_RESOURCES:
            try:
                if _is_resource_available(resource):
                    logger.info("Resource %s already downloaded", resource)
                    continue

                logger.info("Downloading %s...", resource)
                # With quiet=True a failed download is only signalled through
                # the False return value, so it has to be checked explicitly.
                if not nltk.download(resource, quiet=True):
                    logger.error("Failed to download resource %s", resource)
            except Exception as e:
                # Deliberately broad: keep going with the remaining resources.
                logger.error("Error with resource %s: %s", resource, e)

        logger.info("NLTK resources check completed")
    except Exception as e:
        # Deliberately broad: resource setup must never crash the caller.
        logger.error("Error downloading NLTK resources: %s", e)