525GradioApp / data_handler.py
Ryan
update
c3375eb
raw
history blame
1.74 kB
import os
import nltk
import logging
# Module-level logger; the dotted name makes it a child of the 'gradio_app' logger
logger = logging.getLogger("gradio_app.data_handler")
# Sub-directories of an NLTK data directory in which the resources requested
# below may be installed ('sentiment' is where vader_lexicon lives).
_NLTK_RESOURCE_DIRS = ('tokenizers', 'corpora', 'taggers', 'sentiment')


def _is_nltk_resource_available(resource):
    """Return True if *resource* can be located on NLTK's data search path."""
    candidates = []
    for category in _NLTK_RESOURCE_DIRS:
        candidates.append(f'{category}/{resource}')
        # Some packages are left as a zip archive after download, so probe
        # the archive name explicitly as well.
        candidates.append(f'{category}/{resource}.zip')
    candidates.append(resource)

    for location in candidates:
        try:
            nltk.data.find(location)
        except LookupError:
            continue
        return True
    return False


def download_nltk_resources():
    """Download required NLTK resources if not already downloaded.

    Makes sure ``~/nltk_data`` exists and is on NLTK's search path, then
    checks each required resource and downloads the ones that are missing.

    This is a best-effort routine: it never raises. A failure for a single
    resource is logged and the remaining resources are still processed; a
    failure during the initial directory setup is logged and ends the run.

    Returns:
        None
    """
    try:
        # Create nltk_data directory in the user's home directory if it doesn't exist
        nltk_data_path = os.path.expanduser("~/nltk_data")
        os.makedirs(nltk_data_path, exist_ok=True)

        # Register the directory only once so repeated calls do not keep
        # growing NLTK's search path with duplicate entries.
        if nltk_data_path not in nltk.data.path:
            nltk.data.path.append(nltk_data_path)

        # Download required resources
        resources = ['punkt', 'punkt_tab', 'wordnet', 'stopwords', 'vader_lexicon']
        for resource in resources:
            try:
                if _is_nltk_resource_available(resource):
                    logger.info("Resource %s already downloaded", resource)
                    continue

                logger.info("Downloading %s...", resource)
                # With quiet=True a failed download prints nothing; the
                # boolean result is the only signal, so surface it in the log.
                if not nltk.download(resource, quiet=True):
                    logger.error("Failed to download resource %s", resource)
            except Exception as e:
                # Keep going: one broken resource must not block the others.
                logger.error("Error with resource %s: %s", resource, e)
        logger.info("NLTK resources check completed")
    except Exception as e:
        logger.error("Error downloading NLTK resources: %s", e)