File size: 1,738 Bytes
e66f533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3375eb
e66f533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import nltk
import logging

# Set up logging: one module-level logger shared by everything in this file.
# The dotted name places it under the 'gradio_app' logger in the logging
# hierarchy; handlers and levels are not configured here.
logger = logging.getLogger('gradio_app.data_handler')


def _nltk_resource_available(resource):
    """Return True if *resource* can be located in NLTK's data search path.

    NLTK files each package under a category directory, and some packages
    (for example ``vader_lexicon`` under ``sentiment/``) are left zipped
    after download, so both the plain and the ``.zip`` name are probed in
    every category before falling back to the bare resource name.
    """
    categories = ('tokenizers', 'corpora', 'taggers', 'sentiment')
    candidates = [f'{category}/{resource}' for category in categories]
    candidates.extend(f'{category}/{resource}.zip' for category in categories)
    candidates.append(f'{resource}')

    for candidate in candidates:
        try:
            nltk.data.find(candidate)
        except LookupError:
            # Not under this name; try the next candidate location.
            continue
        return True
    return False


def download_nltk_resources():
    """Download required NLTK resources if not already downloaded.

    Ensures ``~/nltk_data`` exists and is on NLTK's data search path, then
    checks each required resource and downloads the ones that are missing.

    This is a best-effort operation: it never raises. A problem with one
    resource is logged and the remaining resources are still processed;
    any other failure is logged as well.

    Returns:
        None
    """
    try:
        # Create nltk_data directory in the user's home directory if it doesn't exist
        nltk_data_path = os.path.expanduser("~/nltk_data")
        os.makedirs(nltk_data_path, exist_ok=True)

        # Register the directory only once so repeated calls do not keep
        # growing NLTK's search path with duplicate entries.
        if nltk_data_path not in nltk.data.path:
            nltk.data.path.append(nltk_data_path)

        # Download required resources
        resources = ['punkt', 'punkt_tab', 'wordnet', 'stopwords', 'vader_lexicon']
        for resource in resources:
            try:
                if _nltk_resource_available(resource):
                    logger.info(f"Resource {resource} already downloaded")
                    continue

                logger.info(f"Downloading {resource}...")
                # With quiet=True a failed download prints nothing; the
                # boolean result is the only failure signal, so log it.
                if not nltk.download(resource, quiet=True):
                    logger.error(f"Failed to download resource {resource}")
            except Exception as e:
                # Keep going: one bad resource must not block the others.
                logger.error(f"Error with resource {resource}: {e}")

        logger.info("NLTK resources check completed")
    except Exception as e:
        logger.error(f"Error downloading NLTK resources: {e}")