Spaces:
Sleeping
Sleeping
| import nltk | |
| import nltk.downloader | |
| import spacy | |
| from core.config import settings | |
| from pathlib import Path | |
| import en_core_web_sm | |
def initialize_nlp():
    """Eagerly load the spaCy model and fetch the NLTK resources.

    Prints a progress message before and after. The loaded spaCy pipelines
    are not kept; the loads happen only for their side effects. Each NLTK
    resource is requested exactly once and is stored in NLTK's default
    download location (no ``download_dir`` is passed).
    """
    print("Initializing NLP resources...")

    nltk_resources = [
        'maxent_ne_chunker',
        'words',
        'treebank',
        'maxent_treebank_pos_tagger',
        'punkt',
        'averaged_perceptron_tagger',
    ]

    # Load spaCy model.
    # NOTE(review): the model is loaded twice (by name and via the package)
    # and both results are discarded -- confirm whether one call suffices.
    spacy.load("en_core_web_sm")
    en_core_web_sm.load()

    # Download one resource per iteration. Passing the whole list inside the
    # loop would re-request every resource len(nltk_resources) times.
    for resource in nltk_resources:
        nltk.download(resource)

    print("NLP resources initialized successfully.")
# Module-level state for the lazy accessors get_nlp() and get_nltk().
nltk_initialized = False  # set to True once get_nltk() has run its downloads
nlp = None  # assigned the loaded spaCy pipeline on the first get_nlp() call
def get_nlp():
    """Return the shared spaCy pipeline, loading it on first use.

    The pipeline named by ``settings.SPACY_MODEL`` is loaded once and stored
    in the module-level ``nlp`` variable; later calls return that object.
    """
    global nlp
    if nlp is not None:
        # Already loaded by an earlier call.
        return nlp
    nlp = spacy.load(settings.SPACY_MODEL)
    return nlp
def get_nltk():
    """Return the ``nltk`` module, downloading its resources on first use.

    On the first call, 'punkt' and 'averaged_perceptron_tagger' are requested
    quietly and the module-level ``nltk_initialized`` flag is set, so later
    calls skip the downloads.
    """
    global nltk_initialized
    if nltk_initialized:
        # Downloads were already requested by an earlier call.
        return nltk
    nltk.downloader.download('punkt', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
    nltk_initialized = True
    return nltk