"""Packaging script for the retrieval-chatbot project.

Besides the regular ``setuptools.setup()`` call, this file defines helper
functions that fetch external resources (spaCy models, Hugging Face / TF-Hub
models, NLTK corpora and a faiss build).  Those helpers run only when the
file is executed as a script, after ``setup()`` has returned.
"""
import platform
import subprocess
import sys

from setuptools import setup, find_packages

# Default spaCy pipelines fetched by setup_spacy_models().  Kept as an
# immutable tuple so it can never be mutated between calls.
_DEFAULT_SPACY_MODELS = ("en_core_web_sm", "en_core_web_md")

# pip requirement specifiers for the two faiss flavours.
_FAISS_GPU_SPEC = "faiss-gpu>=1.7.0"
_FAISS_CPU_SPEC = "faiss-cpu>=1.7.0"

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

with open("requirements.txt", "r", encoding="utf-8") as fh:
    # Skip blank lines and comment lines.  The comment test is applied to the
    # stripped text so that indented "# ..." lines are ignored as well.
    requirements = [
        stripped
        for stripped in (line.strip() for line in fh)
        if stripped and not stripped.startswith("#")
    ]


def _pip_install(spec):
    """Install *spec* with pip, using the interpreter running this script.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", spec])


def _cuda_available():
    """Return True when torch can be imported and reports a usable CUDA device.

    Any ordinary exception while importing or probing torch is treated as
    "no CUDA"; KeyboardInterrupt/SystemExit are deliberately not swallowed.
    """
    try:
        import torch
        return torch.cuda.is_available()
    except Exception:
        return False


def setup_spacy_models(models=None):
    """
    Download the specified spaCy models.

    Each model is downloaded independently: a failure is reported and the
    remaining models are still attempted.

    Args:
        models(List): List[str] of the names of the spaCy models to download.
            Defaults to ``en_core_web_sm`` and ``en_core_web_md``.
    """
    if models is None:
        models = _DEFAULT_SPACY_MODELS

    for model in models:
        print(f"Downloading spaCy model: {model}")
        try:
            subprocess.check_call([sys.executable, "-m", "spacy", "download", model])
        except subprocess.CalledProcessError as e:
            print(f"Error downloading spaCy model: {model}")
            print(e)
        else:
            print(f"Successfully downloaded spaCy model: {model}")


def setup_gpu_dependencies():
    """Setup GPU-specific dependencies.

    Installs faiss-gpu when CUDA is available, falling back to faiss-cpu if
    that installation fails; installs faiss-cpu directly otherwise.

    Raises:
        subprocess.CalledProcessError: if the faiss-cpu installation fails.
    """
    if _cuda_available():
        try:
            _pip_install(_FAISS_GPU_SPEC)
            print("Successfully installed faiss-gpu")
            return
        except subprocess.CalledProcessError:
            print("Failed to install faiss-gpu. Falling back to faiss-cpu")

    _pip_install(_FAISS_CPU_SPEC)


def setup_models():
    """
    Download other required models.

    Loads each resource once so that it is fetched ahead of first use:
    the DistilBERT tokenizer and model, the Universal Sentence Encoder from
    TF-Hub, and the tokenizers for the paraphraser, the en/de/es translation
    chain and GPT-2.
    """
    import tensorflow_hub as hub
    from transformers import (
        AutoTokenizer,
        GPT2TokenizerFast,
        MarianTokenizer,
        DistilBertTokenizer,
        DistilBertModel
    )

    # Cache the models
    _ = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    _ = DistilBertModel.from_pretrained('distilbert-base-uncased')

    # Download Universal Sentence Encoder
    _ = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')

    # Download paraphraser model
    # NOTE(review): only the tokenizer is loaded here (and for the translation
    # and GPT-2 entries below) - confirm whether the weights are needed too.
    _ = AutoTokenizer.from_pretrained('humarin/chatgpt_paraphraser_on_T5_base')

    # Download translation models (source -> pivot -> target -> source)
    source_lang, pivot_lang, target_lang = 'en', 'de', 'es'
    model_names = [
        f'Helsinki-NLP/opus-mt-{source_lang}-{pivot_lang}',
        f'Helsinki-NLP/opus-mt-{pivot_lang}-{target_lang}',
        f'Helsinki-NLP/opus-mt-{target_lang}-{source_lang}'
    ]
    for model_name in model_names:
        _ = MarianTokenizer.from_pretrained(model_name)

    # Download GPT-2
    _ = GPT2TokenizerFast.from_pretrained('gpt2')


def setup_nltk():
    """
    Download required NLTK data.

    Failures are reported as warnings and never abort the remaining
    downloads.
    """
    import nltk

    required_packages = [
        'wordnet',
        'averaged_perceptron_tagger_eng'
    ]

    for package in required_packages:
        print(f"Downloading {package}...")
        try:
            downloaded = nltk.download(package)
        except Exception as e:
            print(f"Warning: Could not download {package}: {e}")
            continue

        # nltk.download() signals most failures by returning False rather
        # than raising, so the return value decides which message is shown.
        if downloaded:
            print(f"Successfully downloaded {package}")
        else:
            print(f"Warning: Could not download {package}")


def setup_faiss():
    """
    Download required faiss library.

    On Linux with CUDA available, faiss-gpu is attempted first; in every
    other case, or if that attempt fails, faiss-cpu is installed.  A failed
    faiss-cpu installation is reported but not raised.
    """
    if platform.system() == "Linux" and _cuda_available():
        # Attempt to install faiss-gpu
        try:
            print("Attempting to install faiss-gpu...")
            _pip_install(_FAISS_GPU_SPEC)
            print("Successfully installed faiss-gpu")
            return
        except subprocess.CalledProcessError:
            print("Failed to install faiss-gpu. Falling back to faiss-cpu.")

    # Install faiss-cpu as the default
    try:
        print("Installing faiss-cpu...")
        _pip_install(_FAISS_CPU_SPEC)
        print("Successfully installed faiss-cpu")
    except subprocess.CalledProcessError as e:
        print("Error installing faiss-cpu")
        print(e)


setup(
    name="retrieval-chatbot",
    version="0.2.0",
    author="Joe Armani",
    author_email="joseph_armani@csuglobal.edu",
    description="A retrieval-based chatbot with enhanced validation",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Text Processing :: Linguistic",
    ],
    python_requires=">=3.8",
    install_requires=requirements,
    extras_require={
        'dev': [
            'pytest>=7.0.0',
            'black>=22.0.0',
            'isort>=5.10.0',
            'mypy>=1.0.0',
        ],
        'gpu': [
            'faiss-gpu>=1.7.0',
        ],
    },
    entry_points={
        "console_scripts": [
            "dialogue-augment=dialogue_augmenter.main:main",
            "run-chatbot=chatbot.main:main",
        ],
    },
    include_package_data=True,
    package_data={
        "chatbot": ["config/*.yaml"],
        "dialogue_augmenter": ["data/*.json", "config/*.yaml"],
    },
)

if __name__ == '__main__':
    # Runs after setup() has returned when this file is executed as a script.
    setup_spacy_models()
    setup_gpu_dependencies()
    setup_models()
    setup_nltk()
    setup_faiss()