Spaces:

JoeArmani
/

csc525_retrieval_based_chatbot

Sleeping

csc525_retrieval_based_chatbot / setup.py

JoeArmani

summarization, reranker, environment setup, and response quality checker

f7b283c 9 months ago

6.13 kB

	from setuptools import setup, find_packages
	import subprocess
	import sys
	import platform

	# The README doubles as the long description passed to setup().
	with open("README.md", "r", encoding="utf-8") as fh:
	    long_description = fh.read()

	# Build the install requirements from requirements.txt, one entry per line.
	# Each line is stripped *before* the comment check so that blank lines and
	# comment lines are skipped even when they carry leading whitespace.
	with open("requirements.txt", "r", encoding="utf-8") as fh:
	    requirements = [
	        entry
	        for entry in (line.strip() for line in fh)
	        if entry and not entry.startswith("#")
	    ]

	def setup_spacy_models(models=('en_core_web_sm', 'en_core_web_md')):
	    """
	    Download the specified spaCy models.

	    Each model is fetched by running ``<python> -m spacy download <model>``
	    with the current interpreter. A failed download is printed and the
	    remaining models are still attempted.

	    Args:
	        models: Iterable[str] of the names of the spaCy models to download.
	            The default is an immutable tuple so it cannot be mutated
	            between calls.
	    """
	    for model in models:
	        print(f"Downloading spaCy model: {model}")
	        try:
	            subprocess.check_call([sys.executable, "-m", "spacy", "download", model])
	        except subprocess.CalledProcessError as e:
	            # Report the failure and move on, so one bad model does not
	            # prevent the others from being downloaded.
	            print(f"Error downloading spaCy model: {model}")
	            print(e)
	            continue
	        print(f"Successfully downloaded spaCy model: {model}")

	def setup_gpu_dependencies():
	    """
	    Install faiss with pip, choosing the GPU or CPU build.

	    faiss-gpu is attempted when torch can be imported and reports CUDA as
	    available; if that install fails, faiss-cpu is installed instead. With
	    no CUDA (or no torch), faiss-cpu is installed directly. A failing
	    faiss-cpu install raises subprocess.CalledProcessError.
	    """
	    pip_install = [sys.executable, "-m", "pip", "install"]
	    cpu_command = pip_install + ["faiss-cpu>=1.7.0"]

	    # Probe for CUDA; a missing torch simply means "no GPU".
	    has_cuda = False
	    try:
	        import torch
	        has_cuda = torch.cuda.is_available()
	    except ImportError:
	        pass

	    if not has_cuda:
	        subprocess.check_call(cpu_command)
	        return

	    try:
	        subprocess.check_call(pip_install + ["faiss-gpu>=1.7.0"])
	        print("Successfully installed faiss-gpu")
	    except subprocess.CalledProcessError:
	        print("Failed to install faiss-gpu. Falling back to faiss-cpu")
	        subprocess.check_call(cpu_command)

	def setup_models():
	    """
	    Fetch the pretrained assets used by the project.

	    Loads, in order: the DistilBERT tokenizer and model, the Universal
	    Sentence Encoder from TF Hub, the paraphraser tokenizer, the three
	    Helsinki-NLP translation tokenizers (en->de, de->es, es->en) and the
	    GPT-2 tokenizer. The loaded objects are discarded; the calls are made
	    for their download side effect only.
	    """
	    # These imports are kept exactly as they were (including the names that
	    # are not used below) so that a missing package still fails here.
	    import tensorflow_hub as hub
	    from sklearn.feature_extraction.text import TfidfVectorizer
	    from transformers import (
	        AutoTokenizer,
	        AutoModel,
	        GPT2TokenizerFast,
	        MarianTokenizer,
	        DistilBertTokenizer,
	        DistilBertModel
	    )

	    # DistilBERT: tokenizer first, then the model weights.
	    distilbert_id = 'distilbert-base-uncased'
	    DistilBertTokenizer.from_pretrained(distilbert_id)
	    DistilBertModel.from_pretrained(distilbert_id)

	    # Universal Sentence Encoder.
	    hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')

	    # Paraphraser (tokenizer only).
	    AutoTokenizer.from_pretrained('humarin/chatgpt_paraphraser_on_T5_base')

	    # Translation tokenizers for each hop: source -> pivot -> target -> source.
	    source_lang, pivot_lang, target_lang = 'en', 'de', 'es'
	    hops = (
	        (source_lang, pivot_lang),
	        (pivot_lang, target_lang),
	        (target_lang, source_lang),
	    )
	    for src, dst in hops:
	        MarianTokenizer.from_pretrained(f'Helsinki-NLP/opus-mt-{src}-{dst}')

	    # GPT-2 (tokenizer only).
	    GPT2TokenizerFast.from_pretrained('gpt2')

	def setup_nltk():
	    """
	    Download required NLTK data.

	    Downloads the 'wordnet' and 'averaged_perceptron_tagger_eng' packages.
	    A package that cannot be downloaded produces a warning and the next
	    package is still attempted; nothing is raised to the caller.
	    """
	    import nltk
	    required_packages = [
	        'wordnet',
	        'averaged_perceptron_tagger_eng'
	    ]

	    for package in required_packages:
	        try:
	            print(f"Downloading {package}...")
	            # nltk.download() signals most failures through a False return
	            # value rather than an exception, so the result must be checked
	            # before reporting success.
	            downloaded = nltk.download(package)
	        except Exception as e:
	            print(f"Warning: Could not download {package}: {str(e)}")
	            continue

	        if downloaded:
	            print(f"Successfully downloaded {package}")
	        else:
	            print(f"Warning: Could not download {package}: download reported failure")

	def setup_faiss():
	    """
	    Install the faiss library with pip.

	    faiss-gpu is attempted only on Linux when torch reports CUDA as
	    available. In every other case, or when the faiss-gpu install fails,
	    faiss-cpu is installed. A failing faiss-cpu install is printed rather
	    than raised.
	    """
	    current_os = platform.system()

	    def check_cuda():
	        """Return True only if torch imports and reports CUDA as available."""
	        try:
	            import torch
	            return torch.cuda.is_available()
	        except Exception:
	            # Any failure to import or query torch is treated as "no CUDA".
	            # Exception (not a bare except) is caught so that
	            # KeyboardInterrupt and SystemExit still propagate.
	            return False

	    if current_os == "Linux" and check_cuda():
	        # Attempt to install faiss-gpu
	        try:
	            print("Attempting to install faiss-gpu...")
	            subprocess.check_call([sys.executable, "-m", "pip", "install", "faiss-gpu>=1.7.0"])
	            print("Successfully installed faiss-gpu")
	            return
	        except subprocess.CalledProcessError:
	            print("Failed to install faiss-gpu. Falling back to faiss-cpu.")

	    # Install faiss-cpu as the default
	    try:
	        print("Installing faiss-cpu...")
	        subprocess.check_call([sys.executable, "-m", "pip", "install", "faiss-cpu>=1.7.0"])
	        print("Successfully installed faiss-cpu")
	    except subprocess.CalledProcessError as e:
	        print("Error installing faiss-cpu")
	        print(e)

	# Package metadata and build configuration. This call is at module level,
	# so it runs whenever the file is executed.
	setup(
	    name="retrieval-chatbot",
	    version="0.2.0",
	    author="Joe Armani",
	    # NOTE(review): this value looks like a redacted placeholder rather than
	    # a real address - confirm before publishing.
	    author_email="[email protected]",
	    description="A retrieval-based chatbot with enhanced validation",
	    long_description=long_description,
	    long_description_content_type="text/markdown",
	    packages=find_packages(),
	    classifiers=[
	        "Development Status :: 3 - Alpha",
	        "Intended Audience :: Science/Research",
	        "License :: OSI Approved :: MIT License",
	        "Operating System :: OS Independent",
	        "Programming Language :: Python :: 3",
	        "Programming Language :: Python :: 3.8",
	        "Programming Language :: Python :: 3.9",
	        "Programming Language :: Python :: 3.10",
	        "Topic :: Scientific/Engineering :: Artificial Intelligence",
	        "Topic :: Text Processing :: Linguistic",
	    ],
	    python_requires=">=3.8",
	    install_requires=requirements,
	    extras_require={
	        'dev': [
	            'pytest>=7.0.0',
	            'black>=22.0.0',
	            'isort>=5.10.0',
	            'mypy>=1.0.0',
	        ],
	        'gpu': [
	            'faiss-gpu>=1.7.0',
	        ],
	    },
	    entry_points={
	        "console_scripts": [
	            "dialogue-augment=dialogue_augmenter.main:main",
	            "run-chatbot=chatbot.main:main",
	        ],
	    },
	    include_package_data=True,
	    package_data={
	        "chatbot": ["config/*.yaml"],
	        # package_data entries are glob patterns: without the "*" wildcard,
	        # "data/.json" only matches a file literally named ".json", so no
	        # data or config files would be packaged.
	        "dialogue_augmenter": ["data/*.json", "config/*.yaml"],
	    },
	)

	if __name__ == '__main__':
	    # When the file is run as a script, run each provisioning step in turn
	    # after the setup() call above: spaCy models, GPU dependencies,
	    # pretrained models, NLTK data, then faiss.
	    provisioning_steps = (
	        setup_spacy_models,
	        setup_gpu_dependencies,
	        setup_models,
	        setup_nltk,
	        setup_faiss,
	    )
	    for run_step in provisioning_steps:
	        run_step()