|
from setuptools import setup, find_packages |
|
import subprocess |
|
import sys |
|
|
|
# The PyPI long description is taken verbatim from the README.
with open("README.md", "r", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

# Collect install requirements, skipping blank lines and comment lines.
with open("requirements.txt", "r", encoding="utf-8") as req_file:
    requirements = [
        raw.strip()
        for raw in req_file
        if raw.strip() and not raw.startswith("#")
    ]
|
|
|
def setup_spacy_model():
    """
    Download the small English spaCy model.

    Shells out to ``python -m spacy download`` with the current
    interpreter; raises CalledProcessError if the download fails.
    """
    command = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
    subprocess.check_call(command)
|
|
|
def setup_models():
    """
    Pre-download the ML models and tokenizers this package relies on.

    Fetches (and thereby caches locally):
      - the Universal Sentence Encoder v4 from TF Hub,
      - the T5-based paraphraser tokenizer,
      - Marian tokenizers for the en->de, de->es, es->en translation
        models (presumably a back-translation pivot chain -- confirm
        against the augmenter code),
      - the GPT-2 fast tokenizer.

    Imports are kept function-local so merely importing this setup
    module does not pull in the heavy ML dependencies.
    """
    import tensorflow_hub as hub
    # NOTE: the original also imported sklearn's TfidfVectorizer here,
    # but it was never used -- removed as dead code.
    from transformers import (
        AutoTokenizer,
        GPT2TokenizerFast,
        MarianTokenizer
    )

    # Universal Sentence Encoder (cached by TF Hub on first load).
    _ = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')

    # T5-based paraphraser tokenizer.
    _ = AutoTokenizer.from_pretrained('humarin/chatgpt_paraphraser_on_T5_base')

    # Translation tokenizers: en -> de, de -> es, es -> en.
    source_lang, pivot_lang, target_lang = 'en', 'de', 'es'
    model_names = [
        f'Helsinki-NLP/opus-mt-{source_lang}-{pivot_lang}',
        f'Helsinki-NLP/opus-mt-{pivot_lang}-{target_lang}',
        f'Helsinki-NLP/opus-mt-{target_lang}-{source_lang}'
    ]
    for model_name in model_names:
        _ = MarianTokenizer.from_pretrained(model_name)

    # GPT-2 fast tokenizer.
    _ = GPT2TokenizerFast.from_pretrained('gpt2')
|
|
|
def setup_nltk():
    """
    Download required NLTK data packages.

    Best-effort: a failed download prints a warning instead of aborting
    the whole setup. Note that ``nltk.download`` signals most failures
    by returning False rather than raising, so the return value is
    checked in addition to catching exceptions -- the original code
    printed "Successfully downloaded" even when the download failed.
    """
    import nltk

    required_packages = [
        'wordnet',
        'averaged_perceptron_tagger_eng'
    ]

    for package in required_packages:
        try:
            print(f"Downloading {package}...")
            # nltk.download returns False on failure instead of raising.
            if nltk.download(package):
                print(f"Successfully downloaded {package}")
            else:
                print(f"Warning: Could not download {package}")
        except Exception as e:
            print(f"Warning: Could not download {package}: {str(e)}")
|
|
|
setup(
    name="text-data-augmenter",
    version="0.1.0",
    author="Joe Armani",
    author_email="[email protected]",
    description="A tool for generating high-quality dialogue variations",
    # The README was read into `long_description` above but never passed
    # to setup() -- wire it up so PyPI renders the project page.
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Text Processing :: Linguistic",
    ],
    python_requires=">=3.8",
    install_requires=requirements,
    entry_points={
        "console_scripts": [
            "dialogue-augment=dialogue_augmenter.main:main",
        ],
    },
    include_package_data=True,
    package_data={
        "dialogue_augmenter": ["data/*.json", "config/*.yaml"],
    },
)
|
|
|
if __name__ == '__main__':
    # NOTE: setup() above runs on plain import as well (standard for a
    # setup.py); the model/data downloads below run only when this file
    # is executed directly.
    setup_spacy_model()
    setup_models()
    setup_nltk()