"""Setup script for the text-data-augmenter package.

Installing with ``pip install .`` registers the package and its
``dialogue-augment`` console script.  Running this file directly
(``python setup.py``) additionally downloads the spaCy, TF-Hub,
transformers, and NLTK resources the augmenter needs at runtime.
"""
import subprocess
import sys

from setuptools import find_packages, setup

# Long description shown on PyPI; sourced from the project README.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Runtime dependencies; skip blank lines and '#' comments.
with open("requirements.txt", "r", encoding="utf-8") as fh:
    requirements = [
        line.strip()
        for line in fh
        if line.strip() and not line.startswith("#")
    ]


def setup_spacy_model():
    """Download the small English spaCy model via the spaCy CLI.

    Raises:
        subprocess.CalledProcessError: if the download command fails.
    """
    subprocess.check_call(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
    )


def setup_models():
    """Download the TF-Hub, paraphraser, translation, and GPT-2 models.

    Heavy third-party packages are imported lazily so that a plain
    ``pip install`` does not require them at setup time.
    """
    import tensorflow_hub as hub
    from transformers import (
        AutoTokenizer,
        GPT2TokenizerFast,
        MarianTokenizer,
    )

    # Download Universal Sentence Encoder
    _ = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')

    # Download paraphraser model
    _ = AutoTokenizer.from_pretrained('humarin/chatgpt_paraphraser_on_T5_base')

    # Download translation models (en -> de, de -> es, es -> en round trip)
    source_lang, pivot_lang, target_lang = 'en', 'de', 'es'
    model_names = [
        f'Helsinki-NLP/opus-mt-{source_lang}-{pivot_lang}',
        f'Helsinki-NLP/opus-mt-{pivot_lang}-{target_lang}',
        f'Helsinki-NLP/opus-mt-{target_lang}-{source_lang}',
    ]
    for model_name in model_names:
        _ = MarianTokenizer.from_pretrained(model_name)

    # Download GPT-2
    _ = GPT2TokenizerFast.from_pretrained('gpt2')


def setup_nltk():
    """Download required NLTK data.

    Failures are reported as warnings rather than raised, so a single
    unreachable package does not abort the whole setup run.
    """
    import nltk

    required_packages = [
        'wordnet',
        'averaged_perceptron_tagger_eng',
    ]
    for package in required_packages:
        try:
            print(f"Downloading {package}...")
            nltk.download(package)
            print(f"Successfully downloaded {package}")
        except Exception as e:
            print(f"Warning: Could not download {package}: {str(e)}")


setup(
    name="text-data-augmenter",
    version="0.1.0",
    author="Joe Armani",
    author_email="joseph_armani@csuglobal.edu",
    description="A tool for generating high-quality dialogue variations",
    # Fix: long_description was read from README.md but never passed.
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Text Processing :: Linguistic",
    ],
    python_requires=">=3.8",
    install_requires=requirements,
    entry_points={
        "console_scripts": [
            "dialogue-augment=dialogue_augmenter.main:main",
        ],
    },
    include_package_data=True,
    package_data={
        "dialogue_augmenter": ["data/*.json", "config/*.yaml"],
    },
)

# NOTE(review): these downloads run only on `python setup.py`, after
# setup() has already executed — unchanged from the original behavior.
if __name__ == '__main__':
    setup_spacy_model()
    setup_models()
    setup_nltk()