# ttm-webapp-hf / pipeline / tibetan_stopwords.py
# daniel-wojahn's picture
# maintenance and alignment prototype
# bda2b5b
import logging
logger = logging.getLogger(__name__)


def get_stopwords(use_lite: bool = False) -> set:
    """Return the Tibetan stopwords exported by one of the sibling modules.

    The stopwords are imported lazily from ``stopwords_lite_bo.py`` or
    ``stopwords_bo.py``, both of which are expected to define a module-level
    ``STOPWORDS`` collection.

    Args:
        use_lite: If True, load the smaller, less aggressive list from
            ``stopwords_lite_bo.py``. Otherwise load the full list from
            ``stopwords_bo.py``.

    Returns:
        The ``STOPWORDS`` object of the selected module (the module's own
        object, not a copy), or an empty set if loading failed. Failures are
        logged and never propagated to the caller.
    """
    stopwords_set = set()
    # Bound up front so the success message can always name its source; the
    # name must exist before the log call or the catch-all below would report
    # a bogus "unexpected error" after a perfectly good import.
    module_name = "stopwords_lite_bo" if use_lite else "stopwords_bo"
    try:
        # The imports are kept as literal statements so static tooling can
        # still see which modules this function depends on.
        if use_lite:
            from .stopwords_lite_bo import STOPWORDS
        else:
            from .stopwords_bo import STOPWORDS
        # Assigned before logging so a failure while formatting the message
        # still returns the stopwords that were loaded.
        stopwords_set = STOPWORDS
        logger.info(
            "Successfully loaded %d stopwords from %s.py",
            len(stopwords_set),
            module_name,
        )
    except ImportError:
        # Also raised when the module imports fine but does not define the
        # requested name ("cannot import name 'STOPWORDS'").
        logger.error(
            "Failed to import STOPWORDS from stopwords file. "
            "Ensure the file exists in the 'pipeline' directory, is a Python module (ends in .py), "
            "and is importable (e.g., no syntax errors)."
        )
    except AttributeError:
        logger.error(
            "Variable 'STOPWORDS' (all caps) not found in stopwords file. "
            "Please ensure the stopword set is defined with this name within the module."
        )
    except Exception as e:
        # Deliberate best-effort boundary: callers get an empty (or already
        # loaded) set instead of an exception.
        logger.error("An unexpected error occurred while loading stopwords: %s", e)
    return stopwords_set