Spaces:

hardiktiwari
/

tensora-autotrain

Sleeping

App Files Files Community

tensora-autotrain / src /autotrain /app /models.py

hardiktiwari

Upload 244 files

33d4721 verified 3 months ago

raw

history blame contribute delete

10.6 kB

	import collections

	from huggingface_hub import list_models


	def get_sorted_models(hub_models):
	    """
	    Drop private models and return the remaining ids, most downloaded first.

	    Args:
	        hub_models (iterable): Model objects exposing the attributes ``id``,
	            ``downloads`` and ``private``.

	    Returns:
	        list: Ids of the models whose ``private`` attribute is exactly ``False``,
	            ordered by download count in descending order. Models with equal
	            counts keep their input order. A ``downloads`` value of ``None`` is
	            counted as zero.
	    """
	    # Only models explicitly flagged as public are kept (``private is False``).
	    public_models = [m for m in hub_models if m.private is False]
	    # A missing download count (None) cannot be compared with an int and would
	    # make the sort raise TypeError, so it is treated as zero downloads.
	    public_models.sort(key=lambda m: m.downloads or 0, reverse=True)
	    return [m.id for m in public_models]


	def _fetch_text_classification_models():
	    """
	    Build the model list offered for text classification.

	    Queries the Hugging Face Hub for up to 100 "fill-mask" and up to 100
	    "text-classification" transformers models sorted by downloads, merges them
	    and passes the result through ``get_sorted_models``. Up to 30 "fill-mask"
	    models sorted by "likes7d" are then fetched; when any are returned they go
	    through ``get_sorted_models`` as well and are placed at the front of the
	    result, with their ids removed from the remainder.

	    Returns:
	        list: Model ids, trending ones first, followed by the remaining models.
	    """
	    # Arguments shared by every Hub query issued below.
	    query = {"library": "transformers", "direction": -1, "full": False}

	    fill_mask = list(list_models(task="fill-mask", sort="downloads", limit=100, **query))
	    text_clf = list(list_models(task="text-classification", sort="downloads", limit=100, **query))
	    ranked = get_sorted_models(fill_mask + text_clf)

	    trending = list(list_models(task="fill-mask", sort="likes7d", limit=30, **query))
	    if not trending:
	        return ranked

	    trending_ids = get_sorted_models(trending)
	    # Trending ids lead the list; skip them in the remainder to avoid repeats.
	    return trending_ids + [model_id for model_id in ranked if model_id not in trending_ids]


	def _fetch_llm_models():
	    """
	    Build the model list offered for LLM tasks.

	    Fetches up to 100 "text-generation" transformers models sorted by downloads
	    and up to 30 sorted by "likes7d". Both sets go through ``get_sorted_models``;
	    when the trending query returns anything, its ids are placed first and
	    removed from the remainder.

	    Returns:
	        list: Model ids, trending ones first, followed by the remaining models.
	    """
	    query = {
	        "task": "text-generation",
	        "library": "transformers",
	        "direction": -1,
	        "full": False,
	    }

	    ranked = get_sorted_models(list(list_models(sort="downloads", limit=100, **query)))

	    trending = list(list_models(sort="likes7d", limit=30, **query))
	    if not trending:
	        return ranked

	    trending_ids = get_sorted_models(trending)
	    # Trending ids lead the list; skip them in the remainder to avoid repeats.
	    return trending_ids + [model_id for model_id in ranked if model_id not in trending_ids]


	def _fetch_image_classification_models():
	    """
	    Build the model list offered for image classification.

	    Fetches up to 100 "image-classification" transformers models sorted by
	    downloads and up to 30 sorted by "likes7d". Both sets go through
	    ``get_sorted_models``; when the trending query returns anything, its ids are
	    placed first and removed from the remainder.

	    Returns:
	        list: Model ids, trending ones first, followed by the remaining models.
	    """
	    query = {
	        "task": "image-classification",
	        "library": "transformers",
	        "direction": -1,
	        "full": False,
	    }

	    ranked = get_sorted_models(list(list_models(sort="downloads", limit=100, **query)))

	    trending = list(list_models(sort="likes7d", limit=30, **query))
	    if not trending:
	        return ranked

	    trending_ids = get_sorted_models(trending)
	    # Trending ids lead the list; skip them in the remainder to avoid repeats.
	    return trending_ids + [model_id for model_id in ranked if model_id not in trending_ids]


	def _fetch_image_object_detection_models():
	    """
	    Build the model list offered for image object detection.

	    Fetches up to 100 "object-detection" transformers models sorted by downloads
	    and up to 30 sorted by "likes7d"; both queries pass "object-detection" as
	    ``task`` and as ``pipeline_tag``. Both sets go through ``get_sorted_models``;
	    when the trending query returns anything, its ids are placed first and
	    removed from the remainder.

	    Returns:
	        list: Model ids, trending ones first, followed by the remaining models.
	    """
	    query = {
	        "task": "object-detection",
	        "library": "transformers",
	        "direction": -1,
	        "full": False,
	        "pipeline_tag": "object-detection",
	    }

	    ranked = get_sorted_models(list(list_models(sort="downloads", limit=100, **query)))

	    trending = list(list_models(sort="likes7d", limit=30, **query))
	    if not trending:
	        return ranked

	    trending_ids = get_sorted_models(trending)
	    # Trending ids lead the list; skip them in the remainder to avoid repeats.
	    return trending_ids + [model_id for model_id in ranked if model_id not in trending_ids]


	def _fetch_seq2seq_models():
	    """
	    Build the model list offered for seq2seq tasks.

	    Fetches up to 100 "text2text-generation" transformers models sorted by
	    downloads and up to 30 sorted by "likes7d". Both sets go through
	    ``get_sorted_models``; when the trending query returns anything, its ids are
	    placed first and removed from the remainder.

	    Returns:
	        list: Model ids, trending ones first, followed by the remaining models.
	    """
	    query = {
	        "task": "text2text-generation",
	        "library": "transformers",
	        "direction": -1,
	        "full": False,
	    }

	    ranked = get_sorted_models(list(list_models(sort="downloads", limit=100, **query)))

	    trending = list(list_models(sort="likes7d", limit=30, **query))
	    if not trending:
	        return ranked

	    trending_ids = get_sorted_models(trending)
	    # Trending ids lead the list; skip them in the remainder to avoid repeats.
	    return trending_ids + [model_id for model_id in ranked if model_id not in trending_ids]


	def _fetch_token_classification_models():
	    """
	    Build the model list offered for token classification.

	    Fetches up to 100 "fill-mask" and up to 100 "token-classification"
	    transformers models sorted by downloads, merges them and passes the result
	    through ``get_sorted_models``. Up to 30 "fill-mask" models sorted by
	    "likes7d" are then fetched; when any are returned they go through
	    ``get_sorted_models`` as well and are placed at the front of the result,
	    with their ids removed from the remainder.

	    Returns:
	        list: Model ids, trending ones first, followed by the remaining models.
	    """
	    # Arguments shared by every Hub query issued below.
	    query = {"library": "transformers", "direction": -1, "full": False}

	    fill_mask = list(list_models(task="fill-mask", sort="downloads", limit=100, **query))
	    token_clf = list(list_models(task="token-classification", sort="downloads", limit=100, **query))
	    ranked = get_sorted_models(fill_mask + token_clf)

	    trending = list(list_models(task="fill-mask", sort="likes7d", limit=30, **query))
	    if not trending:
	        return ranked

	    trending_ids = get_sorted_models(trending)
	    # Trending ids lead the list; skip them in the remainder to avoid repeats.
	    return trending_ids + [model_id for model_id in ranked if model_id not in trending_ids]


	def _fetch_st_models():
	    """
	    Build the model list offered for sentence-transformers training.

	    Fetches up to 30 "sentence-similarity" models of the sentence-transformers
	    library and up to 30 "fill-mask" models of the transformers library, both
	    sorted by downloads, merges them and passes the result through
	    ``get_sorted_models``. Up to 30 "sentence-similarity" sentence-transformers
	    models sorted by "likes7d" are then fetched; when any are returned they go
	    through ``get_sorted_models`` as well and are placed at the front of the
	    result, with their ids removed from the remainder.

	    Returns:
	        list: Model ids, trending ones first, followed by the remaining models.
	    """
	    # Arguments shared by the two sentence-transformers queries.
	    st_query = {
	        "task": "sentence-similarity",
	        "library": "sentence-transformers",
	        "direction": -1,
	        "limit": 30,
	        "full": False,
	    }

	    similarity = list(list_models(sort="downloads", **st_query))
	    fill_mask = list(
	        list_models(
	            task="fill-mask",
	            library="transformers",
	            sort="downloads",
	            direction=-1,
	            limit=30,
	            full=False,
	        )
	    )
	    ranked = get_sorted_models(similarity + fill_mask)

	    trending = list(list_models(sort="likes7d", **st_query))
	    if not trending:
	        return ranked

	    trending_ids = get_sorted_models(trending)
	    # Trending ids lead the list; skip them in the remainder to avoid repeats.
	    return trending_ids + [model_id for model_id in ranked if model_id not in trending_ids]


	def _fetch_vlm_models():
	    """
	    Build the model list offered for vision-language model training.

	    Fetches up to 100 "image-text-to-text" models filtered by "paligemma" sorted
	    by downloads, and up to 30 sorted by "likes7d". Both sets go through
	    ``get_sorted_models``; when the trending query returns anything, its ids are
	    placed first and removed from the remainder.

	    Returns:
	        list: Model ids, trending ones first, followed by the remaining models.
	    """
	    # NOTE: equivalent queries with filter=["florence2"] used to be merged in
	    # here but are disabled, so only the "paligemma" filter contributes.
	    query = {
	        "task": "image-text-to-text",
	        "direction": -1,
	        "full": False,
	        "filter": ["paligemma"],
	    }

	    ranked = get_sorted_models(list(list_models(sort="downloads", limit=100, **query)))

	    trending = list(list_models(sort="likes7d", limit=30, **query))
	    if not trending:
	        return ranked

	    trending_ids = get_sorted_models(trending)
	    # Trending ids lead the list; skip them in the remainder to avoid repeats.
	    return trending_ids + [model_id for model_id in ranked if model_id not in trending_ids]


	def fetch_models():
	    """
	    Assemble the model choices for every supported task.

	    Hub-backed tasks are filled from the ``_fetch_*`` helpers, while the tabular
	    tasks use fixed lists of algorithm names. "text-regression" and
	    "extractive-qa" offer the same models as "text-classification", and
	    "image-regression" the same as "image-classification", so each of those two
	    helpers is called only once and its result is copied per task instead of
	    repeating identical Hub queries.

	    Returns:
	        collections.defaultdict: Maps a task name to its list of model ids (or
	            algorithm names for tabular tasks). Unknown task names yield an
	            empty list.
	    """
	    # Fetch the shared lists once; every extra call would repeat the same Hub requests.
	    text_models = _fetch_text_classification_models()
	    image_models = _fetch_image_classification_models()

	    _mc = collections.defaultdict(list)
	    _mc["text-classification"] = text_models
	    _mc["llm"] = _fetch_llm_models()
	    _mc["image-classification"] = image_models
	    # Copies keep each task's list an independent object, as it was when every
	    # task triggered its own fetch.
	    _mc["image-regression"] = list(image_models)
	    _mc["seq2seq"] = _fetch_seq2seq_models()
	    _mc["token-classification"] = _fetch_token_classification_models()
	    _mc["text-regression"] = list(text_models)
	    _mc["image-object-detection"] = _fetch_image_object_detection_models()
	    _mc["sentence-transformers"] = _fetch_st_models()
	    _mc["vlm"] = _fetch_vlm_models()
	    _mc["extractive-qa"] = list(text_models)

	    # tabular-classification
	    _mc["tabular-classification"] = [
	        "xgboost",
	        "random_forest",
	        "ridge",
	        "logistic_regression",
	        "svm",
	        "extra_trees",
	        "adaboost",
	        "decision_tree",
	        "knn",
	    ]

	    # tabular-regression
	    _mc["tabular-regression"] = [
	        "xgboost",
	        "random_forest",
	        "ridge",
	        "svm",
	        "extra_trees",
	        "adaboost",
	        "decision_tree",
	        "knn",
	    ]
	    return _mc