# uplift_modeling/mlops_utils/wandb_utils.py
# Weights & Biases (wandb) helpers for logging dataset, EDA, and training
# artifacts for the uplift-modeling project.
import wandb
import pandas as pd
import os
def upload_dataset_to_wandb(dirs, project_name, dataset_name, dataset_type='raw_dataset'):
    """Upload local directories to W&B as a single dataset artifact.

    Starts a short-lived run with job type ``'load-data'``, bundles every
    directory in *dirs* into one artifact named *dataset_name*, and logs it.

    Args:
        dirs: Iterable of local directory paths to include in the artifact.
        project_name: W&B project to log the artifact under.
        dataset_name: Name given to the created artifact.
        dataset_type: Artifact type label (default ``'raw_dataset'``).
    """
    with wandb.init(project=project_name, job_type='load-data') as run:
        artifact = wandb.Artifact(dataset_name, type=dataset_type)
        for directory in dirs:
            artifact.add_dir(directory)
        run.log_artifact(artifact)
def eda_work_with_dataset_to_wandb(dirs, project_name, dataset_name, dataset_type,
                                   artifact_type,
                                   summary_csv="kl_feature_importance.csv"):
    """Log EDA output directories to W&B, recording the source dataset as input.

    Starts a run with job type ``'eda'``, marks *dataset_name* as a consumed
    artifact (for lineage tracking), uploads every directory in *dirs* as an
    ``'eda_result'`` artifact, and logs the summary CSV as a browsable table.

    Args:
        dirs: Iterable of local directory paths with EDA outputs. The first
            directory must contain *summary_csv*.
        project_name: W&B project to log into.
        dataset_name: Name (optionally ``name:alias``) of the dataset artifact
            this EDA consumed.
        dataset_type: Type of the consumed dataset artifact.
        artifact_type: Type label for the produced ``'eda_result'`` artifact.
        summary_csv: File name inside ``dirs[0]`` logged to the run as a table.
            Defaults to ``"kl_feature_importance.csv"`` (original behavior).
    """
    with wandb.init(project=project_name, job_type='eda') as run:
        # Side effect only: registers the dataset as this run's input lineage.
        run.use_artifact(dataset_name, type=dataset_type)
        eda_artifact = wandb.Artifact('eda_result', type=artifact_type)
        for directory in dirs:
            eda_artifact.add_dir(directory)
        run.log_artifact(eda_artifact)
        # Log the summary table so it renders in the run workspace.
        run.log({
            "eda_result": pd.read_csv(os.path.join(dirs[0], summary_csv))
        })
def training_results_to_wandb(dirs, project_name, dataset_name, dataset_type,
                              artifact_type, model_name, job_type='train',
                              feature_importance_files=None):
    """Log trained-model directories to W&B, recording the dataset as input.

    Starts a run with the given *job_type*, marks *dataset_name* as a consumed
    artifact (lineage), uploads every directory in *dirs* as a model artifact,
    and — for training runs — logs each feature-importance CSV as a table
    keyed by its file stem (e.g. ``discount_05_feature_importance``).

    Args:
        dirs: Iterable of local directory paths with training outputs. The
            first directory must contain the feature-importance CSVs.
        project_name: W&B project to log into.
        dataset_name: Name (optionally ``name:alias``) of the consumed dataset
            artifact.
        dataset_type: Type of the consumed dataset artifact.
        artifact_type: Type label for the produced model artifact.
        model_name: Name given to the model artifact.
        job_type: W&B job type; tables are logged only when ``'train'``.
        feature_importance_files: CSV file names inside ``dirs[0]`` to log as
            tables. Defaults to the three per-discount files the original
            implementation hard-coded, so existing callers are unaffected.
    """
    if feature_importance_files is None:
        feature_importance_files = (
            "discount_05_feature_importance.csv",
            "discount_10_feature_importance.csv",
            "discount_15_feature_importance.csv",
        )
    with wandb.init(project=project_name, job_type=job_type) as run:
        # Side effect only: registers the dataset as this run's input lineage.
        run.use_artifact(dataset_name, type=dataset_type)
        model_artifact = wandb.Artifact(model_name, type=artifact_type)
        for directory in dirs:
            model_artifact.add_dir(directory)
        run.log_artifact(model_artifact)
        if job_type == 'train':
            # Key each table by the CSV's stem, matching the original
            # hard-coded keys (filename minus ".csv").
            run.log({
                os.path.splitext(fname)[0]: pd.read_csv(
                    os.path.join(dirs[0], fname)
                )
                for fname in feature_importance_files
            })