"""Helpers for logging datasets, EDA outputs, and training results to Weights & Biases.

Each function opens its own short-lived W&B run (via ``wandb.init`` as a
context manager, so the run is always finished cleanly) and logs one or
more local directories as a W&B Artifact.
"""

import os

import pandas as pd
import wandb


def upload_dataset_to_wandb(dirs, project_name, dataset_name, dataset_type='raw_dataset'):
    """Upload local directories to W&B as a single dataset artifact.

    Args:
        dirs: Iterable of directory paths; the contents of each are added
            to the artifact.
        project_name: W&B project the run is logged under.
        dataset_name: Name of the artifact to create.
        dataset_type: Artifact ``type`` label (default ``'raw_dataset'``).
    """
    with wandb.init(project=project_name, job_type='load-data') as run:
        dataset_artifact = wandb.Artifact(dataset_name, type=dataset_type)
        # `directory` (not `dir`) — avoid shadowing the builtin.
        for directory in dirs:
            dataset_artifact.add_dir(directory)
        run.log_artifact(dataset_artifact)


def eda_work_with_dataset_to_wandb(dirs, project_name, dataset_name, dataset_type,
                                   artifact_type):
    """Log EDA output directories to W&B, linked to an existing dataset artifact.

    Declares the input dataset via ``use_artifact`` (for lineage tracking),
    uploads ``dirs`` as an ``eda_result`` artifact, and logs the
    ``kl_feature_importance.csv`` table from the first directory.

    Args:
        dirs: Iterable of directory paths containing EDA output; ``dirs[0]``
            must contain ``kl_feature_importance.csv``.
        project_name: W&B project the run is logged under.
        dataset_name: Name (with optional version/alias) of the consumed
            dataset artifact.
        dataset_type: Type of the consumed dataset artifact.
        artifact_type: ``type`` label for the produced EDA artifact.
    """
    with wandb.init(project=project_name, job_type='eda') as run:
        # Return value intentionally unused: use_artifact records lineage
        # (this run consumed the dataset) as a side effect.
        run.use_artifact(dataset_name, type=dataset_type)
        eda_artifact = wandb.Artifact('eda_result', type=artifact_type)
        for directory in dirs:
            eda_artifact.add_dir(directory)
        run.log_artifact(eda_artifact)
        # NOTE(review): logging a raw DataFrame relies on wandb's implicit
        # DataFrame -> Table conversion; wandb.Table(dataframe=...) is the
        # explicit form — confirm before changing.
        run.log(
            {
                "eda_result": pd.read_csv(
                    os.path.join(dirs[0], "kl_feature_importance.csv")
                )
            }
        )


def training_results_to_wandb(dirs, project_name, dataset_name, dataset_type,
                              artifact_type, model_name, job_type='train'):
    """Log model output directories to W&B, linked to the training dataset.

    Declares the input dataset via ``use_artifact`` (lineage), uploads
    ``dirs`` as a model artifact, and — for training runs only — logs the
    three per-discount feature-importance CSVs found in ``dirs[0]``.

    Args:
        dirs: Iterable of directory paths containing model output; when
            ``job_type == 'train'``, ``dirs[0]`` must contain
            ``discount_{05,10,15}_feature_importance.csv``.
        project_name: W&B project the run is logged under.
        dataset_name: Name (with optional version/alias) of the consumed
            dataset artifact.
        dataset_type: Type of the consumed dataset artifact.
        artifact_type: ``type`` label for the produced model artifact.
        model_name: Name of the model artifact to create.
        job_type: W&B job type; feature-importance tables are logged only
            for ``'train'`` (default).
    """
    with wandb.init(project=project_name, job_type=job_type) as run:
        # Return value intentionally unused: use_artifact records lineage only.
        run.use_artifact(dataset_name, type=dataset_type)
        model_artifact = wandb.Artifact(model_name, type=artifact_type)
        for directory in dirs:
            model_artifact.add_dir(directory)
        run.log_artifact(model_artifact)
        if job_type == 'train':
            run.log(
                {
                    "discount_05_feature_importance": pd.read_csv(
                        os.path.join(dirs[0], "discount_05_feature_importance.csv")
                    ),
                    "discount_10_feature_importance": pd.read_csv(
                        os.path.join(dirs[0], "discount_10_feature_importance.csv")
                    ),
                    "discount_15_feature_importance": pd.read_csv(
                        os.path.join(dirs[0], "discount_15_feature_importance.csv")
                    ),
                }
            )