Spaces:
Sleeping
Sleeping
File size: 1,862 Bytes
6f4f21f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import wandb
import pandas as pd
import os
def upload_dataset_to_wandb(dirs, project_name, dataset_name, dataset_type='raw_dataset'):
    """Upload local directories to Weights & Biases as a single dataset artifact.

    Starts a short-lived W&B run (job_type='load-data'), packs every directory
    in *dirs* into one artifact, and logs it.

    Args:
        dirs: Iterable of directory paths whose contents go into the artifact.
        project_name: W&B project to log the run under.
        dataset_name: Name for the created artifact.
        dataset_type: Artifact type label (defaults to 'raw_dataset').
    """
    with wandb.init(project=project_name, job_type='load-data') as run:
        dataset_artifact = wandb.Artifact(dataset_name, type=dataset_type)
        # `directory` (not `dir`) so the builtin isn't shadowed.
        for directory in dirs:
            dataset_artifact.add_dir(directory)
        run.log_artifact(dataset_artifact)
def eda_work_with_dataset_to_wandb(dirs, project_name, dataset_name, dataset_type, artifact_type):
    """Log EDA result directories to W&B, linked to the dataset they analyze.

    Declares the input dataset artifact as used (for lineage), uploads the EDA
    output directories as an 'eda_result' artifact, and logs the KL feature
    importance table from the first directory as a run metric.

    Args:
        dirs: Iterable of directory paths with EDA outputs; dirs[0] must
            contain 'kl_feature_importance.csv'.
        project_name: W&B project to log the run under.
        dataset_name: Name (optionally with version/alias) of the input
            dataset artifact.
        dataset_type: Type of the input dataset artifact.
        artifact_type: Type label for the produced EDA artifact.
    """
    with wandb.init(project=project_name, job_type='eda') as run:
        # The call alone records lineage; the returned artifact isn't needed.
        run.use_artifact(dataset_name, type=dataset_type)
        eda_artifact = wandb.Artifact('eda_result', type=artifact_type)
        for directory in dirs:
            eda_artifact.add_dir(directory)
        run.log_artifact(eda_artifact)
        # Logging a DataFrame lets W&B render it as a Table in the UI.
        run.log({
            "eda_result": pd.read_csv(
                os.path.join(dirs[0], "kl_feature_importance.csv")
            )
        })
def training_results_to_wandb(dirs, project_name, dataset_name, dataset_type, artifact_type, model_name, job_type='train'):
    """Log training output directories to W&B as a model artifact.

    Declares the input dataset artifact as used (for lineage), uploads the
    training output directories as a model artifact, and — for training runs
    only — logs the per-discount feature importance tables found in dirs[0].

    Args:
        dirs: Iterable of directory paths with training outputs; when
            job_type == 'train', dirs[0] must contain the three
            'discount_XX_feature_importance.csv' files.
        project_name: W&B project to log the run under.
        dataset_name: Name (optionally with version/alias) of the input
            dataset artifact.
        dataset_type: Type of the input dataset artifact.
        artifact_type: Type label for the produced model artifact.
        model_name: Name for the created model artifact.
        job_type: W&B job type; feature importances are logged only for 'train'.
    """
    with wandb.init(project=project_name, job_type=job_type) as run:
        # The call alone records lineage; the returned artifact isn't needed.
        run.use_artifact(dataset_name, type=dataset_type)
        model_artifact = wandb.Artifact(model_name, type=artifact_type)
        for directory in dirs:
            model_artifact.add_dir(directory)
        run.log_artifact(model_artifact)
        if job_type == 'train':
            # DataFrames are rendered as W&B Tables in the run dashboard.
            run.log({
                "discount_05_feature_importance": pd.read_csv(
                    os.path.join(dirs[0], "discount_05_feature_importance.csv")
                ),
                "discount_10_feature_importance": pd.read_csv(
                    os.path.join(dirs[0], "discount_10_feature_importance.csv")
                ),
                "discount_15_feature_importance": pd.read_csv(
                    os.path.join(dirs[0], "discount_15_feature_importance.csv")
                ),
            })