| import pandas as pd | |
| from utils.read_config import get_args | |
| # Function to load sample of dataset | |
def load_sample(
    num_sample_records: int,
    sample_method: str,
    df: pd.DataFrame,
    col_name: str,
) -> pd.DataFrame:
    """Return a sample of a single column of ``df``.

    The requested size is capped at the configured ``first_records`` value.
    For ``"Random"`` it is additionally capped at the number of rows in
    ``df`` so that all three methods degrade the same way on short frames.

    Args:
        num_sample_records: Requested number of rows.
        sample_method: ``"First"``, ``"Last"`` or ``"Random"``. Any other
            value leaves the rows unsampled.
        df: Frame to sample from.
        col_name: The only column kept in the result.

    Returns:
        For a recognised method, a copy of the selected rows after
        ``reset_index()`` (the previous index is moved into a column next to
        ``col_name``). For an unrecognised method, ``df[[col_name]]``
        unchanged.
    """
    max_records = get_args("first_records")
    random_seed = get_args("random_seed")
    num_sample_records = min(num_sample_records, max_records)

    # Keep only required column
    column = df[[col_name]]

    if sample_method == "First":
        sampled = column.head(num_sample_records)
    elif sample_method == "Last":
        # tail(0) is empty, whereas iloc[-0:] would select every row.
        sampled = column.tail(num_sample_records)
    elif sample_method == "Random":
        # sample() raises ValueError when asked for more rows than exist
        # (sampling without replacement), so cap at the population size.
        sampled = column.sample(
            min(num_sample_records, len(column)),
            random_state=random_seed,
        )
    else:
        return column

    return sampled.copy().reset_index()