# Source code for c3.libraries.sampling

"""Functions to select samples from a dataset by various criteria."""

import random
import numpy as np

# Registry of sampling functions, keyed by function name. Entries are added
# by the ``sampling_reg_deco`` decorator defined in this module.
sampling = {}


def sampling_reg_deco(func):
    """
    Register a function in the module-level ``sampling`` dictionary.

    Parameters
    ----------
    func : callable
        Function to register. It is stored under its ``__name__``; an
        existing entry with the same name is replaced.

    Returns
    -------
    callable
        The function that was passed in, unchanged, so this can be applied
        as a decorator.
    """
    key = str(func.__name__)
    sampling[key] = func
    return func
@sampling_reg_deco
def all(learn_from, batch_size):
    """
    Return the index of every data point.

    Parameters
    ----------
    learn_from : list
        List of data points.
    batch_size : int
        Number of points to select. Not used by this sampler; every index
        is returned regardless of its value.

    Returns
    -------
    list
        All indices, ``0`` to ``len(learn_from) - 1``.
    """
    n_points = len(learn_from)
    return list(range(n_points))
@sampling_reg_deco
def from_start(learn_from, batch_size):
    """
    Select indices from the beginning of the data set.

    Parameters
    ----------
    learn_from : list
        List of data points.
    batch_size : int
        Number of points to select.

    Returns
    -------
    list
        The first ``batch_size`` indices. If ``batch_size`` exceeds the
        number of data points, all indices are returned.
    """
    indices = range(len(learn_from))
    # Slicing a range follows the same rules as slicing a list.
    return list(indices[:batch_size])
@sampling_reg_deco
def from_end(learn_from, batch_size):
    """
    Select indices from the end of the data set.

    Parameters
    ----------
    learn_from : list
        List of data points.
    batch_size : int
        Number of points to select.

    Returns
    -------
    list
        The last ``batch_size`` indices in ascending order. If
        ``batch_size`` exceeds the number of data points, all indices are
        returned; if it is 0, the list is empty.
    """
    indices = list(range(len(learn_from)))
    # ``indices[-0:]`` is ``indices[0:]``, i.e. the whole list, so a batch
    # size of zero has to be handled explicitly to yield no points.
    if batch_size == 0:
        return []
    return indices[-batch_size:]
@sampling_reg_deco
def even(learn_from, batch_size):
    """
    Select evenly distanced samples across the set.

    Every ``stride``-th index is taken, starting at 0, where
    ``stride = ceil(len(learn_from) / batch_size)``. The result holds at
    most ``batch_size`` indices and can hold fewer when the stride does not
    divide the data set evenly.

    Parameters
    ----------
    learn_from : list
        List of data points.
    batch_size : int
        Number of points to select.

    Returns
    -------
    list
        Selected indices in ascending order. Empty if ``learn_from`` is
        empty or ``batch_size`` is not positive.
    """
    total_size = len(learn_from)
    # An empty data set would produce a stride of 0 (an invalid slice step),
    # a zero batch size would divide by zero, and a negative one would give
    # a negative stride; none of these describe a meaningful selection.
    if total_size == 0 or batch_size <= 0:
        return []
    stride = int(np.ceil(total_size / batch_size))
    return list(range(0, total_size, stride))
@sampling_reg_deco
def random_sample(learn_from, batch_size):
    """
    Select indices at random, without replacement.

    Parameters
    ----------
    learn_from : list
        List of data points.
    batch_size : int
        Number of points to select.

    Returns
    -------
    list
        ``batch_size`` distinct indices drawn with ``random.sample``.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` if ``batch_size`` is negative or larger
        than the number of data points.
    """
    population = range(len(learn_from))
    return random.sample(population, batch_size)
@sampling_reg_deco
def high_std(learn_from, batch_size):
    """
    Select points that have a high ratio of standard deviation to mean.

    Sampling from ORBIT data, points with a high std have the most coherent
    error, thus might be suitable for model learning. This has yet to be
    confirmed beyond anecdotal observation.

    Parameters
    ----------
    learn_from : list
        List of data points. Each point is indexed with the key
        ``"results"``, whose value is passed to ``np.std`` and ``np.mean``.
    batch_size : int
        Number of points to select.

    Returns
    -------
    numpy.ndarray
        Indices of the ``batch_size`` points with the largest std/mean
        ratio, ordered by ascending ratio. Empty if ``batch_size`` is 0.
    """
    # NOTE(review): a point whose mean is 0 presumably yields an inf/nan
    # ratio here; confirm whether such points can occur in the data.
    ratios = [
        np.std(sample["results"]) / np.mean(sample["results"])
        for sample in learn_from
    ]
    order = np.argsort(np.array(ratios))
    # ``order[-0:]`` would return every index, so zero is handled explicitly.
    if batch_size == 0:
        return order[:0]
    return order[-batch_size:]
@sampling_reg_deco
def even_fid(learn_from, batch_size):
    """
    Select evenly among reached fidelities.

    The points are ranked by the mean of their ``"results"`` entry and every
    ``stride``-th point of that ranking is taken, where
    ``stride = ceil(len(learn_from) / batch_size)``.

    Parameters
    ----------
    learn_from : list
        List of data points. Each point is indexed with the key
        ``"results"``, whose value is passed to ``np.mean``.
    batch_size : int
        Number of points to select.

    Returns
    -------
    numpy.ndarray
        Selected indices, ordered by ascending mean result. At most
        ``batch_size`` indices are returned; the array is empty if
        ``learn_from`` is empty or ``batch_size`` is not positive.
    """
    total_size = len(learn_from)
    means = [np.mean(sample["results"]) for sample in learn_from]
    ranking = np.argsort(np.array(means))
    # An empty data set would produce a stride of 0 (an invalid slice step),
    # a zero batch size would divide by zero, and a negative one would give
    # a negative stride; return an empty selection in those cases.
    if total_size == 0 or batch_size <= 0:
        return ranking[:0]
    stride = int(np.ceil(total_size / batch_size))
    return ranking[::stride]