Source code for netpyne.batchtools.search

import ray
import pandas
import os
from ray import tune, train
from ray.air import session, RunConfig
from ray.tune.search.basic_variant import BasicVariantGenerator
from ray.tune.search import create_searcher, ConcurrencyLimiter, SEARCH_ALG_IMPORT
from netpyne.batchtools import runtk
from collections import namedtuple
from batchtk.utils import get_path, SQLiteStorage, ScriptLogger
from io import StringIO
import numpy
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from netpyne.batchtools import submits
from batchtk import runtk
from batchtk.runtk.trial import trial, LABEL_POINTER
import datetime
#import signal #incompatible with signal and threading from ray
#import threading

choice = tune.choice
grid = tune.grid_search
uniform = tune.uniform
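# Illustrative usage sketch (parameter names are hypothetical): these aliases
# re-export ray.tune search-space helpers so a user script can build a `params`
# dict without importing ray directly, e.g.
# >>> params = {'synMechTau2': choice([3.0, 5.0, 7.0]),
# ...           'connWeight': uniform(0.005, 0.15)}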

class LocalGridDispatcher(runtk.dispatchers.LocalDispatcher):
    def start(self): super().start(restart=True)
    def connect(self): return
    def recv(self, interval): return '{"_none_placeholder": 0}'  # dummy json value to return...


class SSHGridDispatcher(runtk.dispatchers.SSHDispatcher):
    def start(self): super().start(restart=True)
    def connect(self): return
    def recv(self, interval): return '{"_none_placeholder": 0}'  # dummy json value to return...
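# Illustrative sketch: the grid dispatchers above stub out the communication
# channel so that grid searches without a comm_type still satisfy the
# dispatcher interface; start() always restarts, connect() is a no-op, and
# recv() yields a dummy JSON payload that parses cleanly:
# >>> import json
# >>> json.loads(LocalGridDispatcher.recv(None, interval=1))
# {'_none_placeholder': 0}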
""" Parameters : space – Hyperparameter search space definition for Optuna’s sampler. This can be either a dict with parameter names as keys and optuna.distributions as values, or a Callable - in which case, it should be a define-by-run function using optuna.trial to obtain the hyperparameter values. The function should return either a dict of constant values with names as keys, or None. For more information, see https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/002_configurations.html. Warning No actual computation should take place in the define-by-run function. Instead, put the training logic inside the function or class trainable passed to tune.Tuner(). metric – The training result objective value attribute. If None but a mode was passed, the anonymous metric _metric will be used per default. Can be a list of metrics for multi-objective optimization. mode – One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute. Can be a list of modes for multi-objective optimization (corresponding to metric). points_to_evaluate – Initial parameter suggestions to be run first. This is for when you already have some good parameters you want to run first to help the algorithm make better suggestions for future parameters. Needs to be a list of dicts containing the configurations. sampler – Optuna sampler used to draw hyperparameter configurations. Defaults to MOTPESampler for multi-objective optimization with Optuna<2.9.0, and TPESampler in every other case. See https://optuna.readthedocs.io/en/stable/reference/samplers/index.html for available Optuna samplers. Warning Please note that with Optuna 2.10.0 and earlier default MOTPESampler/TPESampler suffer from performance issues when dealing with a large number of completed trials (approx. >100). This will manifest as a delay when suggesting new configurations. This is an Optuna issue and may be fixed in a future Optuna release. seed – Seed to initialize sampler with. This parameter is only used when sampler=None. In all other cases, the sampler you pass should be initialized with the seed already. evaluated_rewards – If you have previously evaluated the parameters passed in as points_to_evaluate you can avoid re-running those trials by passing in the reward attributes as a list so the optimiser can be told the results without needing to re-compute the trial. Must be the same length as points_to_evaluate. """
def prune_dataframe(results: pandas.DataFrame) -> pandas.DataFrame:
    #def process_column(column):
    #    expanded_column = column.apply(lambda x: pandas.read_csv(StringIO(x), sep=r'\s\s+', header=None))
    #    return pandas.DataFrame([c.values.T[1] for c in expanded_column], columns=expanded_column[0].values.T[0]).drop(columns=['dtype:'])
    # call process_column instead, with both 'config' and 'data'
    # use a >=2 whitespace delimiter for compatibility with lists and dictionaries, where a single whitespace
    # character is placed between objects.
    try:
        data = results['data'].apply(lambda x: pandas.read_csv(StringIO(x), sep=r'\s\s+', header=None))
        df = pandas.DataFrame([d.values.T[1] for d in data], columns=data[0].values.T[0]).iloc[:, :-1]
    except Exception as e:
        df = results
    #config = results['config'].apply(lambda x: pandas.read_csv(StringIO(x), sep=r'\s+', header=None))
    return df
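# Hedged usage sketch (illustrative values): each 'data' cell is expected to
# hold the string repr of a pandas Series, with >=2 spaces between name and
# value, e.g.
# >>> raw = 'loss     0.5\nrate    12.0\ndtype: float64'
# >>> pruned = prune_dataframe(pandas.DataFrame({'data': [raw]}))  # columns: loss, rate
# The trailing 'dtype:' entry is dropped by the .iloc[:, :-1] slice; on any
# parsing failure the input frame is returned unchanged.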
study = namedtuple('Study', ['results', 'data'])  # should be constant?

constructors = namedtuple('constructors', 'dispatcher, submit')
constructor_tuples = {
    ('sge', 'socket'): constructors(runtk.dispatchers.INETDispatcher, submits.SGESubmitSOCK),
    ('sge', 'sfs'): constructors(runtk.dispatchers.LocalDispatcher, submits.SGESubmitSFS),
    ('sge', None): constructors(LocalGridDispatcher, submits.SGESubmit),
    ('ssh_sge', 'sftp'): constructors(runtk.dispatchers.SSHDispatcher, submits.SGESubmitSSH),  # TODO, both of these need comm types
    ('ssh_slurm', 'sftp'): constructors(runtk.dispatchers.SSHDispatcher, submits.SlurmSubmitSSH),
    ('ssh_sge', None): constructors(SSHGridDispatcher, submits.SGESubmitSSH),  # don't need to worry about changing the handler
    ('ssh_slurm', None): constructors(SSHGridDispatcher, submits.SlurmSubmitSSH),
    #('zsh', 'inet'): constructors(runtk.dispatchers.INETDispatcher, runtk.submits.ZSHSubmitSOCK),  # TODO preferable to use AF_UNIX sockets on local machines
    #('slurm', 'socket'): constructors(runtk.dispatchers.INETDispatcher, submits.SlurmSubmitSOCK),
    #('slurm', 'sfs'): constructors(runtk.dispatchers.SFSDispatcher, submits.SlurmSubmitSFS),
    ('sh', 'socket'): constructors(runtk.dispatchers.INETDispatcher, submits.SHSubmitSOCK),
    ('sh', 'sfs'): constructors(runtk.dispatchers.LocalDispatcher, submits.SHSubmitSFS),
    ('sh', None): constructors(LocalGridDispatcher, submits.SHSubmit),
}  # TODO, just say "socket"?

"""
some shim functions before ray_search
"""
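# Hedged example: constructor_tuples keys (job_type, comm_type) pairs to their
# dispatcher/submit constructors, so a lookup behaves as
# >>> constructor_tuples[('sh', 'socket')].dispatcher is runtk.dispatchers.INETDispatcher
# True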
def generate_constructors(job_type, comm_type, **kwargs):
    """
    Returns the (dispatcher, submit) constructor pair for ray_search based on the job_type and comm_type inputs
    """
    if (job_type, comm_type) not in constructor_tuples:
        raise ValueError("Invalid job_type or comm_type pairing")
    return constructor_tuples[(job_type, comm_type)]
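# Hedged usage sketch:
# >>> generate_constructors('sh', 'socket')
# constructors(dispatcher=<class ...INETDispatcher>, submit=<class ...SHSubmitSOCK>)
# An unsupported pairing such as ('pbs', 'socket') raises the ValueError above.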
def generate_parameters(params, algorithm, **kwargs):
    """
    Returns a dictionary of parameters for ray_search based on the input dictionary

    from:
    params = {
        'synMechTau2': [3.0, 5.0, 7.0],  # assumes list of values by default if grid search-like algo
        #'synMechTau2': [3.0, 7.0],  # assumes lower/upper bounds by default if evol-like algo
        'connWeight': paramtypes.sample_from(lambda _: numpy.random.uniform(0.005, 0.15)),
    }  # can optionally pass any of the paramtypes (= ray.tune data types)
    """
    # TODO: check coverage of conditional statements (looks okay?)
    ray_params = {}
    _expected_trials_per_sample = 1
    for param, space in params.items():
        if isinstance(space, (list, tuple, range, numpy.ndarray)) and algorithm in {'variant_generator'}:
            ray_params[param] = tune.grid_search(space)  # specify random for uniform and choice.
            _expected_trials_per_sample *= len(space)
        elif isinstance(space, (list, tuple)) and algorithm in SEARCH_ALG_IMPORT.keys():
            if len(space) == 2:  # if 2, sample from uniform(lb, ub)
                ray_params[param] = tune.uniform(*space)
            else:  # otherwise treat as a list for a categorical search
                ray_params[param] = tune.choice(space)
        else:  # assume a tune search space was defined
            ray_params[param] = space
            if isinstance(space, dict):
                _expected_trials_per_sample *= len(space['grid_search'])
    ray_params['_expected_trials_per_sample'] = _expected_trials_per_sample
    return ray_params
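# Hedged example (hypothetical parameter name): a plain list becomes a grid
# under the default 'variant_generator' algorithm,
# >>> generate_parameters({'synMechTau2': [3.0, 5.0, 7.0]}, 'variant_generator')
# {'synMechTau2': {'grid_search': [3.0, 5.0, 7.0]}, '_expected_trials_per_sample': 3}
# while a two-element list under an optimizer such as 'optuna' is treated as
# (lower, upper) bounds for tune.uniform().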
def shim(dispatcher_constructor: Optional[Callable] = None,  # constructor for the dispatcher (e.g. INETDispatcher)
         submit_constructor: Optional[Callable] = None,  # constructor for the submit (e.g. SHSubmitSOCK)
         job_type: Optional[str] = None,  # the submission engine to run a single simulation (e.g. 'sge', 'sh')
         comm_type: Optional[str] = None,  # the method of communication between host dispatcher and the simulation (e.g. 'socket', 'sfs' (shared filesystem), None (no communication))
         run_config: Optional[Dict] = None,  # batch configuration (keyword: string pairs to customize the submit template)
         params: Optional[Dict] = None,  # search space (dictionary of parameter keys: tune search spaces)
         algorithm: Optional[str] = "variant_generator",  # search algorithm to use, see SEARCH_ALG_IMPORT for available options
         label: Optional[str] = 'search',  # label for the search
         output_path: Optional[str] = './batch',  # directory for storing generated files
         checkpoint_path: Optional[str] = './checkpoint',  # directory for storing checkpoint files
         max_concurrent: Optional[int] = 1,  # number of concurrent trials to run at one time
         batch: Optional[bool] = True,  # whether concurrent trials should run synchronously or asynchronously
         num_samples: Optional[int] = 1,  # number of trials to run
         metric: Optional[str] = None,  # metric to optimize (this should match some key: value pair in the returned data)
         mode: Optional[str] = "min",  # either 'min' or 'max' (whether to minimize or maximize the metric)
         sample_interval: Optional[int] = 15,  # interval to check for new results (in seconds)
         algorithm_config: Optional[dict] = None,  # additional configuration for the search algorithm
         ray_config: Optional[dict] = None,  # additional configuration for the ray initialization
         attempt_restore: Optional[bool] = True,  # whether to attempt to restore from a checkpoint
         clean_checkpoint: Optional[bool] = True,  # whether to clean the checkpoint directory after the search
         report_config=('path', 'config', 'data'),  # what to report back to the user
         prune_metadata: Optional[bool] = True,  # whether to prune the metadata from the results.csv
         remote_dir: Optional[str] = None,  # absolute path for the directory to run the search in (for submissions over SSH)
         host: Optional[str] = None,  # host to run the search on
         key: Optional[str] = None,  # key for TOTP generator...
         file_cleanup: Optional[bool] = True,  # whether to clean up accessory files after the search is completed
         advanced_logging: Optional[bool | str] = True,
         ) -> Dict:
    kwargs = locals()
    if metric is None and algorithm not in ['variant_generator', 'random', 'grid']:
        raise ValueError("a metric (string) must be specified for optimization searches")
    if algorithm == 'grid':
        kwargs['algorithm'] = 'variant_generator'
    if job_type is not None and (comm_type is not None or metric is None):
        kwargs['dispatcher_constructor'], kwargs['submit_constructor'] = generate_constructors(job_type, comm_type)
    if dispatcher_constructor is not None and (submit_constructor is not None or metric is None):
        kwargs['dispatcher_constructor'] = dispatcher_constructor
        kwargs['submit_constructor'] = submit_constructor
    if kwargs['dispatcher_constructor'] is None or (kwargs['submit_constructor'] is None and metric is not None):
        raise ValueError("missing job method and communication type for an optimization search, either specify a dispatcher_constructor and submit_constructor or a job_type and comm_type")
    if (kwargs['dispatcher_constructor'] == runtk.dispatchers.SSHDispatcher) and (host is None or remote_dir is None):
        raise ValueError("missing host and remote directory for SSH based dispatcher")
    if (kwargs['submit_constructor'] == submits.SlurmSubmitSSH) and key is None:
        raise ValueError("missing key for Slurm based dispatcher")
    if kwargs['dispatcher_constructor'] is None:
        raise ValueError("missing job type for grid or random based search, specify a job type")
    if params is None:
        raise ValueError("missing parameters, specify params")
    if run_config is None:
        run_config = {}
    [kwargs.pop(args) for args in ['job_type', 'comm_type']]
    kwargs['params'] = generate_parameters(**kwargs)
    return kwargs
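# Hedged usage sketch (a local grid search over one parameter, no metric and
# therefore no inter-process communication):
# >>> cfg = shim(job_type='sh', params={'synMechTau2': [3.0, 5.0, 7.0]})
# >>> cfg['dispatcher_constructor'] is LocalGridDispatcher
# True
# >>> cfg['params']['_expected_trials_per_sample']
# 3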
""" SEE: 'variant_generator' 'random' <- deprecated -> points to variant_generator 'ax' 'dragonfly' <- deprecated 'skopt' <- deprecated 'hyperopt' 'bayesopt' 'bohb' 'nevergrad' 'optuna' 'zoopt' 'sigopt' <- deprecated 'hebo' 'blendsearch' <- deprecated 'cfo' <- deprecated """