# Source code for pyabc.external.base

import copy
import logging
import os
import subprocess  # noqa: S404
import tempfile
from typing import List

import numpy as np
import pandas as pd

from ..model import Model
from ..parameters import Parameter
from .utils import timethis

# Logger used by this module to report failed external calls
logger = logging.getLogger("ABC.External")

# Return code that `ExternalHandler.run` reports when the call timed out
TIMEOUT: int = -15
# Dictionary key under which the output location is stored in results
LOC: str = "loc"
# Dictionary key under which the call's return code is stored in results
RETURNCODE: str = "returncode"



class ExternalHandler:
    """
    Handler for calls to external scripts.

    This class bundles repeated functionality: creating the temporary output
    location, assembling the command line, running the process with optional
    output suppression and timeout, and reporting failures.
    """

    def __init__(
        self,
        executable: str,
        file: str = None,
        fixed_args: List = None,
        create_folder: bool = False,
        suffix: str = None,
        prefix: str = None,
        dir: str = None,
        show_stdout: bool = False,
        show_stderr: bool = True,
        raise_on_error: bool = False,
        timeout: float = None,
    ):
        """
        Parameters
        ----------
        executable:
            Name of the executable to call, e.g. bash, java or Rscript.
            The executable may be parameterized, e.g. appearances of {loc}
            in the string are replaced at runtime by the location of the
            output.
        file:
            Path to the file to be executed, e.g. a
            .sh, .java or .r file, or also a .xml file depending on the
            executable. If None, no file argument is passed.
        fixed_args:
            Arguments to use every time. Defaults to no arguments.
        create_folder:
            Whether the function should create a temporary directory.
            If False, only one temporary file is created.
        suffix, prefix, dir:
            Suffix, prefix, and base directory for the created
            temporary files.
        show_stdout, show_stderr:
            Whether to show or hide the stdout and stderr streams.
        raise_on_error:
            Whether to raise when an error in the execution of the external
            script occurs, or just continue.
        timeout:
            Maximum execution time in seconds, after which the executable is
            stopped.
        """
        self.executable = executable
        self.file = file
        self.fixed_args = [] if fixed_args is None else fixed_args
        self.create_folder = create_folder
        self.suffix = suffix
        self.prefix = prefix
        self.dir = dir
        self.show_stdout = show_stdout
        self.show_stderr = show_stderr
        self.raise_on_error = raise_on_error
        self.timeout: float = timeout

    def create_loc(self):
        """Create temporary file or folder.

        Returns
        -------
        loc: str
            Path of the created file or folder.
        """
        if self.create_folder:
            return tempfile.mkdtemp(
                suffix=self.suffix, prefix=self.prefix, dir=self.dir
            )
        fd, path = tempfile.mkstemp(
            suffix=self.suffix, prefix=self.prefix, dir=self.dir
        )
        # mkstemp hands back an open OS-level descriptor; only the path is
        # needed here, so close it right away instead of leaking it
        os.close(fd)
        return path

    def create_executable(self, loc):
        """Parse and return executable.

        Replaces instances of {loc} by the location `loc`.
        """
        return self.executable.replace("{loc}", loc)

    def run(self, args: List[str] = None, cmd: str = None, loc: str = None):
        """Run the script for the given arguments.

        Parameters
        ----------
        args: List[str], optional
            Arguments to pass to the external program, e.g. parameters.
            None is treated as "no additional arguments".
        cmd: str, optional
            If this is not None, then it is assumed to contain the full
            command to be executed via the shell (then `args` is ignored).
            Be aware of possible security implications of shell injection.
        loc: str, optional
            Location for the output. If None is passed, one is created.

        Returns
        -------
        ret: dict
            Dictionary with the output location under key `LOC` and the
            return code of the call under key `RETURNCODE`. On timeout,
            the return code is `TIMEOUT`.

        Raises
        ------
        ValueError
            If the call returned a nonzero code (or timed out) and
            `raise_on_error` is set.
        """
        # create target on file system
        if loc is None:
            loc = self.create_loc()

        # redirect hidden streams to the null device; subprocess.DEVNULL
        # needs no file handle that would have to be closed afterwards
        streams = {}
        if not self.show_stdout:
            streams['stdout'] = subprocess.DEVNULL
        if not self.show_stderr:
            streams['stderr'] = subprocess.DEVNULL

        # call
        try:
            if cmd is not None:
                status = subprocess.run(
                    cmd,
                    shell=True,  # noqa: S602
                    **streams,
                    timeout=self.timeout,
                )
            else:
                command = [self.create_executable(loc)]
                # `file` is optional, do not pass None on to the process
                if self.file is not None:
                    command.append(self.file)
                command.extend(self.fixed_args)
                if args is not None:
                    command.extend(args)
                command.append(f'target={loc}')
                status = subprocess.run(  # noqa: S603
                    command,
                    **streams,
                    timeout=self.timeout,
                )
            returncode, msg = status.returncode, ""
        except subprocess.TimeoutExpired as e:
            returncode, msg = TIMEOUT, str(e)

        # any nonzero code (including the timeout code) counts as failure
        if returncode:
            msg = (
                f"Simulation error for arguments {args}: "
                f"returncode {returncode}, msg={msg}."
            )
            if self.raise_on_error:
                raise ValueError(msg)
            else:
                logger.warning(msg)

        # return location and call's return status
        return {LOC: loc, RETURNCODE: returncode}





class ExternalModel(Model):
    """
    Interface to a model that is called via an external simulator.

    Parameters are passed to the model as named command line arguments
    in the form

        {executable} {file} {par1}={val1} {par2}={val2} ... target={loc}

    Here, {file} is the script that performs the model simulation, and {loc}
    is the name of a temporary file or folder that was created to
    store the simulated data.


    .. note::
        The generated temporary files are not automatically deleted, unless
        by the system e.g. in the /tmp directory upon restart.
    """

    def __init__(
        self,
        executable: str,
        file: str,
        fixed_args: List = None,
        create_folder: bool = False,
        suffix: str = None,
        prefix: str = "modelsim_",
        dir: str = None,
        show_stdout: bool = False,
        show_stderr: bool = True,
        raise_on_error: bool = False,
        timeout: float = None,
        name: str = "ExternalModel",
    ):
        """Initialize the model.

        Parameters
        ----------
        name: str, optional (default = "ExternalModel")
            As in pyabc.Model.name.

        All other parameters as in ExternalHandler.
        """
        super().__init__(name=name)
        self.eh = ExternalHandler(
            executable=executable,
            file=file,
            fixed_args=fixed_args,
            create_folder=create_folder,
            suffix=suffix,
            prefix=prefix,
            dir=dir,
            show_stdout=show_stdout,
            show_stderr=show_stderr,
            raise_on_error=raise_on_error,
            timeout=timeout,
        )

    def __call__(self, pars: Parameter):
        """Run the external simulator for the parameters `pars`.

        Each entry of `pars` is turned into one `{key}={val}` argument.

        Returns
        -------
        The result of `ExternalHandler.run`.
        """
        # NOTE(review): every argument carries a trailing space; kept as is
        # since external scripts may rely on this exact format -- confirm
        args = [f"{key}={val} " for key, val in pars.items()]
        return self.eh.run(args)

    def sample(self, pars):
        """Simulate for the parameters `pars`; same as calling the model."""
        return self(pars)

    @timethis
    def sample_timing(self, pars):
        """Call the model for `pars`, wrapped by the `timethis` decorator."""
        return self(pars)

    def eval_param_limits(self, limits):
        """
        Evaluate single parameter's boundary value on computation time.

        Parameters
        ----------
        limits: dict
            the lower and upper boundary values of parameters. The key would
            be the parameter name and the value would be a list of the lower
            and upper limit of parameter value, e.g., [lower, upper].

        Returns
        -------
        time_eval_dict: dict
            a dictionary that contains the parameter names as key and a list
            as a value. The list contains the result of `sample_timing`
            (the computation time) when using lower and upper limits,
            e.g., [lower, upper].
        """
        time_eval_dict = {}
        for key, val in limits.items():
            lower_bound = self.sample_timing({key: val[0]})
            upper_bound = self.sample_timing({key: val[1]})
            time_eval_dict[key] = [lower_bound, upper_bound]
        return time_eval_dict

    def eval_param_limits_matrix(self, limits):
        """
        Evaluate two parameters' boundary values on computation time.

        Every ordered pair of parameters is evaluated, so both matrices are
        filled completely. On the diagonal, only the single parameter is
        passed to the model.

        Parameters
        ----------
        limits: dict
            the lower and upper boundary values of parameters. The key would
            be the parameter name and the value would be a list of the lower
            and upper limit of parameter value, e.g., [lower, upper].

        Returns
        -------
        time_eval_mat_df_lower: df
            a dataframe for the computation time measured when using the lower
            limit value of parameters.
        time_eval_mat_df_upper: df
            a dataframe for the computation time measured when using the upper
            limit value of parameters.
        """
        time_eval_mat = np.zeros(shape=(len(limits), len(limits)))
        time_eval_mat_df_lower = pd.DataFrame(
            time_eval_mat,
            columns=[list(limits.keys())],
            index=[list(limits.keys())],
        )
        time_eval_mat_df_upper = copy.deepcopy(time_eval_mat_df_lower)
        for key_col, val_col in limits.items():
            for key_row, val_row in limits.items():
                if key_col == key_row:
                    lower_pars = {key_col: val_col[0]}
                    upper_pars = {key_col: val_col[1]}
                else:
                    lower_pars = {key_col: val_col[0], key_row: val_row[0]}
                    upper_pars = {key_col: val_col[1], key_row: val_row[1]}

                # time both limits separately so that the upper-limit entry
                # never reuses a value from an earlier pair
                lower_bound = self.sample_timing(lower_pars)
                upper_bound = self.sample_timing(upper_pars)

                time_eval_mat_df_lower.loc[[key_col], [key_row]] = lower_bound
                time_eval_mat_df_upper.loc[[key_col], [key_row]] = upper_bound

        return time_eval_mat_df_lower, time_eval_mat_df_upper





class ExternalSumStat:
    """
    Interface to an external calculator that takes the simulated model output
    and writes to file the summary statistics.

    Format:
        {executable} {file} model_output={model_output} target={loc}

    Here, {file} is the path to the summary statistics computation script,
    {model_output} is the path to the previously generated model output, and
    {loc} is the destination to write the summary statistics to.
    """

    def __init__(
        self,
        executable: str,
        file: str,
        fixed_args: List = None,
        create_folder: bool = False,
        suffix: str = None,
        prefix: str = "sumstat_",
        dir: str = None,
        show_stdout: bool = False,
        show_stderr: bool = True,
        raise_on_error: bool = False,
        timeout: float = None,
    ):
        """Set up the handler; all parameters as in ExternalHandler."""
        handler_options = {
            "executable": executable,
            "file": file,
            "fixed_args": fixed_args,
            "create_folder": create_folder,
            "suffix": suffix,
            "prefix": prefix,
            "dir": dir,
            "show_stdout": show_stdout,
            "show_stderr": show_stderr,
            "raise_on_error": raise_on_error,
            "timeout": timeout,
        }
        self.eh = ExternalHandler(**handler_options)

    def __call__(self, model_output):
        """
        Create summary statistics from the `model_output` generated
        by the model.

        The location stored in `model_output` is passed to the external
        script as the single `model_output=...` argument, and the result of
        the handler's `run` is returned.
        """
        model_loc = model_output[LOC]
        return self.eh.run(args=[f"model_output={model_loc}"])





class ExternalDistance:
    """
    Use script and sumstat output files to compute the distance.

    Format:
        {executable} {file} sumstat_0={sumstat_0} sumstat_1={sumstat_1}
        target={loc}

    The distance is written to a file, which is then read in (it must only
    contain a single float number).
    """

    def __init__(
        self,
        executable: str,
        file: str,
        fixed_args: List = None,
        suffix: str = None,
        prefix: str = "dist_",
        dir: str = None,
        show_stdout: bool = False,
        show_stderr: bool = True,
        raise_on_error: bool = False,
        timeout: float = None,
    ):
        """Set up the handler; all parameters as in ExternalHandler.

        The distance is always written to a temporary file, never a folder.
        """
        self.eh = ExternalHandler(
            executable=executable,
            file=file,
            fixed_args=fixed_args,
            create_folder=False,
            suffix=suffix,
            prefix=prefix,
            dir=dir,
            show_stdout=show_stdout,
            show_stderr=show_stderr,
            raise_on_error=raise_on_error,
            timeout=timeout,
        )

    def __call__(self, sumstat_0, sumstat_1):
        """Compute the distance between two summary statistics.

        Parameters
        ----------
        sumstat_0, sumstat_1: dict
            Dictionaries with the keys `LOC` and `RETURNCODE`.

        Returns
        -------
        distance: float
            The number read from the file written by the external script,
            or NaN if either summary statistic or the distance script
            itself reported a nonzero return code.
        """
        # check if external script failed
        if sumstat_0[RETURNCODE] or sumstat_1[RETURNCODE]:
            return np.nan
        args = [
            f"sumstat_0={sumstat_0[LOC]}",
            f"sumstat_1={sumstat_1[LOC]}",
        ]
        ret = self.eh.run(args)
        try:
            # a failed distance script leaves no trustworthy output to read
            if ret[RETURNCODE]:
                return np.nan
            # read in distance
            with open(ret[LOC], 'rb') as f:
                return float(f.read())
        finally:
            # the temporary file is only needed to pass on the number;
            # remove it also when reading or parsing failed
            os.remove(ret[LOC])





def create_sum_stat(loc: str = '', returncode: int = 0):
    """
    Build a summary statistics dictionary of the same form as the one
    returned by the `ExternalModel`.

    Useful to encode the measured summary statistics, or to create a
    dummy summary statistic.

    Parameters
    ----------
    loc: str, optional (default = '')
        Location of the summary statistics file or folder.
    returncode: int, optional (default = 0)
        Defaults to 0, indicating correct execution. Should usually
        not be changed.

    Returns
    -------
    A dictionary with keys 'loc' and 'returncode' of the given
    parameters.
    """
    sum_stat = {}
    sum_stat[LOC] = loc
    sum_stat[RETURNCODE] = returncode
    return sum_stat