diff --git a/msprime/cli.py b/msprime/cli.py
index 3b9e96e47..248c77738 100644
--- a/msprime/cli.py
+++ b/msprime/cli.py
@@ -35,6 +35,7 @@ import msprime
 from . import ancestry
+from . import json_input
 from . import mutations
@@ -1119,6 +1120,24 @@ def run_ancestry(args):
     tree_sequence.dump(args.output)


+def run_yaml(args):
+    """
+    Runs the simulation described by the YAML file in ``args.yaml_file``
+    and writes the resulting tree sequence to ``args.output``.
+    """
+    setup_logging(args)
+    # argparse.FileType opened the file for us; close it once it is parsed.
+    with args.yaml_file as yaml_file:
+        config = json_input.parse_yaml(yaml_file)
+
+    if "num_replicates" in config.ancestry_kwargs:
+        raise ValueError("num_replicates not supported currently")
+    ts = msprime.sim_ancestry(**config.ancestry_kwargs)
+    if config.mutations_kwargs is not None:
+        ts = msprime.sim_mutations(ts, **config.mutations_kwargs)
+    ts.dump(args.output)
+
+
 def get_msp_parser():
     top_parser = argparse.ArgumentParser(
         description="Command line interface for msprime.", epilog=msprime_citation_text
@@ -1132,6 +1151,7 @@ def get_msp_parser():
     add_ancestry_subcommand(subparsers)
     add_mutate_subcommand(subparsers)
     add_simulate_subcommand(subparsers)
+    add_yaml_subcommand(subparsers)
     return top_parser
@@ -1301,6 +1321,24 @@ def add_ancestry_subcommand(subparsers) -> None:
     parser.set_defaults(runner=run_ancestry)


+def add_yaml_subcommand(subparsers) -> None:
+    """Registers the ``yaml`` subcommand, which is executed by run_yaml."""
+    parser = subparsers.add_parser(
+        "yaml",
+        help=("Run a simulation described in a YAML input file."),
+    )
+    parser.add_argument(
+        "-v",
+        "--verbosity",
+        action="count",
+        default=0,
+        help="Increase the verbosity. Use -v for INFO output and -vv for DEBUG",
+    )
+    parser.add_argument("yaml_file", type=argparse.FileType("r"))
+    add_output_argument(parser)
+    parser.set_defaults(runner=run_yaml)
+
+
 def msp_main(arg_list=None):
     set_sigpipe_handler()
     parser = get_msp_parser()
diff --git a/msprime/json_input.py b/msprime/json_input.py
new file mode 100644
index 000000000..f21256f93
--- /dev/null
+++ b/msprime/json_input.py
@@ -0,0 +1,104 @@
+#
+# Copyright (C) 2021 University of Oxford
+#
+# This file is part of msprime.
+#
+# msprime is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# msprime is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with msprime. If not, see <http://www.gnu.org/licenses/>.
+#
+"""
+Define formats used for simulation input as JSON and related formats.
+"""
+from __future__ import annotations
+
+import copy
+import dataclasses
+import json
+
+import demes
+from ruamel.yaml import YAML
+
+import msprime
+
+
+@dataclasses.dataclass
+class SimulationConfig:
+    """
+    Keyword arguments for the simulation calls described by an input file.
+    """
+
+    # Keyword arguments passed to msprime.sim_ancestry.
+    ancestry_kwargs: dict
+    # Keyword arguments passed to msprime.sim_mutations, or None if the
+    # input has no "mutations" section.
+    mutations_kwargs: dict | None = None
+
+
+def parse_ancestry_json(data):
+    """
+    Returns a copy of the specified dictionary of ancestry parameters in
+    which any "demography" value (a Demes model in dictionary form) has
+    been converted to a :class:`msprime.Demography`. The input is not
+    modified.
+
+    Raises a ValueError if "start_time" or "end_time" is specified.
+    """
+    data = copy.deepcopy(data)
+    if "start_time" in data or "end_time" in data:
+        raise ValueError(
+            "specifying time values not currently supported as too confusing"
+        )
+    if "demography" in data:
+        demes_dict = data["demography"]
+        # TODO nasty going back to JSON here - can we make a demes.fromdict()
+        # function to do this directly?
+        demes_model = demes.loads(json.dumps(demes_dict), format="json")
+        data["demography"] = msprime.Demography.from_demes(demes_model)
+    return data
+
+
+def parse_mutations_json(data):
+    """
+    Returns a copy of the specified dictionary of mutation parameters.
+    The input is not modified.
+
+    Raises a ValueError if "start_time" or "end_time" is specified.
+    """
+    # Copy for consistency with parse_ancestry_json, so that the returned
+    # kwargs never alias the caller's data.
+    data = copy.deepcopy(data)
+    if "start_time" in data or "end_time" in data:
+        raise ValueError(
+            "specifying time values not currently supported as too confusing"
+        )
+    return data
+
+
+def parse_yaml(text):
+    """
+    Parses the specified YAML document (a string or an open file object)
+    and returns the corresponding :class:`SimulationConfig`.
+
+    Raises a ValueError if the document has no top-level "ancestry"
+    section.
+    """
+    yaml = YAML(typ="safe")
+    data = yaml.load(text)
+    # An empty document loads as None, and a scalar or list document has no
+    # sections, so check the shape before indexing.
+    if not isinstance(data, dict) or "ancestry" not in data:
+        raise ValueError("YAML input must contain a top-level 'ancestry' section")
+    config = SimulationConfig(parse_ancestry_json(data["ancestry"]))
+    if "mutations" in data:
+        config.mutations_kwargs = parse_mutations_json(data["mutations"])
+    return config
diff --git a/msprime/mutations.py b/msprime/mutations.py
index 7a29aafb9..31fcc869f 100644
--- a/msprime/mutations.py
+++ b/msprime/mutations.py
@@ -20,6 +20,7 @@ Module responsible for generating mutations on a given tree sequence.
 """
 import inspect
+import logging
 import sys

 import numpy as np
@@ -30,6 +31,8 @@ from . import provenance
 from msprime import _msprime

+logger: logging.Logger = logging.getLogger(__name__)
+
 _ACGT_ALLELES = ["A", "C", "G", "T"]
 _AMINO_ACIDS = [
     "A",
@@ -1360,6 +1363,9 @@ def sim_mutations(
         provenance.get_provenance_dict(parameters)
     )

+    # TODO it'd be nice to have better __str__ here for the named models
+    logger.info("Running mutation model %s", type(model))
+
     rng = _msprime.RandomGenerator(seed)
     lwt = _msprime.LightweightTableCollection()
     lwt.fromdict(tables.asdict())