tskit-dev · jeromekelleher · Sep 18, 2021 · Sep 20, 2021 · grahamgower · Sep 20, 2021
diff --git a/msprime/cli.py b/msprime/cli.py
@@ -35,6 +35,7 @@
 
 import msprime
 from . import ancestry
+from . import json_input
 from . import mutations
 
 
@@ -1119,6 +1120,18 @@ def run_ancestry(args):
     tree_sequence.dump(args.output)
 
 
+def run_yaml(args):
+    # Runner for the "yaml" subcommand. NOTE(review): args.yaml_file is never closed.
+    setup_logging(args)
+    config = json_input.parse_yaml(args.yaml_file)
+    if "num_replicates" in config.ancestry_kwargs:  # only one ts is dumped below
+        raise ValueError("num_replicates not supported currently")
+    ts = msprime.sim_ancestry(**config.ancestry_kwargs)
+    if config.mutations_kwargs is not None:  # None: no mutations were requested
+        ts = msprime.sim_mutations(ts, **config.mutations_kwargs)
+    ts.dump(args.output)
+
+
 def get_msp_parser():
     top_parser = argparse.ArgumentParser(
         description="Command line interface for msprime.", epilog=msprime_citation_text
@@ -1132,6 +1145,7 @@ def get_msp_parser():
     add_ancestry_subcommand(subparsers)
     add_mutate_subcommand(subparsers)
     add_simulate_subcommand(subparsers)
+    add_yaml_subcommand(subparsers)
 
     return top_parser
 
@@ -1301,6 +1315,23 @@ def add_ancestry_subcommand(subparsers) -> None:
     parser.set_defaults(runner=run_ancestry)
 
 
+def add_yaml_subcommand(subparsers) -> None:  # registers the "yaml" subcommand
+    parser = subparsers.add_parser(
+        "yaml",
+        help=("Run a simulation described in a YAML input file."),
+    )
+    parser.add_argument(
+        "-v",
+        "--verbosity",
+        action="count",
+        default=0,
+        help="Increase the verbosity. Use -v for INFO output and -vv for DEBUG",
+    )
+    parser.add_argument("yaml_file", type=argparse.FileType("r"))  # opened by argparse
+    add_output_argument(parser)
+    parser.set_defaults(runner=run_yaml)  # presumably dispatched via args.runner
+
+
 def msp_main(arg_list=None):
     set_sigpipe_handler()
     parser = get_msp_parser()

diff --git a/msprime/json_input.py b/msprime/json_input.py
@@ -0,0 +1,71 @@
+#
+# Copyright (C) 2021 University of Oxford
+#
+# This file is part of msprime.
+#
+# msprime is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# msprime is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with msprime.  If not, see <http://www.gnu.org/licenses/>.
+#
+"""
+Parse simulation input described in JSON and related formats (e.g. YAML).
+"""
+from __future__ import annotations
+
+import copy
+import dataclasses
+import json
+
+import demes
+from ruamel.yaml import YAML
+
+import msprime
+
+
+@dataclasses.dataclass
+class SimulationConfig:  # parsed simulation description, as built by parse_yaml
+    ancestry_kwargs: dict  # keyword arguments for msprime.sim_ancestry
+    mutations_kwargs: dict = None  # for msprime.sim_mutations; None = no mutations
+
+
+def parse_ancestry_json(data):
+    data = copy.deepcopy(data)  # work on a copy so the caller's dict is not modified
+    if "start_time" in data or "end_time" in data:
+        raise ValueError(
+            "specifying time values not currently supported as too confusing"
+        )
+    if "demography" in data:
+        demes_dict = data["demography"]
+        # The demes model is embedded as a nested dict; serialise it back to JSON
+        # so demes.loads can parse it. TODO: replace with a direct dict loader.
+        demes_model = demes.loads(json.dumps(demes_dict), format="json")
+        data["demography"] = msprime.Demography.from_demes(demes_model)
+    return data
+
+
+def parse_mutations_json(data):
+    # Validates only; returns the same dict object (no copy, unlike parse_ancestry_json).
+    if "start_time" in data or "end_time" in data:
+        raise ValueError(
+            "specifying time values not currently supported as too confusing"
+        )
+    return data
+
+
+def parse_yaml(text):
+    # Build a SimulationConfig: "ancestry" is required, "mutations" is optional.
+    yaml = YAML(typ="safe")
+    data = yaml.load(text)
+    config = SimulationConfig(parse_ancestry_json(data["ancestry"]))
+    if "mutations" in data:
+        config.mutations_kwargs = parse_mutations_json(data["mutations"])
+    return config
diff --git a/msprime/mutations.py b/msprime/mutations.py
@@ -20,6 +20,7 @@
 Module responsible for generating mutations on a given tree sequence.
 """
 import inspect
+import logging
 import sys
 
 import numpy as np
@@ -30,6 +31,8 @@
 from . import provenance
 from msprime import _msprime
 
+logger: logging.Logger = logging.getLogger(__name__)
+
 _ACGT_ALLELES = ["A", "C", "G", "T"]
 _AMINO_ACIDS = [
     "A",
@@ -1360,6 +1363,9 @@ def sim_mutations(
         provenance.get_provenance_dict(parameters)
     )
 
+    # TODO it'd be nice to have better __str__ here for the named models
+    logger.info(f"Running mutation model {type(model)}")
+
     rng = _msprime.RandomGenerator(seed)
     lwt = _msprime.LightweightTableCollection()
     lwt.fromdict(tables.asdict())