diff --git a/.github/workflows/benchmarks.yaml b/.github/workflows/benchmarks.yaml index eaef083..8a6e350 100644 --- a/.github/workflows/benchmarks.yaml +++ b/.github/workflows/benchmarks.yaml @@ -12,9 +12,8 @@ jobs: strategy: matrix: python-version: ["3.9", "3.10", "3.11"] - platform: [ubuntu-latest] fail-fast: false - runs-on: ${{ matrix.platform }} + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/coreTests_LinuxUbuntu.yaml b/.github/workflows/coreTests_LinuxUbuntu.yaml index cfb1194..3ac6713 100644 --- a/.github/workflows/coreTests_LinuxUbuntu.yaml +++ b/.github/workflows/coreTests_LinuxUbuntu.yaml @@ -28,7 +28,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade setuptools - python -m pip install wheel flask pytest pytest-cov + python -m pip install wheel flask pytest python -m pip install -e . - name: Test with pytest run: | diff --git a/.github/workflows/fullTest.yaml b/.github/workflows/fullTest.yaml index 703978a..161e9fe 100644 --- a/.github/workflows/fullTest.yaml +++ b/.github/workflows/fullTest.yaml @@ -11,7 +11,7 @@ on: jobs: testPython309: - runs-on: macos-latest + runs-on: macos-12 strategy: fail-fast: false steps: @@ -35,7 +35,7 @@ jobs: run: python -c "import pysipfenn; c = pysipfenn.Calculator(); c.downloadModels(); c.loadModels();" - name: Test with pytest - run: pytest --cov=pysipfenn --cov-report=xml + run: pytest --durations=0 --cov=pysipfenn --cov-report=xml env: MODELS_FETCHED: true @@ -44,7 +44,7 @@ jobs: testPython310: needs: testPython309 - runs-on: macos-latest + runs-on: macos-12 steps: - uses: actions/checkout@v3 @@ -59,19 +59,19 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade setuptools - python -m pip install wheel flask pytest pytest-cov + python -m pip install wheel flask pytest python -m pip install -e ".[dev]" - name: Download Models run: python -c "import pysipfenn; c = pysipfenn.Calculator(); c.downloadModels(); 
c.loadModels();" - name: Test with pytest - run: pytest --cov=pysipfenn --cov-report=xml + run: pytest --durations=0 env: MODELS_FETCHED: true testPython311: needs: testPython310 - runs-on: macos-latest + runs-on: macos-12 steps: - uses: actions/checkout@v3 @@ -86,12 +86,12 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade setuptools - python -m pip install wheel flask pytest pytest-cov + python -m pip install wheel flask pytest python -m pip install -e ".[dev]" - name: Download Models run: python -c "import pysipfenn; c = pysipfenn.Calculator(); c.downloadModels(); c.loadModels();" - name: Test with pytest - run: pytest --cov=pysipfenn --cov-report=xml + run: pytest --durations=0 env: MODELS_FETCHED: true diff --git a/.github/workflows/publishPyPI.yaml b/.github/workflows/publishPyPI.yaml index c76b9f3..9018bbc 100644 --- a/.github/workflows/publishPyPI.yaml +++ b/.github/workflows/publishPyPI.yaml @@ -7,7 +7,7 @@ on: jobs: deploy: - runs-on: ubuntu-latest + runs-on: macos-14 steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/weeklyTesting.yaml b/.github/workflows/weeklyTesting.yaml index da6d4b7..df19d92 100644 --- a/.github/workflows/weeklyTesting.yaml +++ b/.github/workflows/weeklyTesting.yaml @@ -30,11 +30,11 @@ jobs: python -m pip install -e . 
- name: Test with pytest run: | - pytest + pytest --durations=0 testFullPython310: needs: coreTests - runs-on: macos-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 @@ -55,7 +55,7 @@ jobs: run: python -c "import pysipfenn; c = pysipfenn.Calculator(); c.downloadModels(); c.loadModels();" - name: Test with pytest - run: pytest + run: pytest --durations=0 env: MODELS_FETCHED: true diff --git a/pyproject.toml b/pyproject.toml index dec3954..4bacbd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,22 +28,23 @@ readme = "README.md" requires-python = ">=3.9" dependencies = [ - "pymatgen>=2023.2.22", - "torch>=1.11.0", + "pymatgen>=2023.10.3", + "torch>=2.0", "onnx2torch>=1.5.2", "onnx>=1.13.0", - "numpy>=1.22.0", + "numpy>=1.25.0", "tqdm>=4.65.0", "natsort>=8.3.0", - "pymongo>=4.2", + "pymongo>=4.4", "pySmartDL>=1.3.4", "dnspython", + "ruamel.yaml" ] [project.optional-dependencies] dev = [ - "coremltools>=6.3", - "onnxconverter_common>=1.13.0", + "coremltools>=7.0", + "onnxconverter_common>=1.14.0", "onnxsim==0.4.33", "onnxruntime>=1.16.0" ] diff --git a/pysipfenn/core/modelExporters.py b/pysipfenn/core/modelExporters.py index 1d0bd86..c1feb10 100644 --- a/pysipfenn/core/modelExporters.py +++ b/pysipfenn/core/modelExporters.py @@ -15,22 +15,22 @@ class ONNXExporter: """Export models to the ONNX format (what they ship in by default) to allow (1) exporting modified pySIPFENN models, - (2) simplify the models using ONNX optimizer, and (3) convert them to FP16 precision, cutting the size in half. + (2) simplify the models using ONNX optimizer, and (3) convert them to `FP16` precision, cutting the size in half. Args: - calculator: A calculator object with loaded models that has loaded PyTorch models (happens automatically - when the autoLoad argument is kept to its default value of True when initializing the Calculator). 
During the + calculator: A ``Calculator`` object with loaded models that has loaded PyTorch models (happens automatically + when the ``autoLoad`` argument is kept to its default value of ``True`` when initializing the Calculator). During the initialization, the loaded PyTorch models are converted back to ONNX (in memory) to be then either adjusted or persisted to disk. Attributes: - calculator: A calculator object with ONNX loaded models. - simplifiedDict: A dictionary of models that have been simplified. - fp16Dict: A dictionary of models that have been converted to FP16. + calculator: A Calculator object with ONNX loaded models. + simplifiedDict: A boolean dictionary of models that have been simplified. + fp16Dict: A boolean dictionary of models that have been converted to FP16. """ def __init__(self, calculator: Calculator): - """Initialize the ONNXExporter using a calculator object.""" + """Initialize the ``ONNXExporter`` using a calculator object.""" self.simplifiedDict = {model: False for model in calculator.loadedModels.keys()} self.fp16Dict = {model: False for model in calculator.loadedModels.keys()} self.calculator = calculator @@ -80,7 +80,7 @@ def simplify(self, model: str) -> None: """Simplify a loaded model using the ONNX optimizer. Args: - model: The name of the model to simplify (must be loaded in the Calculator). + model: The name of the model to simplify (must be loaded in the ``Calculator``). Returns: None @@ -100,11 +100,11 @@ def simplifyAll(self): self.simplify(model) print('***** Done simplifying all models! *****') - def toFP16(self, model: str): + def toFP16(self, model: str) -> None: """Convert a loaded model to FP16 precision. Args: - model: The name of the model to convert to FP16 (must be loaded in the Calculator). + model: The name of the model to convert to FP16 (must be loaded in the ``Calculator``). Returns: None @@ -124,11 +124,13 @@ def toFP16All(self): self.toFP16(model) print('***** Done converting all models to FP16! 
*****') - def export(self, model: str): - """Export a loaded model to ONNX format. + def export(self, model: str, append: str = '') -> None: + """Export a loaded model to ``ONNX``format. Args: - model: The name of the model to export (must be loaded in the Calculator). + model: The name of the model to export (must be loaded in the ``Calculator``). + append: A string to append to the exported model name after the model name, simplification marker, and + FP16 marker. Useful for adding a version number or other information to the exported model name. Returns: None @@ -141,25 +143,29 @@ def export(self, model: str): name += '_simplified' if self.fp16Dict[model]: name += '_fp16' + if append: + name += f'_{append}' name += '.onnx' onnx.save(loadedModel, name) print(f'--> Exported as {name}', flush=True) - def exportAll(self): - """Export all loaded models to ONNX format with the export function.""" + def exportAll(self, append: str = '') -> None: + """Export all loaded models to ``ONNX`` format with the export function. ``append`` string can be passed to the export + function to append to the exported model name. + """ for model in tqdm(self.calculator.loadedModels): - self.export(model) + self.export(model, append=append) print('***** Done exporting all models! *****') class TorchExporter: - """Export models to the PyTorch PT format to allow for easy loading and inference in PyTorch in other projects. + """Export models to the ``PyTorch PT`` format to allow for easy loading and inference in PyTorch in other projects. Args: - calculator: A calculator object with loaded models. + calculator: A ``Calculator`` object with loaded models. Attributes: - calculator: A calculator object with loaded models. + calculator: A ``Calculator`` object with loaded models. 
""" def __init__(self, calculator: Calculator): """Initialize the TorchExporter with a calculator object that has loaded models.""" @@ -167,14 +173,16 @@ def __init__(self, calculator: Calculator): assert len(self.calculator.loadedModels) > 0, 'No models loaded in calculator. Nothing to export.' print(f'Initialized TorchExporter with models: {list(self.calculator.loadedModels.keys())}') - def export(self, model: str): - """Export a loaded model to PyTorch PT format. Models are exported in eval mode (no dropout) and saved in the + def export(self, model: str, append: str = '') -> None: + """Export a loaded model to ``PyTorch PT`` format. Models are exported in eval mode (no dropout) and saved in the current working directory. Args: - model: The name of the model to export (must be loaded in the Calculator) and it must have a descriptor - (Ward2017 or KS2022) defined in the calculator.models dictionary created when the Calculator was + model: The name of the model to export (must be loaded in the ``Calculator``) and it must have a descriptor + (``Ward2017`` or ``KS2022``) defined in the ``Calculator.models`` dictionary created when the ``Calculator`` was initialized. + append: A string to append to the exported model name after the model name. Useful for adding a version + number or other information to the exported model name. Returns: None @@ -200,43 +208,47 @@ def export(self, model: str): tracedModel = torch.jit.trace(loadedModel, inputs_tracer) - name = f"{model}.pt" + name = f"{model}{f'_{append}' if append else ''}.pt" tracedModel.save(name) print(f'--> Exported as {name}', flush=True) - def exportAll(self): - """Export all loaded models to PyTorch PT format with the export function.""" + def exportAll(self, append: str = '') -> None: + """Exports all loaded models to PyTorch PT format with the export function. 
`append` can be passed to the export + function + """ for model in tqdm(self.calculator.loadedModels): - self.export(model) + self.export(model, append=append) print('***** Done exporting all models! *****') class CoreMLExporter: - """Export models to the CoreML format to allow for easy loading and inference in CoreML in other projects, + """Export models to the ``CoreML`` format to allow for easy loading and inference in ``CoreML`` in other projects, particularly valuable for Apple devices, as pySIPFENN models can be run using the Neural Engine accelerator with minimal power consumption and neat optimizations. Args: - calculator: A calculator object with loaded models. + calculator: A ``Calculator`` object with loaded models. Attributes: - calculator: A calculator object with loaded models. + calculator: A ``Calculator`` object with loaded models. """ def __init__(self, calculator: Calculator): self.calculator = calculator assert len(self.calculator.loadedModels)>0, 'No models loaded in calculator. Nothing to export.' print(f'Initialized CoreMLExporter with models: {list(self.calculator.loadedModels.keys())}') - def export(self, model: str): - """Export a loaded model to CoreML format. Models will be saved as {model}.mlpackage in the current working - directory. Models will be annotated with the feature vector name (Ward2017 or KS2022) and the output will be + def export(self, model: str, append: str = '') -> None: + """Export a loaded model to ``CoreML`` format. Models will be saved as ``{model}.mlpackage`` in the current working + directory. Models will be annotated with the feature vector name (``Ward2017`` or ``KS2022``) and the output will be named "property". The latter behavior will be adjusted in the future when model output name and unit will be added to the model JSON metadata. 
Args: - model: The name of the model to export (must be loaded in the Calculator) and it must have a descriptor - (Ward2017 or KS2022) defined in the calculator.models dictionary created when the Calculator was + model: The name of the model to export (must be loaded in the ``Calculator``) and it must have a descriptor + (``Ward2017`` or ``KS2022``) defined in the ``calculator.models`` dictionary created when the ``Calculator`` was initialized. + append: A string to append to the exported model name after the model name. Useful for adding a version + number or other information to the exported model name. Returns: None @@ -270,12 +282,14 @@ def export(self, model: str): inputs=inputs_converter, outputs=[ct.TensorType(name='property')] ) - name = f"{model}.mlpackage" + name = f"{model}{f'_{append}' if append else ''}.mlpackage" coreml_model.save(name) print(f'--> Exported as {name}', flush=True) - def exportAll(self): - """Export all loaded models to CoreML format with the export function.""" + def exportAll(self, append: str = '') -> None: + """Export all loaded models to ``CoreML`` format with the export function. ``append`` can be passed to the export + function to append to all exported model names. + """ for model in tqdm(self.calculator.loadedModels): - self.export(model) + self.export(model, append=append) print('***** Done exporting all models! 
*****') diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 29b684e..224e89e 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -1,27 +1,38 @@ -# General Imports +# Standard Library Imports import os import gc - -import natsort -from pySmartDL import SmartDL +import io +import sys +sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') import csv -import numpy as np -from pymatgen.core import Structure import json +from time import perf_counter +from typing import List, Union, Dict +from importlib import resources + +# Helper Imports from tqdm import tqdm from tqdm.contrib.concurrent import process_map -from time import perf_counter +import natsort +from pySmartDL import SmartDL -from importlib import resources +# Scientific Computing Imports +import numpy as np +from pymatgen.core import Structure, Composition +# Machine Learning Imports import torch import onnx2torch import onnx -from typing import List, Union, Dict +# YAML Handling Imports and Configuration +from ruamel.yaml import YAML +from ruamel.yaml.scalarstring import LiteralScalarString # Descriptor Generators -from pysipfenn.descriptorDefinitions import Ward2017, KS2022, KS2022_dilute +from pysipfenn.descriptorDefinitions import ( + Ward2017, KS2022, KS2022_dilute, KS2022_randomSolutions +) # - add new ones here if extending the code @@ -30,36 +41,41 @@ ["Jonathan Siegel", "jwsiegel@tamu.edu"]] __name__ = 'pysipfenn' - +# ********************************* CALCULATION HIGH-LEVEL ENVIRONMENT ********************************* class Calculator: - """ - pySIPFENN Calculator automatically initializes all functionalities including identification and loading - of all available models defined statically in models.json file. It exposes methods for calculating predefined - structure-informed descriptors (feature vectors) and predicting properties using models that utilize them. 
+ """pySIPFENN Calculator automatically initializes all functionalities including identification and loading + of all available models defined statically in the ``models.json`` file. It exposes methods for calculating predefined + structure-informed descriptors (feature vectors) and predicting properties using models that utilize them. - Args: - autoLoad: Automatically load all available models. Default: True. - - Attributes: - models: Dictionary with all model information based on the models.json file in the modelsSIPFENN - directory. The keys are the network names and the values are dictionaries with the model information. - loadedModels: Dictionary with all loaded models. The keys are the network names and the values - are the loaded pytorch models. - descriptorData: List of all descriptor data created during the last predictions run. The order - of the list corresponds to the order of atomic structures given to models as input. The order of the - list of descriptor data for each structure corresponds to the order of networks in the toRun list. - predictions: List of all predictions created during the last predictions run. The order of the - list corresponds to the order of atomic structures given to models as input. The order of the list - of predictions for each structure corresponds to the order of networks in the toRun list. - inputFiles: List of all input file names used during the last predictions run. The order of the list - corresponds to the order of atomic structures given to models as input. + Args: + autoLoad: Automatically load all available ML models based on the ``models.json`` file. This `will` require + significant memory and time if they are available, so for featurization and other non-model-requiring + tasks, it is recommended to set this to ``False``. Defaults to ``True``. + verbose: Print initialization messages and several other non-critical messages during runtime procedures. + Defaults to True. 
+ + Attributes: + models: Dictionary with all model information based on the ``models.json`` file in the modelsSIPFENN + directory. The keys are the network names and the values are dictionaries with the model information. + loadedModels: Dictionary with all loaded models. The keys are the network names and the values + are the loaded pytorch models. + descriptorData: List of all descriptor data created during the last predictions run. The order + of the list corresponds to the order of atomic structures given to models as input. The order of the + list of descriptor data for each structure corresponds to the order of networks in the toRun list. + predictions: List of all predictions created during the last predictions run. The order of the + list corresponds to the order of atomic structures given to models as input. The order of the list + of predictions for each structure corresponds to the order of networks in the toRun list. + inputFiles: List of all input file names used during the last predictions run. The order of the list + corresponds to the order of atomic structures given to models as input. 
""" def __init__(self, autoLoad: bool = True, verbose: bool = True): + """Initializes the pySIPFENN Calculator object.""" if verbose: print('********* Initializing pySIPFENN Calculator **********') + self.verbose = verbose # dictionary with all model information with resources.files('pysipfenn.modelsSIPFENN').joinpath('models.json').open('r') as f: if verbose: @@ -81,17 +97,24 @@ def __init__(self, else: print(f'Skipping model loading (autoLoad=False)') + self.prototypeLibrary = {} + self.parsePrototypeLibrary(verbose=verbose) + self.toRun = [] self.descriptorData = [] self.predictions = [] + self.metas = { + 'RSS': [] + } self.inputFiles = [] if verbose: print(f'********* pySIPFENN Successfully Initialized **********') def __str__(self): - '''Prints the status of the Calculator object.''' + """Prints the status of the ``Calculator`` object.""" printOut = f'pySIPFENN Calculator Object. Version: {__version__}\n' - printOut += f'Models are located in:\n{resources.files("pysipfenn.modelsSIPFENN")}\n{"-" * 80}\n' + printOut += f'Models are located in:\n {resources.files("pysipfenn.modelsSIPFENN")}\n' + printOut += f'Auxiliary files (incl. 
structure prototypes):\n {resources.files("pysipfenn.misc")}\n{"-" * 80}\n' printOut += f'Loaded Networks: {list(self.loadedModels.keys())}\n' if len(self.inputFiles) > 0: printOut += f'Last files selected as input: {len(self.inputFiles)}\n' @@ -106,29 +129,97 @@ def __str__(self): printOut += f' {len(self.predictions[0])} predictions/structure\n' return printOut - def updateModelAvailability(self) -> None: + # ********************************* PROTOTYPE HANDLING ********************************* + def parsePrototypeLibrary(self, + customPath: str = "default", + verbose: bool = False, + printCustomLibrary: bool = False) -> None: + """Parses the prototype library YAML file in the ``misc`` directory, interprets them into pymatgen ``Structure`` + objects, and stores them in the ``self.prototypeLibrary`` dict attribute of the ``Calculator`` object. You can use it + also to temporarily append a custom prototype library (by providing a path) which will live as long as the + ``Calculator``. For permanent changes, use ``appendPrototypeLibrary()``. + + Args: + customPath: Path to the prototype library YAML file. Defaults to the magic string ``"default"``, which loads the + default prototype library included in the package in the ``misc`` directory. + verbose: If True, it prints the number of prototypes loaded. Defaults to ``False``, but note that ``Calculator`` + class automatically initializes with ``verbose=True``. + printCustomLibrary: If True, it prints the name and POSCAR of each prototype being added to the prototype + library. Has no effect if ``customPath`` is ``'default'``. Defaults to ``False``. + + Returns: + None """ - Updates availability of models based on the pysipfenn.modelsSIPFENN directory contents. Works only for - current ONNX model definitions. 
+ yaml_safeLoader = YAML(typ='safe') + + if customPath == 'default': + with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('r') as f: + prototypes = yaml_safeLoader.load(f) + else: + with open(customPath, 'r') as f: + prototypes = yaml_safeLoader.load(f) + if printCustomLibrary: + for prototype in prototypes: + print(f'{prototype["name"]}:\n{prototype["POSCAR"]}') + for prototype in prototypes: + assert isinstance(prototype['name'], str), 'Prototype name must be a string.' + assert isinstance(prototype['POSCAR'], str), 'Prototype POSCAR must be a string.' + assert isinstance(prototype['origin'], str), 'Prototype origin must be a string.' + struct = Structure.from_str(prototype['POSCAR'], fmt='poscar') + assert struct.is_valid(), f'Invalid structure for prototype {prototype["name"]}' + assert struct.is_ordered, f'Unordered structure for prototype {prototype["name"]}. Make sure that the ' \ + f'POSCAR file is in the direct format and that no prior randomization has ' \ + f'been applied to the structure occupancies.' + self.prototypeLibrary.update({ + prototype['name']: { + 'POSCAR': prototype['POSCAR'], + 'structure': struct, + 'origin': prototype['origin'] + } + }) + if verbose: + print(f'{len(self.prototypeLibrary)} prototype structures present into the prototype library.') + + def appendPrototypeLibrary(self, customPath: str) -> None: + """Parses a custom prototype library YAML file and permanently appends it into the internal prototypeLibrary + of the pySIPFENN package. They will be persisted for future use and, by default, they will be loaded + automatically when instantiating the ``Calculator`` object, similar to your custom models. + + Args: + customPath: Path to the prototype library YAML file to be appended to the internal ``self.prototypeLibrary`` + of the ``Calculator`` object. 
+ + Returns: + None """ + + self.parsePrototypeLibrary(customPath=customPath, printCustomLibrary=True, verbose=True) + print(f'Now, {len(self.prototypeLibrary)} prototype structures are present into the prototype library. ' + f'Persisting them for future use.') + overwritePrototypeLibrary(self.prototypeLibrary) + + # ********************************* MODEL HANDLING ********************************* + def updateModelAvailability(self) -> None: + """Updates availability of models based on the pysipfenn.modelsSIPFENN directory contents. Works only for + current ONNX model definitions.""" with resources.files('pysipfenn.modelsSIPFENN') as p: all_files = os.listdir(p) detectedNets = [] for net, netName in zip(self.network_list, self.network_list_names): if all_files.__contains__(net + '.onnx'): detectedNets.append(net) - print('\u2714 ' + netName) + print('✔ ' + netName) else: - print('\u292B ' + netName) + print('⨯ ' + netName) self.network_list_available = detectedNets def downloadModels(self, network: str = 'all') -> None: """Downloads ONNX models. By default, all available models are downloaded. If a model is already available - on disk, it is skipped. If a specific network is given, only that network is downloaded possibly overwriting - the existing one. If the networks name is not recognized message is printed. + on disk, it is skipped. If a specific ``network`` is given, only that network is downloaded, possibly overwriting + the existing one. If the ``network`` name is not recognized, the message will be printed. Args: - network: Name of the network to download. Defaults to 'all'. + network: Name of the network to download. Defaults to ``'all'``. 
""" with resources.files('pysipfenn.modelsSIPFENN') as modelPath: @@ -166,201 +257,396 @@ def downloadModels(self, network: str = 'all') -> None: print('Network name not recognized') self.updateModelAvailability() - def calculate_Ward2017(self, - structList: List[Structure], - mode: str = 'serial', - max_workers: int = 4) -> list: - """Calculates Ward2017 descriptors for a list of structures. The calculation can be done in serial or parallel - mode. In parallel mode, the number of workers can be specified. The results are stored in the descriptorData + def loadModels(self, network: str = 'all') -> None: + """Load model/models into memory of the ``Calculator`` class. The models are loaded from the ``modelsSIPFENN`` directory + inside the package. Its location can be seen by calling ``print()`` on the ``Calculator``. The models are stored in the + ``self.loadedModels`` attribute as a dictionary with the network string as key and the PyTorch model as value. + + Note: + This function only works with models that are stored in the ``modelsSIPFENN`` directory inside the package, + are in ONNX format, and have corresponding entries in ``models.json``. For all others, you will need to use + ``loadModelCustom()``. + + Args: + network: Default is ``'all'``, which loads all models detected as available. Alternatively, a specific model + can be loaded by its corresponding key in models.json. E.g. ``'SIPFENN_Krajewski2020_NN9'`` or + ``'SIPFENN_Krajewski2022_NN30'``. The key is the same as the network argument in ``downloadModels()``. + + Raises: + ValueError: If the network name is not recognized or if the model is not available in the ``modelsSIPFENN`` + directory. + + Returns: + None. It updates the loadedModels attribute of the Calculatorclass. 
+ """ + with resources.files('pysipfenn.modelsSIPFENN') as modelPath: + if network == 'all': + print('Loading models:') + for net in tqdm(self.network_list_available): + self.loadedModels.update({ + net: onnx2torch.convert(onnx.load(f'{modelPath}/{net}.onnx')).float() + }) + elif network in self.network_list_available: + print('Loading model: ', network) + self.loadedModels.update({ + network: onnx2torch.convert(onnx.load(f'{modelPath}/{network}.onnx')).float() + }) + else: + raise ValueError( + 'Network not available. Please check the network name for typos or run downloadModels() ' + 'to download the models. Currently available models are: ', self.network_list_available) + + def loadModelCustom( + self, + networkName: str, + modelName: str, + descriptor: str, + modelDirectory: str = '.' + ) -> None: + """Load a custom ONNX model from a custom directory specified by the user. The primary use case for this + function is to load models that are not included in the package and cannot be placed in the package + directory because of write permissions (e.g. on restrictive HPC systems) or storage allocations. + + Args: + modelDirectory: Directory where the model is located. Defaults to the current directory. + networkName: Name of the network. This is the name used to refer to the ONNX network. It has to be + unique, not contain any spaces, and correspond to the name of the ONNX file (excluding the ``.onnx`` + extension). + modelName: Name of the model. This is the name that will be displayed in the model selection menu. It + can be any string desired. + descriptor: Descriptor/feature vector used by the model. pySIPFENN currently supports the following + descriptors: ``'KS2022'``, and ``'Ward2017'``. 
+ """ + + self.loadedModels.update({ + networkName: onnx2torch.convert(onnx.load(f'{modelDirectory}/{networkName}.onnx')).float() + }) + self.models.update({ + networkName: { + 'name': modelName, + 'descriptor': descriptor + }}) + self.network_list.append(networkName) + self.network_list_names.append(modelName) + self.network_list_available.append(networkName) + print(f'Loaded model {modelName} ({networkName}) from {modelDirectory}') + + def findCompatibleModels(self, descriptor: str) -> List[str]: + """Finds all models compatible with a given descriptor based on the descriptor definitions loaded from the + ``models.json`` file. + + Args: + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` + to see available modules or add yours. Available default descriptors are: ``'Ward2017'``, ``'KS2022'``. + + Returns: + List of strings corresponding to compatible models. + """ + + compatibleList = [] + for net in self.models: + if descriptor in self.models[net]['descriptor']: + compatibleList.append(net) + return compatibleList + + # ******************************* DESCRIPTOR HANDLING (MID-LEVEL API) ******************************* + def calculate_Ward2017( + self, + structList: List[Structure], + mode: str = 'serial', + max_workers: int = 4 + ) -> list: + """Calculates ``Ward2017`` descriptors for a list of structures. The calculation can be done in serial or parallel + mode. In parallel mode, the number of workers can be specified. The results are stored in the ``self.descriptorData`` attribute. The function returns the list of descriptors as well. Args: structList: List of structures to calculate descriptors for. The structures must be - initialized with the pymatgen Structure class. - mode: Mode of calculation. Defaults to 'serial'. Options are 'serial' and 'parallel'. - max_workers: Number of workers to use in parallel mode. Defaults to 4. + initialized with the pymatgen ``Structure`` class. 
+ mode: Mode of calculation. Defaults to 'serial'. Options are ``'serial'`` and ``'parallel'``. + max_workers: Number of workers to use in parallel mode. Defaults to ``4``. If ``None``, the number of workers + will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. Returns: - List of Ward2017 descriptor (feature vector) for each structure. + List of ``Ward2017`` descriptor (feature vector) for each structure. """ if mode == 'serial': descList = [Ward2017.generate_descriptor(s) for s in tqdm(structList)] - print('Done!') + if self.verbose: print('Done!') self.descriptorData = descList return descList elif mode == 'parallel': descList = process_map(Ward2017.generate_descriptor, structList, max_workers=max_workers) - print('Done!') + if self.verbose: print('Done!') self.descriptorData = descList return descList - def calculate_KS2022(self, - structList: List[Structure], - mode: str = 'serial', - max_workers: int = 8) -> list: - """Calculates KS2022 descriptors for a list of structures. The calculation can be done in serial or parallel + def calculate_KS2022( + self, + structList: List[Structure], + mode: str = 'serial', + max_workers: int = 8 + ) -> list: + """Calculates ``KS2022`` descriptors for a list of structures. The calculation can be done in serial or parallel mode. In parallel mode, the number of workers can be specified. The results are stored in the descriptorData attribute. The function returns the list of descriptors as well. Args: structList: List of structures to calculate descriptors for. The structures must be - initialized with the pymatgen Structure class. - mode: Mode of calculation. Defaults to 'serial'. Options are 'serial' and 'parallel'. - max_workers: Number of workers to use in parallel mode. Defaults to 8. + initialized with the pymatgen ``Structure`` class. + mode: Mode of calculation. Defaults to 'serial'. Options are ``'serial'`` and ``'parallel'``. 
+ max_workers: Number of workers to use in parallel mode. Defaults to ``8``. If ``None``, the number of workers + will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. Returns: - List of KS2022 descriptor (feature vector) for each structure. + List of ``KS2022`` descriptor (feature vector) for each structure. """ if mode == 'serial': descList = [KS2022.generate_descriptor(s) for s in tqdm(structList)] - print('Done!') + if self.verbose: print('Done!') self.descriptorData = descList return descList elif mode == 'parallel': descList = process_map(KS2022.generate_descriptor, structList, max_workers=max_workers) - print('Done!') + if self.verbose: print('Done!') self.descriptorData = descList return descList - def calculate_KS2022_dilute(self, - structList: List[Structure], - baseStruct: Union[str, List[Structure]] = 'pure', - mode: str = 'serial', - max_workers: int = 8) -> list: - """Calculates KS2022 descriptors for a list of dilute structures (either based on pure elements and on custom + def calculate_KS2022_dilute( + self, + structList: List[Structure], + baseStruct: Union[str, List[Structure]] = 'pure', + mode: str = 'serial', + max_workers: int = 8 + ) -> List[np.ndarray]: + """Calculates ``KS2022`` descriptors for a list of dilute structures (either based on pure elements and on custom base structures, e.g. TCP endmember configurations) that contain a single alloying atom. Speed increases are - substantial compared to the KS2022 descriptor, which is more general and can be used on any structure. The + substantial compared to the ``KS2022`` descriptor, which is more general and can be used on any structure. The calculation can be done in serial or parallel mode. In parallel mode, the number of workers can be specified. - The results are stored in the descriptorData attribute. The function returns the list of descriptors as well. + The results are stored in the ``self.descriptorData`` attribute. 
The function returns the list of descriptors as well. Args: structList: List of structures to calculate descriptors for. The structures must be dilute structures (either based on pure elements and on custom base structures, e.g. TCP endmember configurations) that contain a single alloying atom. The structures must be initialized with the - pymatgen Structure class. - baseStruct: Non-diluted references for the dilute structures. Defaults to 'pure', which assumes that the structures + pymatgen ``Structure`` class. + baseStruct: Non-diluted references for the dilute structures. Defaults to ``'pure'``, which assumes that the structures are based on pure elements and generates references automatically. Alternatively, a list of structures can be provided, which can be either pure elements or custom base structures (e.g. TCP endmember configurations). - mode: Mode of calculation. Defaults to 'serial'. Options are 'serial' and 'parallel'. - max_workers: Number of workers to use in parallel mode. Defaults to 8. + mode: Mode of calculation. Defaults to ``'serial'``. Options are ``'serial'`` and ``'parallel'``. + max_workers: Number of workers to use in parallel mode. Defaults to ``8``. If ``None``, the number of workers + will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. Returns: - List of KS2022 descriptor (feature vector) for each structure. + List of ``KS2022`` descriptor (feature vector) ``np.ndarray`` for each structure. 
""" if baseStruct == 'pure' or isinstance(baseStruct, Structure): if mode == 'serial': descList = [KS2022_dilute.generate_descriptor(s, baseStruct=baseStruct) for s in tqdm(structList)] - print('Done!') + if self.verbose: + print('Done!') self.descriptorData = descList return descList elif mode == 'parallel': - descList = process_map(KS2022_dilute.generate_descriptor(baseStruct=baseStruct), - structList, + pairedInput = list(zip(structList, [baseStruct] * len(structList))) + descList = process_map(wrapper_KS2022_dilute_generate_descriptor, + pairedInput, max_workers=max_workers) - print('Done!') + if self.verbose: + print('Done!') self.descriptorData = descList return descList elif isinstance(baseStruct, List) and len(baseStruct) == len(structList): if mode == 'serial': descList = [KS2022_dilute.generate_descriptor(s, bs) for s, bs in tqdm(zip(structList, baseStruct))] - print('Done!') + if self.verbose: + print('Done!') self.descriptorData = descList return descList elif mode == 'parallel': - descList = process_map(KS2022_dilute.generate_descriptor, - structList, baseStruct, max_workers=max_workers) - print('Done!') + pairedInput = list(zip(structList, baseStruct)) + descList = process_map(wrapper_KS2022_dilute_generate_descriptor, + pairedInput, max_workers=max_workers) + if self.verbose: + print('Done!') self.descriptorData = descList return descList - - def loadModels(self, network: str = 'all') -> None: - """ - Load model/models into memory of the Calculator class. The models are loaded from the modelsSIPFENN directory inside - the package. It's location can be seen by calling print() on the Calculator. The models are stored in the - loadedModels attribute as a dictionary with the network string as key and the PyTorch model as value. - - Note: - This function only works with models that are stored in the modelsSIPFENN directory inside the package, - are in ONNX format, and have corresponding entries in models.json. 
For all others, you will need to use - loadModelCustom(). - - Args: - network: Default is 'all', which loads all models detected as available. Alternatively, a specific model - can be loaded by its corresponding key in models.json. E.g. 'SIPFENN_Krajewski2020_NN9' or - 'SIPFENN_Krajewski2022_NN30'. The key is the same as the network argument in downloadModels(). - """ - with resources.files('pysipfenn.modelsSIPFENN') as modelPath: - if network == 'all': - print('Loading models:') - for net in tqdm(self.network_list_available): - self.loadedModels.update({ - net: onnx2torch.convert(onnx.load(f'{modelPath}/{net}.onnx')).float() - }) - elif network in self.network_list_available: - print('Loading model: ', network) - self.loadedModels.update({ - network: onnx2torch.convert(onnx.load(f'{modelPath}/{network}.onnx')).float() - }) else: - raise ValueError( - 'Network not available. Please check the network name for typos or run downloadModels() ' - 'to download the models. Currently available models are: ', self.network_list_available) + raise ValueError('`baseStruct` must be (1) `pure`, (2) `Structure` or a list of them.') + + def calculate_KS2022_randomSolutions( + self, + baseStructList: Union[str, Structure, List[str], List[Structure], List[Union[Composition, str]]], + compList: Union[str, List[str], Composition, List[Composition], List[Union[Composition, str]]], + minimumSitesPerExpansion: int = 50, + featureConvergenceCriterion: float = 0.005, + compositionConvergenceCriterion: float = 0.01, + minimumElementOccurrences: int = 10, + plotParameters: bool = False, + printProgress: bool = False, + mode: str = 'serial', + max_workers: int = 8 + ) -> List[np.ndarray]: + """Calculates ``KS2022`` descriptors corresponding to random solid solutions occupying base structure / lattice + sites for a list of compositions through method described in ``descriptorDefinitions.KS2022_randomSolutions`` + submodule. The results are stored in the descriptorData attribute. 
The function returns the list of descriptors + in numpy format as well. - def loadModelCustom(self, networkName: str, modelName: str, descriptor: str, modelDirectory: str = '.') -> None: - """ - Load a custom ONNX model from a custom directory specified by the user. The primary use case for this - function is to load models that are not included in the package and cannot be placed in the package - directory because of write permissions (e.g. on restrictive HPC systems) or storage allocations. - - Args: - modelDirectory: Directory where the model is located. Defaults to the current directory. - networkName: Name of the network. This is the name used to refer to the ONNX network. It has to be - unique, not contain any spaces, and correspond to the name of the ONNX file (excluding the .onnx - extension). - modelName: Name of the model. This is the name that will be displayed in the model selection menu. It - can be any string desired. - descriptor: Descriptor/feature vector used by the model. pySIPFENN currently supports the following - descriptors: KS2022, and Ward2017. + Args: + baseStructList: The base structure to generate a random solid solution (RSS). It does _not_ need to be a simple + Bravais lattice, such as BCC lattice, but can be any ``Structure`` object or a list of them, if you need to + define them on per-case basis. In addition to `Structure` objects, you can use "magic" strings + corresponding to one of the structures in the library you can find under ``pysipfenn.misc`` directory or + loaded under ``self.prototypeLibrary`` attribute. The magic strings include, but are not limited to: + ``'BCC'``, ``'FCC'``, ``'HCP'``, ``'DHCP'``, ``'Diamond'``, and so on. You can invoke them by their name, e.g. ``BCC``, or + by passing ``self.prototypeLibrary['BCC']['structure']`` directly. If you pass a list to ``baseStructList``, + you are allowed to mix-and-match ``Structure`` objects and magic strings.
+ compList: The composition to populate the supercell with until KS2022 descriptor converges. You can use + pymatgen's ``Composition`` objects or strings of valid chemical formulas (symbol - atomic fraction pairs), + like ``'Fe0.5Ni0.3Cr0.2'``, ``'Fe50 Ni30 Cr20'``, or ``'Fe5 Ni3 Cr2'``. You can either pass a single entity, in + which case it will be used for all structures (use to run the same composition for different base + structures), or a list of entities, in which case pairs will be used in the order of the list. If you + pass a list to ``compList``, you are allowed to mix-and-match ``Composition`` objects and composition + strings. + minimumSitesPerExpansion: The minimum number of sites that the base structure will be expanded to (doubling + dimension-by-dimension) before it is used as expansion step/batch in each iteration of adding local + chemical environment information to the global ensemble. + The optimal value will depend on the number of species and their relative fractions in the composition. + Generally, low values (<20ish) will result in a slower convergence, as some extreme local chemical + environments will have strong influence on the global ensemble, and too high values (>150ish) will + result in a needlessly slow computation for not-complex compositions, as at least two iterations will + be processed. The default value is ``50`` and works well for simple cases. + featureConvergenceCriterion: The maximum difference between any feature belonging to the current iteration + (statistics based on the global ensemble of local chemical environments) and the previous iteration + (before last expansion) expressed as a fraction of the maximum value of each feature found in the OQMD + database at the time of SIPFENN creation (see ``KS2022_randomSolutions.maxFeaturesInOQMD`` array). + The default value is ``0.005``, corresponding to 0.5% of the maximum value.
+ compositionConvergenceCriterion: The maximum average difference between any element fraction belonging to + the current composition (net of all expansions) and the target composition (``comp``). The default value + is ``0.01``, corresponding to 1% deviation, which interpretation will depend on the number of elements + in the composition. + minimumElementOccurrences: The minimum number of times all elements must occur in the composition before it + is considered converged. This setting prevents the algorithm from converging before very dilute elements + like C in low-carbon steel, have had a chance to occur. The default value is ``10``. + plotParameters: If True, the convergence history will be plotted using plotly. The default value is ``False``, + but tracking them is recommended and will be accessible in the `metas` attribute of the Calculator under + the key ``'RSS'``. + printProgress: If True, the progress will be printed to the console. The default value is False. + mode: Mode of calculation. Options are ``serial`` (default) and ``parallel``. + max_workers: Number of workers to use in parallel mode. Defaults to ``8``. + + Returns: + A list of ``numpy.ndarray``s containing the ``KS2022`` descriptor, just like the ordinary ``KS2022``. **Please note + the stochastic nature of this algorithm**. The result will likely vary slightly between runs and parameters, + so if convergence is critical, verify it with a test matrix of ``minimumSitesPerExpansion``, + ``featureConvergenceCriterion``, and ``compositionConvergenceCriterion`` values. """ + # LIST-LIST: Assert that if both baseStruct and compList are lists, they have the same length + if isinstance(baseStructList, list) and isinstance(compList, list): + assert len(baseStructList) == len(compList), \ + 'baseStruct and compList must have the same length if both are lists. If you want to use the same ' \ + 'entity for all calculations, do not wrap it.' 
+ + # STRING / STRUCT handling and extension + if isinstance(baseStructList, str) or isinstance(baseStructList, Structure): + baseStructList = [baseStructList] + if isinstance(compList, list) and len(compList) > 1: + baseStructList = baseStructList * len(compList) + else: + assert isinstance(baseStructList, list), 'baseStruct must be a list if it is not a string or Structure.' - self.loadedModels.update({ - networkName: onnx2torch.convert(onnx.load(f'{modelDirectory}/{networkName}.onnx')).float() - }) - self.models.update({ - networkName: { - 'name': modelName, - 'descriptor': descriptor - }}) - self.network_list.append(networkName) - self.network_list_names.append(modelName) - self.network_list_available.append(networkName) - print(f'Loaded model {modelName} ({networkName}) from {modelDirectory}') + if isinstance(compList, str) or isinstance(compList, Composition): + compList = [compList] + if isinstance(baseStructList, list) and len(baseStructList) > 1: + compList = compList * len(baseStructList) + else: + assert isinstance(compList, list), 'compList must be a list if it is not a string or Composition.' + + # LISTS of STRING / STRUCT + for i in range(len(baseStructList)): + assert isinstance(baseStructList[i], (str, Structure)), \ + 'baseStruct must be a list of strings or Structure objects.' + if isinstance(baseStructList[i], str): + baseStructList[i] = string2prototype(self, baseStructList[i]) + + for i in range(len(compList)): + assert isinstance(compList[i], (str, Composition)), \ + 'compList must be a list of strings or Composition objects.' + if isinstance(compList[i], str): + c = Composition(compList[i]) + assert c.valid, f'Unrecognized composition string: {compList}. Please provide a valid composition ' \ + f'string, e.g. "Fe0.5Ni0.3Cr0.2", "Fe50 Ni30 Cr20", or "Fe5 Ni3 Cr2".' + compList[i] = c + + assert len(baseStructList) == len(compList), 'baseStruct and compList must have the same length at this point.' 
+ pairedInputAndSettings, descList, metaList = [], [], [] + + for i in range(len(baseStructList)): + pairedInputAndSettings.append( + (baseStructList[i], + compList[i], + minimumSitesPerExpansion, + featureConvergenceCriterion, + compositionConvergenceCriterion, + minimumElementOccurrences, + plotParameters, + printProgress, + True)) + + if mode == 'serial': + for base, comp, *settings in tqdm(pairedInputAndSettings): + desc, meta = KS2022_randomSolutions.generate_descriptor(base, comp, *settings) + descList.append(desc) + metaList.append(meta) - def makePredictions(self, - models: Dict[str, torch.nn.Module], - toRun: List[str], - dataInList: List[Union[List[float], np.array]]) -> List[list]: - """Makes predictions using PyTorch networks listed in toRun and provided in models dictionary. + elif mode == 'parallel': + print(pairedInputAndSettings) + descList, metaList = zip(*process_map( + wrapper_KS2022_randomSolutions_generate_descriptor, + pairedInputAndSettings, + max_workers=max_workers + )) + else: + raise ValueError('Incorrect calculation mode selected. Must be either `serial` or `parallel`.') + + if self.verbose: + print('Done!') + self.descriptorData = descList + self.metas['RSS'] = metaList + return descList + + # ******************************* PREDICTION RUNNERS (MID-LEVEL API) ******************************* + def makePredictions( + self, + models: Dict[str, torch.nn.Module], + toRun: List[str], + dataInList: List[Union[List[float], np.array]] + ) -> List[list]: + """Makes predictions using PyTorch networks listed in toRun and provided in models dictionary. Shared among all + "predict" functions. Args: - models: Dictionary of models to use. Keys are network names and values are PyTorch models. - toRun: List of networks to run. Must be a subset of models.keys(). + models: Dictionary of models to use. Keys are network names and values are PyTorch models loaded from ONNX + with ``loadModels()`` / ``loadModelCustom()`` or manually (fairly simple!). 
+ toRun: List of networks to run. It must be a subset of ``models.keys()``. dataInList: List of data to make predictions for. Each element of the list should be a descriptor accepted - by all networks in toRun. Can be a list of lists of floats or a list of numpy arrays. + by all networks in toRun. Can be a list of lists of floats or a list of numpy ``nd.array``s. Returns: - List of predictions. Each element of the list is a list of predictions for all ran network. The order of the - predictions is the same as the order of the networks in toRun. + List of predictions. Each element of the list is a list of predictions for all run networks. The order of the + predictions is the same as the order of the networks in ``toRun``. """ dataOuts = [] - print('Making predictions...') + if self.verbose: + print('Making predictions...') # Run for each network dataIn = torch.from_numpy(np.array(dataInList)).float() + assert set(toRun).issubset(set(models.keys())), 'Some networks to run are not available in the models.' for net in toRun: t0 = perf_counter() model = models[net] @@ -371,54 +657,40 @@ def makePredictions(self, tempOut = model(dataIn) t1 = perf_counter() dataOuts.append(tempOut.cpu().detach().numpy()) - print(f'Prediction rate: {round(len(tempOut) / (t1 - t0), 1)} pred/s') - print(f'Obtained {len(tempOut)} predictions from: {net}') + if self.verbose: + print(f'Prediction rate: {round(len(tempOut) / (t1 - t0), 1)} pred/s') + print(f'Obtained {len(tempOut)} predictions from: {net}') # Transpose and round the predictions dataOuts = np.array(dataOuts).T.tolist()[0] self.predictions = dataOuts return dataOuts - def findCompatibleModels(self, descriptor: str) -> List[str]: - """Finds all models compatible with a given descriptor based on the descriptor definitions loaded from the - models.json file. - - Args: - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipfenn.descriptorDefinitions - to see available modules or add yours. 
Available default descriptors are: 'Ward2017', 'KS2022'. - - Returns: - List of compatible models. - """ - - compatibleList = [] - for net in self.models: - if descriptor in self.models[net]['descriptor']: - compatibleList.append(net) - return compatibleList - - def runModels(self, - descriptor: str, - structList: List[Structure], - mode: str = 'serial', - max_workers: int = 4) -> List[list]: + # ******************************* TOP-LEVEL API ******************************* + def runModels( + self, + descriptor: str, + structList: List[Structure], + mode: str = 'serial', + max_workers: int = 4 + ) -> List[List[float]]: """Runs all loaded models on a list of Structures using specified descriptor. Supports serial and parallel computation modes. If parallel is selected, max_workers determines number of processes handling the featurization of structures (90-99+% of computational intensity) and models are then run in series. Args: - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipfenn.descriptorDefinitions - to see available modules or add yours. Available default descriptors are: 'Ward2017', 'KS2022'. + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` + to see available modules or add yours. Available default descriptors are: ``'Ward2017'``, ``'KS2022'``. structList: List of pymatgen Structure objects to run the models on. - mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for small datasets. - max_workers: Number of workers to use in parallel mode. Default is 4. Ignored in serial mode. If set to - None, will use all available cores. If set to 0, will use 1 core. + max_workers: Number of workers to use in parallel mode. Default is ``4``. Ignored in serial mode. 
If set to + ``None``, will use all available cores. If set to ``0``, will use ``1`` core. Returns: List of predictions. Each element of the list is a list of predictions for all ran networks. The order of the predictions is the same as the order of the input structures. The order of the networks is - the same as the order of the networks in self.network_list_available. If a network is not available, it + the same as the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. If a network is not compatible with the selected descriptor, it will not be included in the list. """ @@ -435,57 +707,65 @@ def runModels(self, print('Calculating descriptors...') if descriptor == 'Ward2017': - self.descriptorData = self.calculate_Ward2017(structList=structList, - mode=mode, - max_workers=max_workers) + self.descriptorData = self.calculate_Ward2017( + structList=structList, + mode=mode, + max_workers=max_workers + ) elif descriptor == 'KS2022': - self.descriptorData = self.calculate_KS2022(structList=structList, - mode=mode, - max_workers=max_workers) + self.descriptorData = self.calculate_KS2022( + structList=structList, + mode=mode, + max_workers=max_workers + ) else: print('Descriptor handing not implemented. 
Check spelling.') raise AssertionError - self.predictions = self.makePredictions(models=self.loadedModels, - toRun=self.toRun, - dataInList=self.descriptorData) + self.makePredictions( + models=self.loadedModels, + toRun=self.toRun, + dataInList=self.descriptorData + ) return self.predictions - def runModels_dilute(self, - descriptor: str, - structList: List[Structure], - baseStruct: Union[str, List[Structure]] = 'pure', - mode: str = 'serial', - max_workers: int = 4) -> List[list]: + def runModels_dilute( + self, + descriptor: str, + structList: List[Structure], + baseStruct: Union[str, List[Structure]] = 'pure', + mode: str = 'serial', + max_workers: int = 4 + ) -> List[List[float]]: """Runs all loaded models on a list of Structures using specified descriptor. A critical difference - from runModels() is that this function will call dilute-specific featurizer, e.g. KS2022_dilute when KS2022 is + from runModels() is that this function will call dilute-specific featurizer, e.g. ``KS2022_dilute`` when ``'KS2022'`` is provided as input, which can only be used on dilute structures (both based on pure elements and on custom base structures, e.g. TCP endmember configurations) that contain a single alloying atom. Speed increases are substantial compared to the KS2022 descriptor, which is more general and can be used on any structure. - Supports serial and parallel modes in the same way as runModels(). + Supports serial and parallel modes in the same way as ``runModels()``. Args: descriptor: Descriptor to use for predictions. Must be one of the descriptors which support the dilute - structures (i.e. *_dilute). See pysipfenn.descriptorDefinitions to see available modules or add yours - here. Available default dilute descriptors are now: 'KS2022'. The 'KS2022' can also be called from - runModels() function, but is not recommended for dilute alloys, as it negates the speed increase of the + structures (i.e. `*_dilute`). 
See ``pysipfenn.descriptorDefinitions`` to see available modules or add yours + here. Available default dilute descriptors are now: ``'KS2022'``. The ``'KS2022'`` can also be called from + ``runModels()`` function, but is not recommended for dilute alloys, as it negates the speed increase of the dilute structure featurizer. - structList: List of pymatgen Structure objects to run the models on. Must be dilute structures as described + structList: List of pymatgen ``Structure`` objects to run the models on. Must be dilute structures as described above. baseStruct: Non-diluted references for the dilute structures. Defaults to 'pure', which assumes that the structures are based on pure elements and generates references automatically. Alternatively, a list of structures can be provided, which can be either pure elements or custom base structures (e.g. TCP endmember configurations). - mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for small datasets. - max_workers: Number of workers to use in parallel mode. Default is 4. Ignored in serial mode. If set to - None, will use all available cores. If set to 0, will use 1 core. + max_workers: Number of workers to use in parallel mode. Default is ``4``. Ignored in serial mode. If set to + ``None``, will use all available cores. If set to ``0``, will use ``1`` core. Returns: - List of predictions. Each element of the list is a list of predictions for all ran networks. The + List of predictions. Each element of the list is a list of predictions for all run networks. The order of the predictions is the same as the order of the input structures. The order of the networks - is the same as the order of the networks in self.network_list_available. If a network is not available, + is the same as the order of the networks in ``self.network_list_available``. 
If a network is not available, it will not be included in the list. If a network is not compatible with the selected descriptor, it will not be included in the list. """ @@ -501,79 +781,133 @@ def runModels_dilute(self, print('Calculating descriptors...') if descriptor == 'KS2022': - self.descriptorData = self.calculate_KS2022_dilute(structList=structList, - baseStruct=baseStruct, - mode=mode, - max_workers=max_workers) + self.descriptorData = self.calculate_KS2022_dilute( + structList=structList, + baseStruct=baseStruct, + mode=mode, + max_workers=max_workers + ) else: print('Descriptor handing not implemented. Check spelling.') raise AssertionError - self.predictions = self.makePredictions(models=self.loadedModels, - toRun=self.toRun, - dataInList=self.descriptorData) + self.makePredictions( + models=self.loadedModels, + toRun=self.toRun, + dataInList=self.descriptorData + ) return self.predictions - def get_resultDicts(self) -> List[dict]: - """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the - names of the networks. The order of the dictionaries is the same as the order of the input structures passed - through runModels() functions. + def runModels_randomSolutions( + self, + descriptor: str, + baseStructList: Union[str, Structure, List[str], List[Structure], List[Union[Composition, str]]], + compList: Union[str, List[str], Composition, List[Composition], List[Union[Composition, str]]], + minimumSitesPerExpansion: int = 50, + featureConvergenceCriterion: float = 0.005, + compositionConvergenceCriterion: float = 0.01, + minimumElementOccurrences: int = 10, + plotParameters: bool = False, + printProgress: bool = False, + mode: str = 'serial', + max_workers: int = 8, + ) -> List[List[float]]: + """A top-level convenience wrapper for the ``calculate_KS2022_randomSolutions`` function. 
It passes all the + arguments to that function directly (except for ``descriptor``) and uses its result to run all applicable models. + The result is a list of predictions for all run networks. + + Args: + descriptor: Descriptor to use for predictions. Must be one of the descriptors which support the random + solid solution structures (i.e. `*_randomSolutions`). See ``pysipfenn.descriptorDefinitions`` to see + available modules or add yours here. As of v0.15.0, the only available descriptor is + ``'KS2022'`` through its ``KS2022_randomSolutions`` submodule. + baseStructList: See ``calculate_KS2022_randomSolutions`` for details. You can mix-and-match ``Structure`` + objects and magic strings, either individually (to use the same entity for all calculations) or in a + list. + compList: See ``calculate_KS2022_randomSolutions`` for details. You can mix-and-match ``Composition`` + objects and composition strings, either individually (to use the same entity for all calculations) + or in a list. + minimumSitesPerExpansion: See ``calculate_KS2022_randomSolutions``. + featureConvergenceCriterion: See ``calculate_KS2022_randomSolutions``. + compositionConvergenceCriterion: See ``calculate_KS2022_randomSolutions``. + minimumElementOccurrences: See ``calculate_KS2022_randomSolutions``. + plotParameters: See ``calculate_KS2022_randomSolutions``. + printProgress: See ``calculate_KS2022_randomSolutions``. + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not + recommended for small datasets. Returns: - List of dictionaries with the predictions. + List of predictions. They will correspond to the order of the networks in ``self.toRun`` established by the + ``findCompatibleModels()`` function. If a network is not available, it will not be included in the list.
""" - return [dict(zip(self.toRun, pred)) for pred in self.predictions] - def get_resultDictsWithNames(self) -> List[dict]: - """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the - names of the networks and the names of the input structures. The order of the dictionaries is the same as the - order of the input structures passed through runModels() functions. Note that this function requires - self.inputFiles to be set, which is done automatically when using runFromDirectory() or - runFromDirectory_dilute() but not when using runModels() or runModels_dilute(), as the input structures are - passed directly to the function and names have to be provided separately by assigning them to self.inputFiles. + self.toRun = list(set(self.findCompatibleModels(descriptor)).intersection(set(self.network_list_available))) + if len(self.toRun) == 0: + print('The list of models to run is empty. This may be caused by selecting a descriptor not ' + 'defined/available, or if the selected descriptor does not correspond to any available network. ' + 'Check spelling and invoke the downloadModels() function if you are using base models.') + raise AssertionError + else: + print(f'Running {self.toRun} models') - Returns: - List of dictionaries with the predictions. 
- """ - assert self.inputFiles is not [] - assert len(self.inputFiles) == len(self.predictions) - return [ - dict(zip(['name'] + self.toRun, [name] + pred)) - for name, pred in - zip(self.inputFiles, self.predictions)] + print('Calculating descriptors...') + if descriptor == 'KS2022': + self.descriptorData = self.calculate_KS2022_randomSolutions( + baseStructList=baseStructList, + compList=compList, + minimumSitesPerExpansion=minimumSitesPerExpansion, + featureConvergenceCriterion=featureConvergenceCriterion, + compositionConvergenceCriterion=compositionConvergenceCriterion, + minimumElementOccurrences=minimumElementOccurrences, + plotParameters=plotParameters, + printProgress=printProgress, + mode=mode, + max_workers=max_workers + ) + else: + print('Descriptor handing not implemented. Check spelling.') + raise AssertionError + + self.makePredictions( + models=self.loadedModels, + toRun=self.toRun, + dataInList=self.descriptorData) + + return self.predictions - def runFromDirectory(self, - directory: str, - descriptor: str, - mode: str = 'serial', - max_workers: int = 4 - ) -> List[list]: + def runFromDirectory( + self, + directory: str, + descriptor: str, + mode: str = 'serial', + max_workers: int = 4 + ) -> List[list]: """Runs all loaded models on a list of Structures it automatically imports from a specified directory. The - directory must contain only atomic structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., + directory must contain only atomic structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted using natsort library, so the order of the structures in the directory, as defined by the operating system, is not important. Natural sorting, - for example, will sort the structures in the following order: '1-Fe', '2-Al', '10-xx', '11-xx', '20-xx', - '21-xx', '11111-xx', etc. This is useful when the structures are named using a numbering system. 
The order of + for example, will sort the structures in the following order: ``'1-Fe'``, ``'2-Al'``, ``'10-xx'``, ``'11-xx'``, ``'20-xx'``, + ``'21-xx'``, ``'11111-xx'``, etc. This is useful when the structures are named using a numbering system. The order of the predictions is the same as the order of the input structures. The order of the networks in a prediction - is the same as the order of the networks in self.network_list_available. If a network is not available, + is the same as the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. Args: directory: Directory containing the structures to run the models on. The directory must contain only atomic - structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., or a mix of these. The structures + structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted as described above. - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipgenn.descriptorDefinitions + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` for a list of available descriptors. - mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for small datasets. - max_workers: Number of workers to use in parallel mode. Default is 4. Ignored in serial mode. If set to - None, will use all available cores. If set to 0, will use 1 core. + max_workers: Number of workers to use in parallel mode. Default is ``4``. Ignored in serial mode. If set to + ``None``, will use all available cores. If set to ``0``, will use 1 core. Returns: - List of predictions. Each element of the list is a list of predictions for all ran networks. 
The order of + List of predictions. Each element of the list is a list of predictions for all run networks. The order of the predictions is the same as the order of the input structures. The order of the networks is the same as - the order of the networks in self.network_list_available. If a network is not available, it will not be + the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. """ @@ -593,35 +927,35 @@ def runFromDirectory_dilute(self, mode: str = 'serial', max_workers: int = 8) -> None: """Runs all loaded models on a list of dilute Structures it automatically imports from a specified directory. - The directory must contain only atomic structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., + The directory must contain only atomic structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted using natsort library, so the order of the structures in the directory, as defined by the operating system, is not important. Natural sorting, - for example, will sort the structures in the following order: '1-Fe', '2-Al', '10-xx', '11-xx', '20-xx', - '21-xx', '11111-xx', etc. This is useful when the structures are named using a numbering system. The order of + for example, will sort the structures in the following order: ``'1-Fe'``, ``'2-Al'``, ``'10-xx'``, ``'11-xx'``, ``'20-xx'``, + ``'21-xx'``, ``'11111-xx'``, etc. This is useful when the structures are named using a numbering system. The order of the predictions is the same as the order of the input structures. The order of the networks in a prediction is the same as the order of the networks in self.network_list_available. If a network is not available, it will not be included in the list. Args: directory: Directory containing the structures to run the models on. 
The directory must contain only atomic - structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., or a mix of these. The structures + structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted as described above. The structures must be dilute structures, i.e. they must contain only one alloying element. - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipgenn.descriptorDefinitions + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` for a list of available descriptors. - baseStruct: Non-diluted references for the dilute structures. Defaults to 'pure', which assumes that the + baseStruct: Non-diluted references for the dilute structures. Defaults to ``'pure'``, which assumes that the structures are based on pure elements and generates references automatically. Alternatively, a list of structures can be provided, which can be either pure elements or custom base structures (e.g. TCP endmember configurations). - mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for small datasets. - max_workers: Number of workers to use in parallel mode. Default is 8. Ignored in serial mode. If set to - None, will use all available cores. If set to 0, will use 1 core. + max_workers: Number of workers to use in parallel mode. Default is ``8``. Ignored in serial mode. If set to + ``None``, will use all available cores. If set to ``0``, will use 1 core. Returns: - List of predictions. Each element of the list is a list of predictions for all ran networks. The order of + List of predictions. Each element of the list is a list of predictions for all run networks. 
The order of the predictions is the same as the order of the input structures. The order of the networks is the same as - the order of the networks in self.network_list_available. If a network is not available, it will not be + the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. """ @@ -636,15 +970,46 @@ def runFromDirectory_dilute(self, max_workers=max_workers) print('Done!') + + # ******************************* POST-PROCESSING ******************************* + def get_resultDicts(self) -> List[dict]: + """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the + names of the networks. The order of the dictionaries is the same as the order of the input structures passed + through ``runModels()`` functions. + + Returns: + List of dictionaries with the predictions. + """ + return [dict(zip(self.toRun, pred)) for pred in self.predictions] + + def get_resultDictsWithNames(self) -> List[dict]: + """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the + names of the networks and the names of the input structures. The order of the dictionaries is the same as the + order of the input structures passed through ``runModels()`` functions. Note that this function requires + ``self.inputFiles`` to be set, which is done automatically when using ``runFromDirectory()`` or + ``runFromDirectory_dilute()`` but not when using ``runModels()`` or ``runModels_dilute()``, as the input structures are + passed directly to the function and names have to be provided separately by assigning them to ``self.inputFiles``. + + Returns: + List of dictionaries with the predictions. 
+ """ + assert self.inputFiles is not [] + assert len(self.inputFiles) == len(self.predictions) + return [ + dict(zip(['name'] + self.toRun, [name] + pred)) + for name, pred in + zip(self.inputFiles, self.predictions)] + + def writeResultsToCSV(self, file: str) -> None: - """Writes the results to a CSV file. The first column is the name of the structure. If the self.inputFiles - attribute is populated automatically by runFromDirectory() or set manually, the names of the structures will - be used. Otherwise, the names will be '1', '2', '3', etc. The remaining columns are the predictions for each - network. The order of the columns is the same as the order of the networks in self.network_list_available. + """Writes the results to a CSV file. The first column is the name of the structure. If the ``self.inputFiles`` + attribute is populated automatically by ``runFromDirectory()`` or set manually, the names of the structures will + be used. Otherwise, the names will be ``'1'``, ``'2'``, ``'3'``, etc. The remaining columns are the predictions for each + network. The order of the columns is the same as the order of the networks in ``self.network_list_available``. Args: file: Name of the file to write the results to. If the file already exists, it will be overwritten. If the - file does not exist, it will be created. The file must have a '.csv' extension to be recognized + file does not exist, it will be created. The file must have a ``'.csv'`` extension to be recognized correctly. """ @@ -663,16 +1028,17 @@ def writeResultsToCSV(self, file: str) -> None: def writeDescriptorsToCSV(self, descriptor: str, file: str = 'descriptorData.csv') -> None: """Writes the descriptor data to a CSV file. The first column is the name of the structure. If the - self.inputFiles attribute is populated automatically by runFromDirectory() or set manually, the names of the - structures will be used. Otherwise, the names will be '1', '2', '3', etc. 
The remaining columns are the + ``self.inputFiles`` attribute is populated automatically by runFromDirectory() or set manually, the names of the + structures will be used. Otherwise, the names will be ``'1'``, ``'2'``, ``'3'``, etc. The remaining columns are the descriptor values. The order of the columns is the same as the order of the labels in the descriptor definition file. Args: - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipgenn.descriptorDefinitions - for a list of available descriptors, such as 'KS2022' and 'Ward2017'. + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` + for a list of available descriptors, such as ``'KS2022'`` and ``'Ward2017'``. It provides the labels for the + descriptor values. file: Name of the file to write the results to. If the file already exists, it will be overwritten. If the - file does not exist, it will be created. The file must have a '.csv' extension to be recognized + file does not exist, it will be created. The file must have a ``'.csv'`` extension to be recognized correctly. """ @@ -707,14 +1073,16 @@ def destroy(self) -> None: del self +# ************************ SATELLITE FUNCTIONS ************************ + def ward2ks2022(ward2017: np.ndarray) -> np.ndarray: - """Converts a Ward 2017 descriptor to a KS2022 descriptor (which is its subset). + """Converts a ``Ward2017`` descriptor to a ``KS2022`` descriptor (which is its subset). Args: - ward2017: Ward2017 descriptor. Must be a 1D NumPy array of length 271. + ward2017: ``Ward2017`` descriptor. Must be a 1D ``np.ndarray`` of length ``271``. Returns: - KS2022 descriptor array. + ``KS2022`` descriptor array. 
""" assert isinstance(ward2017, np.ndarray) @@ -729,3 +1097,53 @@ def ward2ks2022(ward2017: np.ndarray) -> np.ndarray: ), axis=-1, dtype=np.float32) return ks2022 + +def overwritePrototypeLibrary(prototypeLibrary: dict) -> None: + """Destructively overwrites the prototype library with a custom one. Used by the ``appendPrototypeLibrary()`` function + to persist its changes. The other main use it to restore the default one to the original state based on a backup + made earlier (see tests for an example).""" + yaml_customDumper = YAML() + yaml_customDumper.top_level_colon_align = True + + with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('w+') as f: + # Restructure the prototype library back to the original format of a list of dictionaries + print(prototypeLibrary) + prototypeList = [ + { + 'name': key, + 'origin': value['origin'], + 'POSCAR': LiteralScalarString(str(value['POSCAR'])) + } + for key, value in prototypeLibrary.items()] + print(prototypeList) + # Persist the prototype library + yaml_customDumper.dump(prototypeList, f) + print(f'Updated prototype library persisted to {f.name}') + +# *** HELPERS *** +def string2prototype(c: Calculator, prototype: str) -> Structure: + """Converts a prototype string to a pymatgen ``Structure`` object. + + Args: + c: ``Calculator`` object with the ``prototypeLibrary``. + prototype: Prototype string. + + Returns: + ``Structure`` object. + """ + assert isinstance(prototype, str), 'Prototype string must be a string.' + assert prototype in c.prototypeLibrary, \ + f'Unrecognized magic string for baseStruct: {prototype}. Please use one of the recognized magic ' \ + f'strings: {list(c.prototypeLibrary.keys())} or provide a Structure object.' 
+ s: Structure = c.prototypeLibrary[prototype]['structure'] + assert s.is_valid(), f'Invalid structure: {s}' + return s + +# *** WRAPPERS *** +def wrapper_KS2022_dilute_generate_descriptor(args): + """Wraps the ``KS2022_dilute.generate_descriptor`` function for parallel processing.""" + return KS2022_dilute.generate_descriptor(*args) + +def wrapper_KS2022_randomSolutions_generate_descriptor(args): + """Wraps the ``KS2022_randomSolutions.generate_descriptor`` function for parallel processing.""" + return KS2022_randomSolutions.generate_descriptor(*args) \ No newline at end of file diff --git a/pysipfenn/descriptorDefinitions/KS2022_dilute.py b/pysipfenn/descriptorDefinitions/KS2022_dilute.py index dc160be..0404b55 100644 --- a/pysipfenn/descriptorDefinitions/KS2022_dilute.py +++ b/pysipfenn/descriptorDefinitions/KS2022_dilute.py @@ -4,18 +4,13 @@ import time import numpy as np import os -from pymatgen.core import Structure, Element +from pymatgen.core import Structure, Element, PeriodicSite from pymatgen.analysis.local_env import VoronoiNN from pymatgen.symmetry.analyzer import SpacegroupAnalyzer import json from tqdm import tqdm from collections import Counter - -citations = [ - 'Adam M. Krajewski, Jonathan W. Siegel, Jinchao Xu, Zi-Kui Liu, Extensible Structure-Informed Prediction of ' - 'Formation Energy with improved accuracy and usability employing neural networks, Computational ' - 'Materials Science, Volume 208, 2022, 111254' - ] +from typing import List periodic_table_size = 112 attribute_matrix = np.loadtxt(os.path.join(os.path.dirname(__file__), 'Magpie_element_properties.csv'), delimiter=',') @@ -27,7 +22,11 @@ # A prototype function which computes a weighted average over neighbors, # weighted by the area of the voronoi cell between them. 
-def local_env_function(local_env, site, struct): +def local_env_function( + local_env: dict, + site: PeriodicSite, + struct: Structure, +) -> List[np.ndarray]: local_attributes = np.zeros(attribute_matrix.shape[1]) for key, value in site.species.get_el_amt_dict().items(): local_attributes += value * attribute_matrix[Element(key).Z - 1, :] @@ -68,7 +67,7 @@ def local_env_function(local_env, site, struct): elemental_properties_attributes[1]] -def findDilute(struct): +def findDilute(struct: Structure) -> int: spoList = struct.species_and_occu spCount = dict(Counter(spoList)) spDilute = [spoList.index(sp) for sp in spCount if spCount[sp] == 1] @@ -76,11 +75,18 @@ def findDilute(struct): return spDilute[0] else: print( - 'Custom dilute structure descriptor calculation is defined only one dilute species in a single element matrix') + 'The automated dilute structure descriptor calculation is defined only for cases where there is exactly ONE' + ' dilute species, which exists in a SINGLE component matrix. 
If you are using a multi-component system, ' + 'please provide a base `Structure` object manually.') raise RuntimeError -def generate_voronoi_attributes(struct, baseStruct='pure', local_funct=local_env_function): +def generate_voronoi_attributes( + struct: Structure, + baseStruct: str = 'pure', + local_funct=local_env_function +) -> (np.ndarray, np.ndarray): + local_generator = LocalAttributeGenerator(struct, local_funct) # Generate a base structure of pure elemental solid or take one as input @@ -99,7 +105,7 @@ def generate_voronoi_attributes(struct, baseStruct='pure', local_funct=local_env baseStruct = struct.copy() for sp in set(baseStruct.species): baseStruct.replace_species({sp: 'A'}) - # Find position of the 1 dilute atom and calculate output for it + # Find the position of the 1 dilute atom and calculate output for it diluteSite = findDilute(struct) else: raise TypeError @@ -145,17 +151,20 @@ def generate_voronoi_attributes(struct, baseStruct='pure', local_funct=local_env # A wrapper class which contains an instance of an NN generator (the default is a VoronoiNN), a structure, and # a function which computes the local environment attributes. 
class LocalAttributeGenerator: - def __init__(self, struct, local_env_func, - nn_generator=VoronoiNN(compute_adj_neighbors=False, extra_nn_info=False)): + def __init__( + self, + struct: Structure, + local_env_func, + nn_generator: VoronoiNN = VoronoiNN(compute_adj_neighbors=False, extra_nn_info=False)): self.generator = nn_generator self.struct = struct self.function = local_env_func - def generate_local_attributes(self, n): + def generate_local_attributes(self, n: int): local_env = self.generator.get_voronoi_polyhedra(self.struct, n) return self.function(local_env, self.struct[n], self.struct) - def generate_local_attributes_diluteSite(self, n): + def generate_local_attributes_diluteSite(self, n: int): local_env = self.generator.get_voronoi_polyhedra(self.struct, n) local_env_result = self.function(local_env, self.struct[n], self.struct) @@ -172,7 +181,10 @@ def generate_local_attributes_diluteSite(self, n): # Calculates the attributes corresponding to the most common elements. -def magpie_mode(attribute_properties, axis=0): +def magpie_mode( + attribute_properties, + axis: int = 0 +) -> np.ndarray: scores = np.unique(np.ravel(attribute_properties[:, 0])) # get all unique atomic numbers max_occurrence = 0 top_elements = [] @@ -191,7 +203,10 @@ def magpie_mode(attribute_properties, axis=0): return output / len(top_elements) -def generate_descriptor(struct: Structure, baseStruct='pure'): +def generate_descriptor( + struct: Structure, + baseStruct='pure' +) -> np.ndarray: diff_properties, attribute_properties = generate_voronoi_attributes(struct, baseStruct=baseStruct) properties = np.concatenate( (np.stack( @@ -250,11 +265,15 @@ def generate_descriptor(struct: Structure, baseStruct='pure'): return properties -def cite(): - return citation +def cite() -> List[str]: + return [ + 'Adam M. Krajewski, Jonathan W. 
Siegel, Jinchao Xu, Zi-Kui Liu, Extensible Structure-Informed Prediction of ' + 'Formation Energy with improved accuracy and usability employing neural networks, Computational ' + 'Materials Science, Volume 208, 2022, 111254' + ] -def profile(test='JVASP-10001', nRuns=10): +def profile(test='JVASP-10001', nRuns=10) -> None: if test == 'diluteNiAlloy': print( f'KS2022 profiling/testing task will calculate a descriptor for a dilute Ni alloy {nRuns} times in series.') @@ -270,7 +289,7 @@ def profile(test='JVASP-10001', nRuns=10): print('Done!') -def profileParallel(test='JVASP-10001', nRuns=1000): +def profileParallel(test='JVASP-10001', nRuns=1000) -> None: from tqdm.contrib.concurrent import process_map if test == 'diluteNiAlloy': print( diff --git a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py index dc173ea..5e1c494 100755 --- a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py +++ b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py @@ -5,17 +5,12 @@ import os from pymatgen.core import Structure, Element, Composition from pymatgen.analysis.local_env import VoronoiNN -import json from collections import Counter from typing import List, Union, Tuple import random from importlib import resources - -citations = [ - 'Adam M. Krajewski, Jonathan W. Siegel, Jinchao Xu, Zi-Kui Liu, Extensible Structure-Informed Prediction of ' - 'Formation Energy with improved accuracy and usability employing neural networks, Computational ' - 'Materials Science, Volume 208, 2022, 111254' -] +from tqdm.contrib.concurrent import process_map +import pysipfenn periodic_table_size = 112 attribute_matrix = np.loadtxt(os.path.join(os.path.dirname(__file__), 'Magpie_element_properties.csv'), delimiter=',') @@ -161,30 +156,29 @@ def generate_descriptor(struct: Structure, printProgress: bool = True, returnMeta: bool = False, ) -> Union[np.ndarray, Tuple[np.ndarray, dict]]: - """Main functionality. 
Generates the KS2022 descriptor for a given composition randomly distributed on a given - structure until the convergence criteria are met. The descriptor is KS2022 which is compatible with all KS2022 - models and approaches values that would be reached by infinite supercell size. + """**Main functionality.** Generates the KS2022 descriptor for a given composition randomly distributed on a given + structure until the convergence criteria are met. The descriptor is **KS2022** which is compatible with all KS2022 + models. It approaches values that would be reached by infinite supercell size. Args: - struct: A pymatgen Structure object that will be used as the basis for the structure to be generated. It can + struct: A pymatgen `Structure` object that will be used as the basis for the structure to be generated. It can be occupied by any species without affecting the result since all will be replaced by the composition. - comp: A pymatgen Composition object that will be randomly distributed on the structure within accuracy - determined by the compositionConvergenceCriterion. + comp: A pymatgen `Composition` object that will be randomly distributed on the structure within accuracy + determined by the `compositionConvergenceCriterion`. minimumSitesPerExpansion: The minimum number of sites that the base structure will be expanded to (doubling - dimension-by-dimension) before it will be used as expansion step in each iteration adding local chemical + dimension-by-dimension) before it is used as an expansion step in each iteration adding local chemical environment information to the global pool. Optimal value will depend on the number of species and their relative fractions in the composition. Generally, low values will result in slower convergence (<20ish) and too high values (>150ish) will result in slower computation. The default value is 50. 
- featureConvergenceCriterion: The maximum difference between any feature belonging to the current iteration - (statistics based on the - global ensemble of local chemical environments) and the previous iteration (before last expansion) - expressed as a fraction of the maximum value of each feature found in the OQMD database at the time of - SIPFENN creation (see maxFeaturesInOQMD array). The default value is 0.01, corresponding to 1% of the - maximum value. + featureConvergenceCriterion: **The maximum difference between any feature belonging to the current iteration + (statistics based on the global ensemble of local chemical environments) and the previous two iterations + (before the last expansion, and the one before that)** expressed as a fraction of the maximum value of each + structure-dependent KS2022 feature found in the OQMD database at the time of SIPFENN creation + (see `maxFeaturesInOQMD` array). The default value is 0.005, corresponding to 0.5% of the maximum value. compositionConvergenceCriterion: The maximum average difference between any element fraction belonging in the - current composition (all expansions) and the the target composition (comp). The default value is 0.01, - corresponding to deviation depending on the number of elements in the composition. + current composition (superposition of all expansions) and the target composition (comp). The default value + is 0.01, corresponding to deviation depending on the number of elements in the composition. minimumElementOccurrences: The minimum number of times all elements must occur in the composition before it is considered converged. This is to prevent the algorithm from converging before very dilute elements have had a chance to occur. The default value is 10. @@ -194,7 +188,7 @@ def generate_descriptor(struct: Structure, descriptor. The default value is False. Returns: By default, a numpy array containing the KS2022 descriptor. 
Please note the stochastic nature of the - algorithm and that the result may vary slightly between runs and parameters. If returnMeta is True, + algorithm, and that the result may vary slightly between runs and parameters. If returnMeta is True, a tuple containing the descriptor and a dictionary containing the convergence history will be returned. """ @@ -216,14 +210,22 @@ def generate_descriptor(struct: Structure, propHistory = [] diffHistory = [] allOccupations = [] - maxDiff = 1 - compositionDistance = 0 + maxDiff = 5 + compositionDistance = 1 minOccupationCount = 0 - properties = None + properties: np.ndarray = None + currentComposition: Composition = None if printProgress: print(f'#Atoms | Comp. Distance AVG | Convergence Crit. MAX | Occupation Count MIN') + if maxDiff < featureConvergenceCriterion: + raise AssertionError('Invalid convergence criteria (maxDiff < featureConvergenceCriterion).') + if compositionDistance < compositionConvergenceCriterion: + raise AssertionError('Invalid convergence criteria (compositionDistance > compositionConvergenceCriterion).') + if minOccupationCount > minimumElementOccurrences: + raise AssertionError('Invalid convergence criteria (minOccupationCount > minimumElementOccurrences).') + while maxDiff > featureConvergenceCriterion \ or compositionDistance > compositionConvergenceCriterion \ or minOccupationCount < minimumElementOccurrences: @@ -310,11 +312,16 @@ def generate_descriptor(struct: Structure, propHistory.append(properties) # Calculate the difference between the current step and the previous step and divide it by maximum value of # each feature found in OQMD to normalize the difference. 
- if len(propHistory) > 1: + if len(propHistory) > 2: + # Current iteration diff diff = np.subtract(properties, propHistory[-2]) diff /= maxFeaturesInOQMD diffHistory.append(diff) - maxDiff = np.max(np.abs(diff)) + # Calculate the additional diff to one level older iteration + diff2 = np.subtract(properties, propHistory[-3]) + diff2 /= maxFeaturesInOQMD + # Calculate the maximum difference across both differences + maxDiff = max(np.concatenate((diff, diff2), axis=0)) if printProgress: print(f'{attribute_properties.shape[0]:^6} | ' f'{compositionDistance: 18.6f} | ' @@ -326,13 +333,7 @@ def generate_descriptor(struct: Structure, f'{compositionDistance: 18.6f} | ' f'{"(init)":^21} | ' f'{minOccupationCount:^4}') - - if returnMeta: - metaData = {'diffHistory': diffHistory, - 'propHistory': propHistory, - 'finalAtomsN': attribute_properties.shape[0], - 'finalCompositionDistance': compositionDistance - } + # ^^^ End of the long while-loop above if plotParameters: import plotly.express as px @@ -359,7 +360,13 @@ def generate_descriptor(struct: Structure, assert properties.shape == (256,) assert isinstance(properties, np.ndarray) if returnMeta: - return properties, metaData + return properties, { + 'diffHistory': diffHistory, + 'propHistory': propHistory, + 'finalAtomsN': attribute_properties.shape[0], + 'finalCompositionDistance': compositionDistance, + 'finalComposition': currentComposition.fractional_composition + } else: return properties else: @@ -368,7 +375,11 @@ def generate_descriptor(struct: Structure, def cite() -> List[str]: """Citation/s for the descriptor.""" - return citations + return [ + 'Adam M. Krajewski, Jonathan W. 
Siegel, Jinchao Xu, Zi-Kui Liu, Extensible Structure-Informed Prediction of ' + 'Formation Energy with improved accuracy and usability employing neural networks, Computational ' + 'Materials Science, Volume 208, 2022, 111254' + ] def onlyStructural(descriptor: np.ndarray) -> np.ndarray: @@ -380,8 +391,8 @@ def onlyStructural(descriptor: np.ndarray) -> np.ndarray: Returns: A 103-length numpy array of the structure-dependent part of the KS2022 descriptor. Useful in cases where the descriptor is used as a fingerprint to compare polymorphs of the same compound. - """ + assert isinstance(descriptor, np.ndarray) assert descriptor.shape == (256,) descriptorSplit = np.split(descriptor, [68, 73, 93, 98, 113]) @@ -418,54 +429,43 @@ def profile(test: str = 'FCC', the descriptor and a dictionary containing the convergence history, or None. In either case, the descriptor will be persisted in `f'TestResult_KS2022_randomSolution_{test}_{nIterations}iter.csv'` file. """ + c = pysipfenn.Calculator(autoLoad=False) - if test == 'FCC': - print( - f'KS2022 Random Solid Solution profiling/testing task will calculate a descriptor for a random FCC alloy.') - matStr = '{"@module": "pymatgen.core.structure", "@class": "Structure", "charge": 0, "lattice": {"matrix": [[3.475145865948011, 0.0, 2.1279131306516942e-16], [5.588460777961125e-16, 3.475145865948011, 2.1279131306516942e-16], [0.0, 0.0, 3.475145865948011]], "pbc": [true, true, true], "a": 3.475145865948011, "b": 3.475145865948011, "c": 3.475145865948011, "alpha": 90.0, "beta": 90.0, "gamma": 90.0, "volume": 41.968081364279875}, "sites": [{"species": [{"element": "Ni", "occu": 1}], "abc": [0.0, 0.0, 0.0], "xyz": [0.0, 0.0, 0.0], "properties": {}, "label": "Ni"}, {"species": [{"element": "Ni", "occu": 1}], "abc": [0.0, 0.5, 0.5], "xyz": [2.7942303889805623e-16, 1.7375729329740055, 1.7375729329740055], "properties": {}, "label": "Ni"}, {"species": [{"element": "Ni", "occu": 1}], "abc": [0.5, 0.0, 0.5], "xyz": [1.7375729329740055, 0.0, 
1.7375729329740055], "properties": {}, "label": "Ni"}, {"species": [{"element": "Ni", "occu": 1}], "abc": [0.5, 0.5, 0.0], "xyz": [1.7375729329740057, 1.7375729329740055, 2.1279131306516942e-16], "properties": {}, "label": "Ni"}]}' - elif test == 'BCC': - print('KS2022 Random Solution profiling/testing task will calculate the descriptor for a random BCC alloy.') - matStr = '{"@module": "pymatgen.core.structure", "@class": "Structure", "charge": 0, "lattice": {"matrix": [[2.863035498949916, 0.0, 1.75310362981713e-16], [4.60411223268961e-16, 2.863035498949916, 1.75310362981713e-16], [0.0, 0.0, 2.863035498949916]], "pbc": [true, true, true], "a": 2.863035498949916, "b": 2.863035498949916, "c": 2.863035498949916, "alpha": 90.0, "beta": 90.0, "gamma": 90.0, "volume": 23.468222587900303}, "sites": [{"species": [{"element": "Fe", "occu": 1}], "abc": [0.0, 0.0, 0.0], "xyz": [0.0, 0.0, 0.0], "properties": {}, "label": "Fe"}, {"species": [{"element": "Fe", "occu": 1}], "abc": [0.5, 0.5, 0.5], "xyz": [1.4315177494749582, 1.431517749474958, 1.4315177494749582], "properties": {}, "label": "Fe"}]}' - elif test == 'HCP': - print('KS2022 Random Solution profiling/testing task will calculate the descriptor for a random HCP alloy.') - matStr = '{"@module": "pymatgen.core.structure", "@class": "Structure", "charge": 0, "lattice": {"matrix": [[1.4678659615336875, -2.54241842407729, 0.0], [1.4678659615336875, 2.54241842407729, 0.0], [0.0, 0.0, 4.64085615]], "pbc": [true, true, true], "a": 2.9357319230673746, "b": 2.9357319230673746, "c": 4.64085615, "alpha": 90.0, "beta": 90.0, "gamma": 120.00000000000001, "volume": 34.6386956150451}, "sites": [{"species": [{"element": "Ti", "occu": 1}], "abc": [0.3333333333333333, 0.6666666666666666, 0.25], "xyz": [1.4678659615336875, 0.8474728080257632, 1.1602140375], "properties": {}, "label": "Ti"}, {"species": [{"element": "Ti", "occu": 1}], "abc": [0.6666666666666667, 0.33333333333333337, 0.75], "xyz": [1.4678659615336878, -0.8474728080257634, 
3.4806421125], "properties": {}, "label": "Ti"}]}' - else: + try: + s = c.prototypeLibrary[test]['structure'] + except KeyError: raise NotImplementedError(f'Unrecognized test name: {test}') + name = f'TestResult_KS2022_randomSolution_{test}_{nIterations}iter.csv' + if nIterations == 1: - s = Structure.from_dict(json.loads(matStr)) d, meta = generate_descriptor(s, comp, plotParameters=plotParameters, returnMeta=True) print(f"Got meta with :{meta.keys()} keys") + with open(name, 'w+') as f: + f.writelines([f'{v}\n' for v in d]) + if returnDescriptorAndMeta: + return d, meta elif nIterations > 1: print(f'Running {nIterations} iterations in parallel...') - s = Structure.from_dict(json.loads(matStr)) - from tqdm.contrib.concurrent import process_map d = process_map(generate_descriptor, [s for _ in range(nIterations)], [comp for _ in range(nIterations)], chunksize=1, max_workers=8) + with open(name, 'w+') as f: + f.writelines([f'{",".join([str(v) for v in di])}\n' for di in d]) + return None else: - d = None - - if d is None: print('No descriptors generated.') return None - else: - name = f'TestResult_KS2022_randomSolution_{test}_{nIterations}iter.csv' - if nIterations == 1: - with open(name, 'w+') as f: - f.writelines([f'{v}\n' for v in d]) - if returnDescriptorAndMeta: - return d, meta - else: - with open(name, 'w+') as f: - f.writelines([f'{",".join([str(v) for v in di])}\n' for di in d]) - return None + print('Done!') if __name__ == "__main__": + print('You are running the KS2022_randomSolutions.py file directly. It is intended to be used as a module. ' + 'A profiling task will now commence, going over several cases. 
This will take a while.') + profile(test='FCC', plotParameters=True) profile(test='BCC', plotParameters=True) profile(test='HCP', plotParameters=True) diff --git a/pysipfenn/misc/__init__.py b/pysipfenn/misc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pysipfenn/misc/prototypeLibrary.yaml b/pysipfenn/misc/prototypeLibrary.yaml new file mode 100644 index 0000000..0b29eff --- /dev/null +++ b/pysipfenn/misc/prototypeLibrary.yaml @@ -0,0 +1,78 @@ +- name: FCC + origin: https://www.oqmd.org/materials/prototype/A1_Cu + POSCAR: | + A1_Cu + 1.0 + 0.00000 1.80750 1.80750 + 1.80750 0.00000 1.80750 + 1.80750 1.80750 0.00000 + Cu + 1 + Direct + 0.00000 0.00000 0.00000 +- name: BCC + origin: https://www.oqmd.org/materials/prototype/A2_W + POSCAR: | + W + 1.0 + -1.58250 1.58250 1.58250 + 1.58250 -1.58250 1.58250 + 1.58250 1.58250 -1.58250 + W + 1 + Direct + 0.00000 0.00000 0.00000 +- name: HCP + origin: https://www.oqmd.org/materials/prototype/A3_Mg + POSCAR: | + Mg + 1.0 + 3.20900 0.00000 0.00000 + -1.60450 2.77907 0.00000 + 0.00000 0.00000 5.21100 + Mg + 2 + Direct + 0.33333 0.66667 0.25000 + 0.66667 0.33333 0.75000 +- name: Diamond + origin: https://www.oqmd.org/materials/prototype/C(cF8) + POSCAR: | + Si + 1.0 + 0.000000 2.732954 2.732954 + 2.732954 0.000000 2.732954 + 2.732954 2.732954 0.000000 + Si + 2 + Direct + 0.500000 0.500000 0.500000 + 0.750000 0.750000 0.750000 +- name: DHCP + origin: https://www.oqmd.org/materials/prototype/La + POSCAR: | + Nd + 1.0 + 3.68789 0.00000 0.00000 + -1.84394 3.19380 0.00000 + 0.00000 0.00000 11.88128 + Nd + 4 + Direct + 0.00000 0.00000 0.00000 + 0.33333 0.66667 0.25000 + 0.00000 0.00000 0.50000 + 0.66667 0.33333 0.75000 +- name: Sn_A5 + origin: https://www.oqmd.org/materials/prototype/A5_Sn + POSCAR: | + Sn + 1.0 + -2.91550 2.91550 1.59100 + 2.91550 -2.91550 1.59100 + 2.91550 2.91550 -1.59100 + Sn + 2 + Direct + 0.00000 0.00000 0.00000 + 0.75000 0.25000 0.50000 diff --git 
a/pysipfenn/tests/testCaseFiles/TestFile_DescriptorData_KS2022_randomSolution_valueRangesMeans.csv b/pysipfenn/tests/testCaseFiles/TestFile_DescriptorData_KS2022_randomSolution_valueRangesMeans.csv index 731f1a8..e357428 100644 --- a/pysipfenn/tests/testCaseFiles/TestFile_DescriptorData_KS2022_randomSolution_valueRangesMeans.csv +++ b/pysipfenn/tests/testCaseFiles/TestFile_DescriptorData_KS2022_randomSolution_valueRangesMeans.csv @@ -1,256 +1,256 @@ -0 , 11.956922 -2.25653E-13 , 8.41582E-14 -0 , 11.956922 -0 , 11.956922 -2.59662E-14 , 9.997E-15 -0 , 1 -0 , 1 -0 , 0.051981524 -6.1973E-16 , 5.05368E-16 -0 , 0.051981524 -0 , 0.051981524 -1.96799E-15 , 7.81255E-15 -0 , 0.68017477 -0.495706 , 7.156059838 -0.3536611 , 1.836693654 -1.4406951 , 1.630815723 -2.075388 , 13.82523968 -3.031089 , 12.19442395 -0.69352 , 9.54204688 -0.3396263 , 2.305430422 -2.7079263 , 3.0541716 -4.030012 , 20.01459632 -5.103964 , 16.96042466 -1.143624 , 15.94405229 -0.7997267 , 4.173909624 -3.4152157 , 3.301960464 -5.195869 , 30.99760564 -7.014918 , 27.69564488 -38.30625 , 497.9362926 -21.44297 , 118.9397736 -172.6905 , 138.1111104 -225.01376 , 1032.616791 -239.11011 , 894.505668 -0.2121525 , 2.71884916 -0.08371147 , 0.689176477 -0.7313326 , 0.811622682 -1.045017 , 5.978859842 -1.335655 , 5.167237158 -0.03960428 , 0.461833102 -0.03373618 , 0.160188756 -0.037334874 , 0.012693857 -0.0969989 , 0.980139291 -0.11200464 , 0.967445437 -0.6193517 , 6.76711774 -0.2724491 , 1.903122842 -2.47034754 , 1.669776075 -2.633975 , 15.9932682 -3.321367 , 14.32349224 -0.01000747 , 0.1416168 -0.005979603 , 0.028839132 -0.05060023 , 0.03627259 -0.05269384 , 0.257864171 -0.08299397 , 0.221591581 -0.06750154 , 0.378515474 -0.02223407 , 0.20529251 -0 , 0 -0.03733486 , 0.999253303 -0.03733486 , 0.999253303 -0.03960428 , 0.461833102 -0.03373618 , 0.160188756 -0.037334874 , 0.012693857 -0.0969989 , 0.980139291 -0.11200464 , 0.967445437 -0.3310651 , 4.107159046 -0.1388009 , 0.981995232 -1.18667435 , 1.292237272 -1.685956 , 
8.195215332 -2.0219698 , 6.902978044 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.2633564 , 3.46139504 -0.1110069 , 0.736185641 -1.1866744 , 1.123709131 -1.1266513 , 6.61388276 -1.8883546 , 5.490173664 -0.06750154 , 0.378515474 -0.02223407 , 0.20529251 -0 , 0 -0.03733486 , 0.999253303 -0.03733486 , 0.999253303 -0.1980214 , 2.30916551 -0.1686809 , 0.800943778 -0.18667437 , 0.063469286 -0.4849944 , 4.900696344 -0.5600233 , 4.83722702 -0.1624366 , 2.07243773 -0.0728083 , 0.543533358 -0.53733489 , 0.345320251 -0.6866742 , 4.489935366 -0.8360138 , 4.144615076 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.1622579 , 1.797459108 -0.076864 , 0.505663534 -0.53001155 , 0.406009489 -0.7536616 , 3.975001402 -0.8356548 , 3.568991906 -0.2124889 , 2.836383928 -0.1711379 , 0.832906893 -0.43589822 , 0.428110908 -0.734869 , 5.705615302 -0.7972693 , 5.277504378 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.0927644 , 0.76869418 -0.03608947 , 0.334015947 -0.115505196 , 0.023239168 -0.1501998 , 2.055008426 -0.2079524 , 2.031769284 -0 , 6 -0.02524308 , 0.461492413 -0.03814195 , 0.387214791 -0.0483238 , 0.36416448 -0.05011593 , 0.362282451 -0.0504089 , 0.362087858 -0.73686 , 21.80250736 -0 , 16 -0.3910186 , 6.37080091 -0 , 29 -0 , 13 -0 , 13 -0.783653 , 63.04611016 -0 , 24 -0.3591037 , 7.44639268 -0 , 73 -0 , 49 -0 , 73 -1.650208 , 46.64339502 -0 , 36.56446 -0.892993 , 14.23025184 -0 , 63.546 -0 , 26.981539 -0 , 26.981539 -44.0357 , 1466.598146 -0 , 1246.53 -15.55008 , 413.511442 -0 , 2180 -0 , 933.47 -0 , 933.47 -0.226071 , 10.31995622 -0 , 7 -0.1153323 , 2.112603636 -0 , 13 -0 , 6 -0 , 13 -0.0504262 , 3.637919518 -0 , 1 -0.02872285 , 0.461688638 -0 , 4 -0 , 3 -0 , 3 -0.64773 , 127.1209695 -0 , 18 -0.448271 , 5.511034156 -0 , 139 -0 , 121 -0 , 121 -0.0126491 , 1.754152112 -0 , 0.3 -0.00336262 , 0.128383431 -0 , 1.91 -0 , 1.61 -0 , 1.61 -0.0513139 , 1.744924402 -0 , 1 -0.05056679 , 0.379712166 -0 , 2 -0 , 1 -0 , 2 -0.05042613 , 0.362080494 -0 , 1 -0.02872285 , 0.461688638 -0 , 1 -0 , 0 -0 , 1 -0.4154825 , 
4.59214641 -0 , 10 -0.2052717 , 3.323552678 -0 , 10 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.3334517 , 6.699151302 -0 , 8 -0.1331742 , 2.856055348 -0 , 11 -0 , 3 -0 , 3 -0.05131393 , 0.255075604 -0 , 1 -0.05056679 , 0.379712166 -0 , 1 -0 , 0 -0 , 0 -0.2521306 , 1.81040247 -0 , 5 -0.1436144 , 2.308443166 -0 , 5 -0 , 0 -0 , 5 -0.1974431 , 1.787048632 -0 , 5 -0.0906735 , 1.748041772 -0 , 5 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.147626 , 3.85252671 -0 , 5 -0.1222616 , 1.416195596 -0 , 6 -0 , 1 -0 , 5 -0.291387 , 12.79934667 -0 , 6.235 -0.1661144 , 2.663854348 -0 , 16.48 -0 , 10.245 -0 , 16.48 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.10456043 , 0.543091895 -0 , 2.1106627 -0.0729257 , 0.669993732 -0 , 2.1106627 -0 , 0 -0 , 0 -0.01790073 , 0.260508935 -0.010140542 , 0.054071563 -0.0278215 , 0.685419501 -0 , 0 -0 , 0.022248764 -0.000333759 , 0.008531074 \ No newline at end of file +0,11.956922 +5.4845E-13,1.78364E-13 +0,11.956922 +0,11.956922 +4.93266E-14,1.40795E-14 +0,1 +0,1 +0,0.051981524 +3.28969E-15,8.93664E-16 +0,0.051981524 +0,0.051981524 +5.68129E-14,1.81415E-14 +0,0.68017477 +0.3870897,7.12652049 +0.2899097,1.837693244 +1.4187251,1.503641655 +1.969989,13.9414315 +2.687034,12.43778996 +0.611908,9.53060188 +0.2614777,2.284466884 +2.618969,3.066401706 +3.448019,20.47419864 +4.07503,17.40779674 +0.875219,15.87736112 +0.634954,4.17719946 +3.4312229,3.09477411 +3.950918,31.22195464 +5.636962,28.1271806 +31.39746,497.090631 +16.30954,117.7722932 +125.92536,131.2679296 +191.5083,1057.475323 +236.76546,926.2073952 +0.1834395,2.717744856 +0.0802421,0.681151868 +0.73133256,0.840033091 +0.977671,6.09192841 +1.1420163,5.251895358 +0.03386527,0.459122396 +0.02649208,0.16089159 +0.07466975,0.006720277 +0.0969989,0.981932798 +0.1343337,0.975212524 +0.5560754,6.78144089 +0.2390648,1.877612342 +1.47802996,1.58645044 +2.037694,16.22205616 +2.342977,14.63560574 +0.00590287,0.141375248 +0.003675785,0.028447453 +0.036507761,0.035212654 
+0.03926406,0.259923003 +0.06024842,0.224710346 +0.04256493,0.380936394 +0.01227298,0.204455063 +0,0 +0,1 +0,1 +0.03386527,0.459122396 +0.02649208,0.16089159 +0.07466975,0.006720277 +0.0969989,0.981932798 +0.1343337,0.975212524 +0.20239,4.09579141 +0.16311316,0.977560412 +0.95498273,1.226774915 +1.5223296,8.267676606 +1.8360136,7.040901604 +0,0 +0,0 +0,0 +0,0 +0,0 +0.139879,3.453541334 +0.11973184,0.729494118 +0.94765933,1.074820572 +1.0593047,6.670559944 +1.6336153,5.59573941 +0.04256493,0.380936394 +0.01227298,0.204455063 +0,0 +0,1 +0,1 +0.1693264,2.295611978 +0.1324603,0.804457947 +0.37334874,0.033601387 +0.4849944,4.909663886 +0.6716685,4.876062508 +0.1462688,2.07578746 +0.07372314,0.531491133 +0.56002307,0.326988845 +0.798679,4.565558512 +0.992677,4.238569714 +0,0 +0,0 +0,0 +0,0 +0,0 +0.1143462,1.799108396 +0.0441769,0.500102094 +0.4030011,0.366938569 +0.6193281,4.01516943 +0.7536616,3.64823086 +0.187165,2.822845694 +0.1357858,0.834206766 +0.57286467,0.38234843 +0.6499481,5.727070804 +0.7513733,5.344722386 +0,0 +0,0 +0,0 +0,0 +0,0 +0.08364035,0.773743065 +0.02746088,0.331854182 +0.115505196,0.019976304 +0.1378528,2.057713226 +0.1600819,2.03773695 +0,6 +0.02158918,0.459222344 +0.03288508,0.383836727 +0.04213888,0.359973431 +0.04385403,0.35796432 +0.04415008,0.357751596 +0.577753,21.8641378 +0,16 +0.401033,6.339413866 +0,29 +0,13 +0,13 +0.966626,62.98378654 +0,24 +0.408654,7.424505706 +0,73 +0,49 +0,73 +1.280416,46.77879226 +0,36.56446 +0.895185,14.15848442 +0,63.546 +0,26.981539 +0,26.981539 +51.3505,1469.79143 +0,1246.53 +18.80364,412.3796886 +0,2180 +0,933.47 +0,933.47 +0.284296,10.30351869 +0,7 +0.1146178,2.107594864 +0,13 +0,6 +0,13 +0.0441693,3.642256634 +0,1 +0.02584112,0.4593225 +0,4 +0,3 +0,3 +0.65975,127.1667527 +0,18 +0.378223,5.520421104 +0,139 +0,121 +0,121 +0.0115318,1.755211672 +0,0.3 +0.00296068,0.128206865 +0,1.91 +0,1.61 +0,1.61 +0.039237,1.743257468 +0,1 +0.03804427,0.381459904 +0,2 +0,1 +0,2 +0.04416913,0.357743385 +0,1 +0.02584112,0.4593225 
+0,1 +0,0 +0,1 +0.2941426,4.625083994 +0,10 +0.2393346,3.307764096 +0,10 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0.2614398,6.726084862 +0,8 +0.1340166,2.85048759 +0,11 +0,3 +0,3 +0.03923698,0.256742528 +0,1 +0.03804427,0.381459904 +0,1 +0,0 +0,0 +0.2208457,1.788716936 +0,5 +0.1292055,2.296612498 +0,5 +0,0 +0,5 +0.2137315,1.79748213 +0,5 +0.0796188,1.748064026 +0,5 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0.1629317,3.842941588 +0,5 +0.1175019,1.419926944 +0,6 +0,1 +0,5 +0.2446755,12.77501871 +0,6.235 +0.1560111,2.649703098 +0,16.48 +0,10.245 +0,16.48 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0.0674975,0.547386102 +0,2.1106627 +0.05785012,0.672396997 +0,2.1106627 +0,0 +0,0 +0.013614,0.259207257 +0.008346618,0.053205072 +0.02080855,0.687587671 +0,0 +0,0.022248764 +0.000330044,0.008516198 \ No newline at end of file diff --git a/pysipfenn/tests/testCaseFiles/prototypeLibrary-custom.yaml b/pysipfenn/tests/testCaseFiles/prototypeLibrary-custom.yaml new file mode 100644 index 0000000..dc1a894 --- /dev/null +++ b/pysipfenn/tests/testCaseFiles/prototypeLibrary-custom.yaml @@ -0,0 +1,12 @@ +- name: NicePhase + origin: https://somecustomsource.org + POSCAR: | + A1_U + 1.0 + 0.00000 1.80750 1.80750 + 1.80750 0.00000 1.80750 + 1.80750 1.80750 0.00000 + U + 1 + Direct + 0.00000 0.00000 0.00000 \ No newline at end of file diff --git a/pysipfenn/tests/test_Core_prototypeLibrary.py b/pysipfenn/tests/test_Core_prototypeLibrary.py new file mode 100644 index 0000000..2d7f598 --- /dev/null +++ b/pysipfenn/tests/test_Core_prototypeLibrary.py @@ -0,0 +1,98 @@ +import unittest +from pymatgen.core import Structure +from importlib import resources +import shutil +import pysipfenn +import pytest +import os + +class TestPL(unittest.TestCase): + """Tests correct loading of the prototype library (used, e.g., for random solid solution generation).""" + + def setUp(self) -> None: + """Load the prototype library.""" + self.c = pysipfenn.Calculator(autoLoad=False) + + def test_autoload(self): + """Test that 
the default prototype library is loaded.""" + self.assertTrue(self.c.prototypeLibrary is not None) + self.assertTrue(len(self.c.prototypeLibrary) > 0) + + def test_defaultPresent(self): + """Test that the loaded prototype library was correctly parsed.""" + for prototype in ["FCC", "BCC", "HCP", "Diamond", "DHCP", "Sn_A5"]: + with self.subTest(msg=prototype): + self.assertTrue(prototype in self.c.prototypeLibrary) + + def test_correctContentFCC(self): + """Test that the FCC prototype was correctly parsed.""" + fcc = self.c.prototypeLibrary["FCC"] + self.assertEqual(fcc["origin"], "https://www.oqmd.org/materials/prototype/A1_Cu") + self.assertEqual( + fcc["POSCAR"], + ('A1_Cu\n' + '1.0\n' + ' 0.00000 1.80750 1.80750\n' + ' 1.80750 0.00000 1.80750\n' + ' 1.80750 1.80750 0.00000\n' + 'Cu\n' + '1\n' + 'Direct\n' + ' 0.00000 0.00000 0.00000\n')) + with self.subTest(msg="Is a pymatgen Structure"): + self.assertTrue(isinstance(fcc["structure"], Structure)) + with self.subTest(msg="Is valid pymatgen Structure"): + self.assertTrue(fcc["structure"].is_valid()) + with self.subTest(msg="Has correct formula"): + self.assertEqual(fcc["structure"].formula, "Cu1") + + def test_customPrototypeLoad(self): + """Test that a custom prototype can be loaded. 
Then test that a custom prototype can be appended to the default + library and stay there.""" + + with resources.files('pysipfenn').joinpath('tests/testCaseFiles/prototypeLibrary-custom.yaml') as f: + self.c.parsePrototypeLibrary(customPath=f, verbose=True, printCustomLibrary=True) + + with self.subTest(msg="Custom prototype present with correct parse"): + self.assertTrue("NicePhase" in self.c.prototypeLibrary) + self.assertEqual(self.c.prototypeLibrary["NicePhase"]["origin"], "https://somecustomsource.org") + + with self.subTest(msg="Nice phase is a valid pymatgen Structure"): + self.assertTrue(isinstance(self.c.prototypeLibrary["NicePhase"]["structure"], Structure)) + self.assertTrue(self.c.prototypeLibrary["NicePhase"]["structure"].is_valid()) + self.assertEqual(self.c.prototypeLibrary["NicePhase"]["structure"].formula, "U1") + + with self.subTest(msg="FCC prototype still present"): + self.assertTrue("FCC" in self.c.prototypeLibrary) + + with self.subTest(msg="Test that it does not affect the default prototype library"): + otherC = pysipfenn.Calculator(autoLoad=False) + self.assertTrue("NicePhase" not in otherC.prototypeLibrary) + + # Create a backup of the default library + self.c = pysipfenn.Calculator(autoLoad=False) + backup = self.c.prototypeLibrary.copy() + + with resources.files('pysipfenn').joinpath('tests/testCaseFiles/prototypeLibrary-custom.yaml') as f: + self.c.appendPrototypeLibrary(customPath=f) + + with self.subTest(msg="Custom prototype present and valid in a different Calculator instance"): + otherC = pysipfenn.Calculator(autoLoad=False) + self.assertTrue("NicePhase" in otherC.prototypeLibrary) + self.assertEqual(otherC.prototypeLibrary["NicePhase"]["origin"], "https://somecustomsource.org") + self.assertTrue(isinstance(otherC.prototypeLibrary["NicePhase"]["structure"], Structure)) + self.assertTrue(otherC.prototypeLibrary["NicePhase"]["structure"].is_valid()) + self.assertEqual(otherC.prototypeLibrary["NicePhase"]["structure"].formula, "U1") + 
+ with self.subTest(msg="FCC/BCC/HCP prototype still present in a different Calculator instance"): + self.assertTrue("FCC" in otherC.prototypeLibrary) + self.assertTrue("BCC" in otherC.prototypeLibrary) + self.assertTrue("HCP" in otherC.prototypeLibrary) + + with self.subTest(msg="Restore the original prototype library"): + pysipfenn.overwritePrototypeLibrary(backup) + + + + + diff --git a/pysipfenn/tests/test_KS2022_randomSolution.py b/pysipfenn/tests/test_KS2022_randomSolutions.py similarity index 87% rename from pysipfenn/tests/test_KS2022_randomSolution.py rename to pysipfenn/tests/test_KS2022_randomSolutions.py index 45ac2a5..96ad218 100644 --- a/pysipfenn/tests/test_KS2022_randomSolution.py +++ b/pysipfenn/tests/test_KS2022_randomSolutions.py @@ -29,7 +29,10 @@ def test_results(self): feature converges to near exactly the mean value with near-zero range (e.g. coordination number in BCC in case of ideal lattice positions). ''' - testValues, meta = KS2022_randomSolutions.profile(test='BCC', returnDescriptorAndMeta=True, plotParameters=True) + testValues, meta = KS2022_randomSolutions.profile( + test='BCC', + returnDescriptorAndMeta=True, + plotParameters=True) for testValue, descriptorRange, descriptorMean, label in zip( testValues, @@ -37,10 +40,10 @@ def test_results(self): self.descriptorMeanList, self.labels): with self.subTest(msg=f'{label} in BCC alloy'): - self.assertGreaterEqual(testValue, (0.98*descriptorMean)-descriptorRange-1e-6) - self.assertLessEqual(testValue, (1.02*descriptorMean)+descriptorRange+1e-6) + self.assertGreaterEqual(testValue, (0.95*descriptorMean)-descriptorRange-1e-4) + self.assertLessEqual(testValue, (1.05*descriptorMean)+descriptorRange+1e-4) - for field in ['diffHistory', 'propHistory', 'finalAtomsN', 'finalCompositionDistance']: + for field in ['diffHistory', 'propHistory', 'finalAtomsN', 'finalCompositionDistance', 'finalComposition']: with self.subTest(msg=f'{field} present in meta'): self.assertIn(field, meta) @@ -60,7 +63,7 
@@ class TestKS2022RandomSolutionProfiling(unittest.TestCase): structures. ''' def test_serialInParallel(self): - '''Tests profiling a set of structures with perallel task execution.''' + '''Tests profiling a set of structures with parallel task execution.''' process_map(KS2022_randomSolutions.profile, ['BCC', 'FCC', 'HCP'], max_workers=3) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index f204da9..29e86cd 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -6,7 +6,7 @@ from importlib import resources from natsort import natsorted -from pymatgen.core import Structure +from pymatgen.core import Structure, Composition IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" and os.getenv("MODELS_FETCHED") != "true" @@ -64,7 +64,7 @@ def testFromPOSCAR_Ward2017(self): print(testFileDir) self.c.runFromDirectory(testFileDir, 'Ward2017') else: - print('Did not detect any Ward2017 models to run') + raise ValueError('Did not detect any Ward2017 models to run') @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files") def testFromPOSCAR_KS2022(self): @@ -79,7 +79,7 @@ def testFromPOSCAR_KS2022(self): print(testFileDir) self.c.runFromDirectory(testFileDir, 'KS2022') else: - print('Did not detect any KS2022 models to run') + raise ValueError('Did not detect any KS2022 models to run') with self.subTest(msg='Test Calculator printout after predictions'): printOut = str(self.c) @@ -128,7 +128,26 @@ def testFromStructure_KS2022_dilute(self): self.assertEqual(val1, val2) else: - print('Did not detect any KS2022 models to run') + raise ValueError('Did not detect any KS2022 models to run') + + @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files") + def testFromPrototypes_KS2022_randomSolution(self): + """Quick runtime test of the top level API for random solution structures. 
It does not test the accuracy, as + that is delegated elsewhere.""" + + self.c.updateModelAvailability() + toRun = list(set(self.c.findCompatibleModels('KS2022')).intersection(set(self.c.network_list_available))) + if toRun: + preds = self.c.runModels_randomSolutions( + descriptor='KS2022', + baseStructList='FCC', + compList='AuCu', + compositionConvergenceCriterion=0.05, + featureConvergenceCriterion=0.02, + minimumSitesPerExpansion=8, + mode='serial') + else: + raise ValueError('Did not detect any KS2022 models to run') def test_descriptorCalculate_Ward2017_serial(self): '''Test succesful execution of the descriptorCalculate() method with Ward2017 in series. A separate test for @@ -170,6 +189,63 @@ def test_descriptorCalculate_KS2022_parallel(self): descList = self.c.calculate_KS2022(structList=testStructures, mode='parallel', max_workers=4) self.assertEqual(len(descList), len(testStructures)) + def test_descriptorCalculate_KS2022_dilute_serial(self): + """Test succesful execution of the descriptorCalculate() method with KS2022_dilute in series based on an Al + prototype loaded from the default prototype library. A separate test for calculation accuracy is done in + test_KS2022.py""" + diluteStruct = self.c.prototypeLibrary['FCC']['structure'].copy() + diluteStruct.make_supercell([2, 2, 2]) + diluteStruct.replace(0, 'Fe') + testStructures = [diluteStruct.copy()]*2 + descList = self.c.calculate_KS2022_dilute(structList=testStructures, mode='serial') + self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.") + for desc in descList: + self.assertListEqual( + desc.tolist(), + descList[0].tolist(), + "All descriptors should be equal for the same structure are the same." + ) + + + def test_descriptorCalculate_KS2022_dilute_parallel(self): + """Test succesful execution of the descriptorCalculate() method with KS2022_dilute in parallel based on an Al + prototype loaded from the default prototype library. 
A separate test for calculation accuracy is done in + test_KS2022.py""" + with self.subTest(msg="Constructing dilute structures"): + diluteStruct = self.c.prototypeLibrary['FCC']['structure'].copy() + diluteStruct.make_supercell([2, 2, 2]) + testStructures = [] + for i in range(8): + tempStruct = diluteStruct.copy() + tempStruct.replace(i, 'Fe') + testStructures.append(tempStruct) + + with self.subTest(msg="Running parallel calculation with default 'pure' base structure"): + descList = self.c.calculate_KS2022_dilute(structList=testStructures, mode='parallel', max_workers=4) + self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.") + + with self.subTest(msg="All descriptors should be equal for the same structure as sites are equivalent"): + temp0 = descList[0].tolist() + for desc in descList: + temp1 = desc.tolist() + for t0, t1 in zip(temp0, temp1): + self.assertAlmostEqual(t0, t1, places=6) + + with self.subTest(msg="Running parallel calculation with defined base structures"): + baseStructs = [diluteStruct.copy()]*8 + descList = self.c.calculate_KS2022_dilute( + structList=testStructures, + baseStruct=baseStructs, + mode='parallel', + max_workers=4) + self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.") + + with self.subTest(msg="All descriptors should be equal for the same structure as sites are equivalent"): + for desc in descList: + temp1 = desc.tolist() + for t0, t1 in zip(temp0, temp1): + self.assertAlmostEqual(t0, t1, places=6) + def test_RunModels_Errors(self): '''Test that the runModels() and runModels_dilute() methods raise errors correctly when it is called with no models to run or with a descriptor handling that has not been implemented. 
@@ -231,9 +307,105 @@ def test_CalculatorPrint(self): ''' printOut = str(self.c) self.assertIn('pySIPFENN Calculator Object', printOut) - self.assertIn('Models are located in', printOut) + self.assertIn('Models are located', printOut) self.assertIn('Loaded Networks', printOut) +class TestCoreRSS(unittest.TestCase): + """Test the high-level API functionality of the Calculator object in regard to random solution structures (RSS). It + does not test the accuracy, just all runtime modes and known physicality of the results (e.g., FCC should have + coordination number of `12`). + + Note: + The execution of the descriptorCalculate() method with KS2022_randomSolution is done under coarse settings + (for speed reasons) and should not be used for any accuracy tests. A separate testing for calculation accuracy + against consistency and reference values is done in `test_KS2022_randomSolutions.py`. + """ + def setUp(self): + self.c = pysipfenn.Calculator() + self.assertIsNotNone(self.c) + + def test_descriptorCalculate_KS2022_randomSolution_serial_pair(self): + """Test successful execution of a composition-structure pair in series""" + + with self.subTest(msg="Running single composition-structure pair"): + d1 = self.c.calculate_KS2022_randomSolutions( + 'BCC', + 'FeNi', + minimumSitesPerExpansion=16, + featureConvergenceCriterion=0.02, + compositionConvergenceCriterion=0.05, + mode='serial') + self.assertEqual(len(d1), 1, "Only one composition-structure pair should be processed.") + self.assertEqual(len(d1[0]), 256, "All 256 KS2022 features should be obtained.") + + def test_descriptorCalculate_KS2022_randomSolution_serial_multiple(self): + """Test successful execution (in series) of multiple compositions occupying the same FCC lattice.""" + with self.subTest(msg="Running multiple compositions occupying the same FCC lattice"): + d2 = self.c.calculate_KS2022_randomSolutions( + 'FCC', + ['FeNi', 'CrNi'], + minimumSitesPerExpansion=16, + featureConvergenceCriterion=0.02, + 
compositionConvergenceCriterion=0.05, + mode='serial') + self.assertEqual(len(d2), 2, "Two composition-structure pairs should be processed.") + self.assertEqual(len(d2[0]), 256, "All 256 KS2022 features should be obtained.") + self.assertEqual(len(d2[1]), 256, "All 256 KS2022 features should be obtained.") + self.assertAlmostEqual( + float(d2[0][0]), + float(d2[1][0]) + , places=6, msg="Coordination number (KS2022[0]) should be the same (12) for both compositions.") + self.assertNotAlmostEqual( + float(d2[0][13]), + float(d2[1][13]) + , places=6, msg="mean_NeighDiff_shell1_Number (KS2022[13]) should be different (1.0vs2.0)." + ) + + def test_descriptorCalculate_KS2022_randomSolution_parallel_pair(self): + """Test successful execution of a composition-structure pair in parallel mode. Just for the input passing + validation.""" + + with self.subTest(msg="Running single composition-structure pair"): + d1 = self.c.calculate_KS2022_randomSolutions( + 'BCC', + 'FeNi', + mode='parallel', + max_workers=1) + self.assertEqual(len(d1), 1, "Only one composition-structure pair should be processed.") + self.assertEqual(len(d1[0]), 256, "All 256 KS2022 features should be obtained.") + + def test_descriptorCalculate_KS2022_randomSolution_parallel_multiple(self): + """Test successful execution of manu composition-structure pairs given in ordered lists of input.""" + myBCC = self.c.prototypeLibrary['BCC']['structure'] + + with self.subTest(msg="Running multiple compositions occupying multiple prototypes"): + d2 = self.c.calculate_KS2022_randomSolutions( + ['FCC', myBCC, 'BCC', 'HCP'], + ['WMo', Composition('WMo'), 'FeNi', 'CrNi'], + mode='parallel', + max_workers=4) + self.assertEqual(len(d2), 4, "Four composition-structure pairs should be processed.") + for i in range(4): + self.assertEqual(len(d2[i]), 256, "All 256 KS2022 features should be obtained.") + self.assertNotAlmostEqual( + float(d2[0][0]), + float(d2[1][0]), + places=6, msg="Coordination number (KS2022[0]) should be 
different for BCC and FCC.") + self.assertAlmostEqual( + float(d2[1][0]), + float(d2[2][0]), + places=6, msg="Coordination number (KS2022[0]) should be the same for both BCCs.") + + with self.subTest(msg='Verify that the metadata was correctly recorded.'): + assert len(self.c.metas['RSS']) == 4, "There should be 4 metadata records." + for meta in self.c.metas['RSS']: + self.assertIn('diffHistory', meta) + self.assertIn('propHistory', meta) + self.assertIn('finalAtomsN', meta) + self.assertIn('finalCompositionDistance', meta) + self.assertIn('finalComposition', meta) + + if __name__ == '__main__': unittest.main()