Skip to content

Commit

Permalink
Merge pull request #32 from francois-drielsma/develop
Browse files Browse the repository at this point in the history
Significant changes ahead of release 0.2.0
  • Loading branch information
francois-drielsma authored Nov 18, 2024
2 parents 30083b4 + d09435b commit 84a81ce
Show file tree
Hide file tree
Showing 126 changed files with 4,908 additions and 1,788 deletions.
2 changes: 1 addition & 1 deletion bin/larcv_check_valid.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def main(source, source_list, output):
(set(keys_list[idx]) != set(all_keys))):
print(f"- Bad file: {file_path}")
out_file.write(f'{file_path}\n')
bad_files += file_path
bad_files.append(file_path)

suffix = ':' if len(bad_files) > 0 else '.'
print(f"\nFound {len(bad_files)} bad files{suffix}")
Expand Down
54 changes: 49 additions & 5 deletions spine/ana/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,19 @@ class AnaBase(ABC):
units : str
Units in which the coordinates are expressed
"""

# Name of the analysis script (as specified in the configuration)
name = None

# Alternative allowed names of the analysis script
aliases = ()
keys = None

# Units in which the analysis script expects objects to be expressed in
units = 'cm'

# Set of data keys needed for this analysis script to operate
_keys = ()

# List of recognized object types
_obj_types = ('fragment', 'particle', 'interaction')

Expand Down Expand Up @@ -58,9 +66,7 @@ def __init__(self, obj_type=None, run_mode=None, append=False,
Name to prefix every output CSV file with
"""
# Initialize default keys
if self.keys is None:
self.keys = {}
self.keys.update({
self.update_keys({
'index': True, 'file_index': True,
'file_entry_index': False, 'run_info': False
})
Expand Down Expand Up @@ -104,7 +110,9 @@ def __init__(self, obj_type=None, run_mode=None, append=False,
self.obj_keys = (self.fragment_keys
+ self.particle_keys
+ self.interaction_keys)
self.keys.update({k:True for k in self.obj_keys})

# Update underlying keys, if needed
self.update_keys({k:True for k in self.obj_keys})

# Store the append flag
self.append_file = append
Expand Down Expand Up @@ -136,6 +144,42 @@ def initialize_writer(self, name):
file_name, append=self.append_file,
overwrite=self.overwrite_file)

@property
def keys(self):
    """Mapping from data product key to whether it is required.

    Each (key, necessity) pair determines which data keys are needed
    (`True`) or merely optional (`False`) for this analysis script to run.

    Returns
    -------
    Dict[str, bool]
        Dictionary of (key, necessity) pairs to be used
    """
    # Rebuild a fresh, mutable dictionary from the immutable tuple store
    return {key: needed for key, needed in self._keys}

@keys.setter
def keys(self, keys):
    """Store the (key, necessity) mapping as an immutable tuple.

    Parameters
    ----------
    keys : Dict[str, bool]
        Dictionary of (key, necessity) pairs to be used
    """
    # Freeze the dictionary into a tuple of pairs so that the stored
    # default cannot be mutated in place by accident
    self._keys = tuple((key, needed) for key, needed in keys.items())

def update_keys(self, update_dict):
    """Update the underlying set of keys and their necessity in place.

    Parameters
    ----------
    update_dict : Dict[str, bool]
        Dictionary of (key, necessity) pairs to update the keys with
    """
    # Nothing to do when there is no update to apply
    if not update_dict:
        return

    # Merge the update into a mutable copy of the mapping, then freeze
    # it back into the immutable tuple representation
    merged = self.keys
    merged.update(update_dict)
    self._keys = tuple(merged.items())

def get_base_dict(self, data):
"""Builds the entry information dictionary.
Expand Down
10 changes: 10 additions & 0 deletions spine/ana/diag/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
'''Diagnostic analysis scripts.
This submodule is used to run basic diagnostics analyses such as:
- Track dE/dx profile
- Track energy reconstruction
- Shower start dE/dx
- ...
'''

from .shower import *
64 changes: 64 additions & 0 deletions spine/ana/diag/shower.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
'''Module to evaluate diagnostic metrics on showers.'''

from spine.ana.base import AnaBase

__all__ = ['ShowerStartDEdxAna']


class ShowerStartDEdxAna(AnaBase):
    """This analysis script computes the dE/dx value within some distance
    from the start point of an EM shower object.

    This is a useful diagnostic tool to evaluate the calorimetric separability
    of different EM shower types (electron vs photon), which are expected to
    have different dE/dx patterns near their start point.
    """

    # Name of the analysis script (as specified in the configuration)
    name = 'shower_start_dedx'

    def __init__(self, radius, obj_type='particle', run_mode='both',
                 truth_point_mode='points', truth_dep_mode='depositions',
                 **kwargs):
        """Initialize the analysis script.

        Parameters
        ----------
        radius : Union[float, List[float]]
            Radius around the start point for which to evaluate dE/dx
        obj_type : str, default 'particle'
            Type of object(s) this analysis script operates on
        run_mode : str, default 'both'
            Whether to run on reconstructed objects, truth objects or both
        truth_point_mode : str, default 'points'
            Point attribute to use when processing truth objects
        truth_dep_mode : str, default 'depositions'
            Deposition attribute to use when processing truth objects
        **kwargs : dict, optional
            Additional arguments to pass to :class:`AnaBase`
        """
        # Initialize the parent class
        # NOTE(review): truth_point_mode and truth_dep_mode are accepted but
        # neither stored nor forwarded to the parent class — confirm intent
        super().__init__(obj_type, run_mode, **kwargs)

        # Store the radius around the start point used to evaluate dE/dx
        self.radius = radius

        # Initialize one CSV writer per requested object type
        for obj in self.obj_type:
            self.initialize_writer(obj)

    def process(self, data):
        """Evaluate shower start dE/dx for one entry.

        Parameters
        ----------
        data : dict
            Dictionary of data products
        """
        # Narrow down to the data products of interest
        # NOTE(review): assumes the products live under the 'prod' key of the
        # data dictionary — confirm against the driver that fills `data`
        data = data['prod']

        # Loop over all requested object types
        for key in self.obj_keys:
            # Loop over all objects of that type
            for obj in data[key]:
                # Displacement between the object end and start points
                # (fixes a NameError: the loop variable is `obj`, not `p`)
                disp = obj.end_point - obj.start_point

                # Build the output row from the displacement components
                out = {'disp_x': disp[0], 'disp_y': disp[1], 'disp_z': disp[2]}

                # Write the row to file
                # NOTE(review): 'template' is not one of the writers
                # initialized in __init__ (those are named per object type) —
                # confirm the intended writer name
                self.append('template', **out)
12 changes: 8 additions & 4 deletions spine/ana/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
ANA_DICT.update(**module_dict(module))


def ana_script_factory(name, cfg, overwrite=False, log_dir=None, prefix=None):
def ana_script_factory(name, cfg, overwrite=None, log_dir=None, prefix=None):
"""Instantiates an analyzer module from a configuration dictionary.
Parameters
Expand All @@ -22,7 +22,7 @@ def ana_script_factory(name, cfg, overwrite=False, log_dir=None, prefix=None):
parent_path : str
Path to the parent directory of the main analysis configuration. This
allows for the use of relative paths in the analyzers.
overwrite : bool, default False
overwrite : bool, optional
If `True`, overwrite the CSV logs if they already exist
log_dir : str, optional
Output CSV file directory (shared with driver log)
Expand All @@ -39,5 +39,9 @@ def ana_script_factory(name, cfg, overwrite=False, log_dir=None, prefix=None):
cfg['name'] = name

# Instantiate the analysis script module
return instantiate(
ANA_DICT, cfg, overwrite=overwrite, log_dir=log_dir, prefix=prefix)
if overwrite is not None:
return instantiate(
ANA_DICT, cfg, overwrite=overwrite, log_dir=log_dir, prefix=prefix)
else:
return instantiate(
ANA_DICT, cfg, log_dir=log_dir, prefix=prefix)
6 changes: 4 additions & 2 deletions spine/ana/manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Manages the operation of analysis scripts."""

from copy import deepcopy
from collections import defaultdict, OrderedDict

import numpy as np
Expand Down Expand Up @@ -35,7 +36,7 @@ def __init__(self, cfg, log_dir=None, prefix=None):
# Parse the analysis block configuration
self.parse_config(log_dir, prefix, **cfg)

def parse_config(self, log_dir, prefix, overwrite=False,
def parse_config(self, log_dir, prefix, overwrite=None,
prefix_output=False, **modules):
"""Parse the analysis tool configuration.
Expand All @@ -46,14 +47,15 @@ def parse_config(self, log_dir, prefix, overwrite=False,
prefix : str
Input file prefix. If requested, it will be used to prefix
all the output CSV files.
overwrite : bool, default False
overwrite : bool, optional
If `True`, overwrite the CSV logs if they already exist
prefix_output : bool, optional
If `True`, will prefix the output CSV names with the input file name
**modules : dict
List of analysis script modules
"""
# Loop over the analyzer modules and get their priorities
modules = deepcopy(modules)
keys = np.array(list(modules.keys()))
priorities = -np.ones(len(keys), dtype=np.int32)
for i, k in enumerate(keys):
Expand Down
57 changes: 40 additions & 17 deletions spine/ana/metric/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@ class ClusterAna(AnaBase):
- particles
- interactions
"""

# Name of the analysis script (as specified in the configuration)
name = 'cluster_eval'

# Label column to use for each clustering label_col
_label_cols = {
'fragment': CLUST_COL, 'particle': GROUP_COL,
'interaction': INTER_COL
}
_label_cols = (
('fragment', CLUST_COL),
('particle', GROUP_COL),
('interaction', INTER_COL)
)

def __init__(self, obj_type=None, use_objects=False, per_object=True,
per_shape=True, metrics=('pur', 'eff', 'ari'),
Expand All @@ -53,7 +56,7 @@ def __init__(self, obj_type=None, use_objects=False, per_object=True,
label_key : str, default 'clust_label_adapt'
Name of the tensor which contains the cluster labels, when
using the raw reconstruction output
label_col : str
label_col : str, optional
Column name in the label tensor specifying the aggregation label_col
**kwargs : dict, optional
Additional arguments to pass to :class:`AnaBase`
Expand All @@ -71,9 +74,9 @@ def __init__(self, obj_type=None, use_objects=False, per_object=True,

# Initialize the parent class
super().__init__(obj_type, 'both', **kwargs)
if not use_objects:
for key in self.obj_keys:
del self.keys[key]


# If the clustering is not done per object, fix target
if not per_object:
self.obj_type = [label_col]

Expand All @@ -90,27 +93,47 @@ def __init__(self, obj_type=None, use_objects=False, per_object=True,
# Convert metric strings to functions
self.metrics = {m: getattr(spine.utils.metrics, m) for m in metrics}

# List the necessary data products
# If objects are not used, remove them from the required keys
keys = self.keys
if not use_objects:
for key in self.obj_keys:
del keys[key]

# List other necessary data products
if self.per_object:
if not self.use_objects:
# Store the labels and the clusters output by the reco chain
self.keys[label_key] = True
keys[label_key] = True
for obj in self.obj_type:
self.keys[f'{obj}_clusts'] = True
self.keys[f'{obj}_shapes'] = True
keys[f'{obj}_clusts'] = True
keys[f'{obj}_shapes'] = True

else:
self.keys['points'] = True
keys['points'] = True

else:
self.keys[label_key] = True
self.keys['clusts'] = True
self.keys['group_pred'] = True
keys[label_key] = True
keys['clusts'] = True
keys['group_pred'] = True

self.keys = keys

# Initialize the output
for obj in self.obj_type:
self.initialize_writer(obj)

@property
def label_cols(self):
    """Mapping from clustering target name to label tensor column.

    Returns
    -------
    Dict[str, int]
        Dictionary of (key, column_id) mapping from name to label column
    """
    # Convert the immutable tuple of pairs into a lookup dictionary
    return {name: col for name, col in self._label_cols}

def process(self, data):
"""Store the clustering metrics for one entry.
Expand All @@ -124,7 +147,7 @@ def process(self, data):
# Build the cluster labels for this object type
if not self.use_objects:
# Fetch the right label column
label_col = self.label_col or self._label_cols[obj_type]
label_col = self.label_col or self.label_cols[obj_type]
num_points = len(data[self.label_key])
labels = data[self.label_key][:, label_col]
shapes = data[self.label_key][:, SHAPE_COL]
Expand Down
8 changes: 6 additions & 2 deletions spine/ana/metric/point.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ class PointProposalAna(AnaBase):
- Point type classification accuracy
- Point end classification accuracy
"""

# Name of the analysis script (as specified in the configuration)
name = 'point_eval'

# Set of data keys needed for this analysis script to operate
_keys = (('ppn_pred', True),)

def __init__(self, num_classes=LOWES_SHP, label_key='ppn_label',
endpoints=False, **kwargs):
"""Initialize the analysis script.
Expand All @@ -51,8 +56,7 @@ def __init__(self, num_classes=LOWES_SHP, label_key='ppn_label',
self.endpoints = endpoints

# Append other required key
self.keys['ppn_pred'] = True
self.keys[self.label_key] = True
self.update_keys({self.label_key: True})

# Initialize the output
self.initialize_writer('truth_to_reco')
Expand Down
Loading

0 comments on commit 84a81ce

Please sign in to comment.