Skip to content

Commit

Permalink
Handling vetoes and timeslides in pygrb mini followups (#4941)
Browse files Browse the repository at this point in the history
* Use veto and segments files in pycbc_pygrb_minifollowups; enforce using the correct timeslide when following triggers/injections

* Added note about pycbc_pygrb_minifollowups in pycbc_pygrb_page_tables

* Updated construct_trials and generalized extract_basic_trig_properties to extract_trig_properties

* Edited var name to something more meaningful

* More accurate docstring

* Aesthetics and missing import

* Simplified pycbc_pygrb_minifollowups and fixed a sign!

* Removed empty line

* f-string and bare except
  • Loading branch information
pannarale authored Nov 19, 2024
1 parent acecd23 commit 161bcea
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 66 deletions.
101 changes: 75 additions & 26 deletions bin/pygrb/pycbc_pygrb_minifollowups
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,23 @@ def add_wiki_row(outfile, cols):


def make_timeseries_plot(workflow, trig_file, snr_type, central_time,
shift_time, out_dir, ifo=None, tags=None):
out_dir, ifo=None, seg_files=None,
veto_file=None, tags=None):
"""Adds a node for a timeseries of PyGRB results to the workflow"""

tags = [] if tags is None else tags

# Ensure that zero-lag data is used in these follow-up plots and store
# the slide-id option originally provided
orig_slide_id = None
if workflow.cp.has_option('pygrb_plot_snr_timeseries','slide-id'):
orig_slide_id = workflow.cp.get('pygrb_plot_snr_timeseries','slide-id')
workflow.cp.add_options_to_section(
'pygrb_plot_snr_timeseries',
[('slide-id', '0')],
True
)

# Initialize job node with its tags
grb_name = workflow.cp.get('workflow', 'trigger-name')
extra_tags = ['GRB'+grb_name]
Expand All @@ -71,28 +83,42 @@ def make_timeseries_plot(workflow, trig_file, snr_type, central_time,
ifos=workflow.ifos, out_dir=out_dir,
tags=tags+extra_tags).create_node()
node.add_input_opt('--trig-file', trig_file)
# Include the onsource trial if this is a follow up on the onsource
if 'loudest_onsource_event' in tags:
node.add_opt('--onsource')
# Pass the segments files and veto file
if seg_files:
node.add_input_list_opt('--seg-files', seg_files)
if veto_file:
node.add_input_opt('--veto-file', veto_file)
node.new_output_file_opt(workflow.analysis_time, '.png',
'--output-file', tags=extra_tags)
# Quantity to be displayed on the y-axis of the plot
node.add_opt('--y-variable', snr_type)
if ifo is not None:
node.add_opt('--ifo', ifo)
reset_central_time = shift_time - central_time
# Horizontal axis range the = prevents errors with negative times
x_lims = str(-5.+reset_central_time)+','+str(reset_central_time+5.)
node.add_opt('--x-lims='+x_lims)
node.add_opt('--x-lims=-5.,5.')
# Plot title
if ifo is not None:
title_str = f"'{ifo} SNR at {central_time:.3f} (s)'"
else:
title_str = "'%s SNR at %.3f (s)'" % (snr_type.capitalize(),
central_time)
title_str = f"'{snr_type.capitalize()} SNR at {central_time:.3f} (s)'"
node.add_opt('--trigger-time', central_time)
node.add_opt('--plot-title', title_str)

# Add job node to workflow
workflow += node

# Revert the config parser back to how it was given
if orig_slide_id:
workflow.cp.add_options_to_section(
'pygrb_plot_snr_timeseries',
[('slide-id', orig_slide_id)],
True
)
else:
workflow.cp.remove_option('pygrb_plot_snr_timeseries','slide-id')

return node.output_files


Expand All @@ -107,6 +133,11 @@ parser.add_argument('--followups-file',
help="HDF file with the triggers/injections to follow up")
parser.add_argument('--wiki-file',
help="Name of file to save wiki-formatted table in")
parser.add_argument("-a", "--seg-files", nargs="+", action="store",
default=[], help="The location of the buffer, " +
"onsource and offsource txt segment files.")
parser.add_argument("-V", "--veto-file", action="store",
help="The location of the xml veto file.")
wf.add_workflow_command_line_group(parser)
wf.add_workflow_settings_cli(parser, include_subdax_opts=True)
ppu.pygrb_add_bestnr_cut_opt(parser)
Expand Down Expand Up @@ -142,50 +173,68 @@ trig_file = resolve_url_to_file(os.path.abspath(args.trig_file))
ifos = ppu.extract_ifos(os.path.abspath(args.trig_file))
num_ifos = len(ifos)

# File instance of the veto file
veto_file = args.veto_file
start_rundir = os.getcwd()
if veto_file:
veto_file = os.path.join(start_rundir, args.veto_file)
veto_file = wf.resolve_url_to_file(veto_file)

# Convert the segments files to a FileList
seg_files = wf.FileList([
wf.resolve_url_to_file(os.path.join(start_rundir, f))
for f in args.seg_files
])

# (Loudest) off/on-source events are on time-slid data so the
# try will succeed, as it finds the time shift columns.
is_injection_followup = True
try:
time_shift = fp[ifos[0]+' time shift (s)'][0]
is_injection_followup = False
except:
except KeyError:
pass


# Loop over triggers/injections to be followed up
for num_event in range(num_events):
files = FileList([])
logging.info('Processing event: %s', num_event)
logging.info('Processing event: %s', num_event+1)
gps_time = fp['GPS time'][num_event]
gps_time = gps_time.astype(float)
tags = args.tags + [str(num_event+1)]
if wiki_file:
row = []
for key in fp.keys():
row.append(fp[key][num_event])
add_wiki_row(wiki_file, row)
# Handle off/on-source loudest triggers follow-up (which are on slid data)
if not is_injection_followup:
for ifo in ifos:
time_shift = fp[ifo+' time shift (s)'][num_event]
ifo_time = gps_time - time_shift
files += make_timeseries_plot(workflow, trig_file,
'single', gps_time, ifo_time,
args.output_dir, ifo=ifo,
tags=args.tags + [str(num_event)])
files += mini.make_qscan_plot(workflow, ifo, ifo_time,
args.output_dir,
tags=args.tags + [str(num_event)])
# Handle injections (which are on unslid data)
else:
if is_injection_followup:
for snr_type in ['reweighted', 'coherent']:
files += make_timeseries_plot(workflow, trig_file,
snr_type, gps_time, gps_time,
snr_type, gps_time,
args.output_dir, ifo=None,
tags=args.tags + [str(num_event)])
seg_files=seg_files,
veto_file=veto_file,
tags=tags)
for ifo in ifos:
files += mini.make_qscan_plot(workflow, ifo, gps_time,
args.output_dir,
tags=args.tags + [str(num_event)])
tags=tags)
# Handle off/on-source loudest triggers follow-up (which may be on slid
# data in the case of the off-source)
else:
for i_ifo, ifo in enumerate(ifos):
time_shift = fp[ifo+' time shift (s)'][num_event]
ifo_time = gps_time + time_shift
files += make_timeseries_plot(workflow, trig_file,
'single', ifo_time,
args.output_dir, ifo=ifo,
seg_files=seg_files,
veto_file=veto_file,
tags=tags)
files += mini.make_qscan_plot(workflow, ifo, ifo_time,
args.output_dir,
tags=tags)

layouts += list(layout.grouper(files, 2))

Expand Down
2 changes: 1 addition & 1 deletion bin/pygrb/pycbc_pygrb_page_tables
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ if lofft_outfile:
d.append(bestnr)
td.append(d)

# th: table header
# th: table header [pycbc_pygrb_minifollowups looks for 'Slide Num']
th = ['Trial', 'Slide Num', 'p-value', 'GPS time',
'Rec. m1', 'Rec. m2', 'Rec. Mc', 'Rec. spin1z', 'Rec. spin2z',
'Rec. RA', 'Rec. Dec', 'SNR', 'Chi^2', 'Null SNR']
Expand Down
78 changes: 39 additions & 39 deletions pycbc/results/pygrb_postprocessing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
Module to generate PyGRB figures: scatter plots and timeseries.
"""

import os
import logging
import argparse
import copy
Expand All @@ -33,6 +32,7 @@
from scipy import stats
import ligo.segments as segments
from pycbc.events.coherent import reweightedsnr_cut
from pycbc.events import veto
from pycbc import add_common_pycbc_options
from pycbc.io.hdf import HFile

Expand Down Expand Up @@ -242,7 +242,7 @@ def _extract_vetoes(veto_file, ifos, offsource):
vetoes[ifo] = segments.segmentlist([offsource]) - clean_segs[ifo]
vetoes.coalesce()
for ifo in ifos:
for v in vetoes[ifo]:
for v in vetoes[ifo]:
v_span = v[1] - v[0]
logging.info("%ds of data vetoed at GPS time %d",
v_span, v[0])
Expand All @@ -269,7 +269,7 @@ def _slide_vetoes(vetoes, slide_dict_or_list, slide_id, ifos):


# =============================================================================
# Recursive function to reach all datasets in an HDF file handle
# Recursive function to reach all datasets in an HDF file handle
# =============================================================================
def _dataset_iterator(g, prefix=''):
"""Reach all datasets in an HDF file handle"""
Expand Down Expand Up @@ -368,9 +368,6 @@ def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None,
return trigs_dict





# =============================================================================
# Detector utils:
# * Function to calculate the antenna response F+^2 + Fx^2
Expand Down Expand Up @@ -460,38 +457,35 @@ def sort_trigs(trial_dict, trigs, slide_dict, seg_dict):
# =============================================================================
# Extract trigger properties and store them as dictionaries
# =============================================================================
def extract_basic_trig_properties(trial_dict, trigs, slide_dict, seg_dict,
opts):
"""Extract and store as dictionaries time, SNR, and BestNR of
time-slid triggers"""
def extract_trig_properties(trial_dict, trigs, slide_dict, seg_dict, keys):
"""Extract and store as dictionaries specific keys of time-slid
triggers (trigs) compatibly with the trials dictionary (trial_dict)"""

# Sort the triggers into each slide
sorted_trigs = sort_trigs(trial_dict, trigs, slide_dict, seg_dict)
logger.info("Triggers sorted.")
n_surviving_trigs = sum([len(i) for i in sorted_trigs.values()])
msg = f"{n_surviving_trigs} triggers found within the trials dictionary "
msg += "and sorted."
logging.info(msg)

found_trigs = {}
for key in keys:
found_trigs[key] = {}

# Build the 3 dictionaries
trig_time = {}
trig_snr = {}
trig_bestnr = {}
for slide_id in slide_dict:
slide_trigs = sorted_trigs[slide_id]
indices = numpy.nonzero(
numpy.isin(trigs['network/event_id'], slide_trigs))[0]
if slide_trigs:
trig_time[slide_id] = trigs['network/end_time_gc'][
indices]
trig_snr[slide_id] = trigs['network/coherent_snr'][
indices]
else:
trig_time[slide_id] = numpy.asarray([])
trig_snr[slide_id] = numpy.asarray([])
trig_bestnr[slide_id] = reweightedsnr_cut(
trigs['network/reweighted_snr'][indices],
opts.newsnr_threshold)
for key in keys:
if slide_trigs:
found_trigs[key][slide_id] = get_coinc_snr(trigs)[indices] \
if key == 'network/coincident_snr' else trigs[key][indices]
else:
found_trigs[key][slide_id] = numpy.asarray([])

logger.info("Time, SNR, and BestNR of triggers extracted.")
logging.info("Triggers information extracted.")

return trig_time, trig_snr, trig_bestnr
return found_trigs


# =============================================================================
Expand Down Expand Up @@ -582,9 +576,11 @@ def load_segment_dict(hdf_file_path):
# =============================================================================
# Construct the trials from the timeslides, segments, and vetoes
# =============================================================================
def construct_trials(seg_files, seg_dict, ifos, slide_dict, vetoes):
"""Constructs trials from triggers, timeslides, segments and vetoes"""
def construct_trials(seg_files, seg_dict, ifos, slide_dict, veto_file,
hide_onsource=True):
"""Constructs trials from segments, timeslides, and vetoes"""

logging.info("Constructing trials.")
trial_dict = {}

# Get segments
Expand All @@ -593,19 +589,23 @@ def construct_trials(seg_files, seg_dict, ifos, slide_dict, vetoes):
# Separate segments
trial_time = abs(segs['on'])

# Determine the veto segments
vetoes = _extract_vetoes(veto_file, ifos, segs['off'])

# Slide vetoes over trials: this can only *reduce* the analysis time
for slide_id in slide_dict:
# These can only *reduce* the analysis time
curr_seg_list = seg_dict[slide_id]

# Construct the buffer segment list
# Fill in the buffer segment list if the onsource data must be hidden
seg_buffer = segments.segmentlist()
for ifo in ifos:
slide_offset = slide_dict[slide_id][ifo]
seg_buffer.append(segments.segment(segs['buffer'][0] -
slide_offset,
segs['buffer'][1] -
slide_offset))
seg_buffer.coalesce()
if hide_onsource:
for ifo in ifos:
slide_offset = slide_dict[slide_id][ifo]
seg_buffer.append(segments.segment(segs['buffer'][0] -
slide_offset,
segs['buffer'][1] -
slide_offset))
seg_buffer.coalesce()

        # Construct the ifo-indexed dictionary of slid vetoes
slid_vetoes = _slide_vetoes(vetoes, slide_dict, slide_id, ifos)
Expand Down

0 comments on commit 161bcea

Please sign in to comment.