Final version of pycbc_pygrb_plot_chisq_veto and `pycbc_pygrb_plot_…

…coh_ifosnr` (#4950) * Version of pycbc_pygrb_plot_chisq_veto with vetoes (albeit with a mute apply_vetoes_to_found_injs for the time being) * Version of pycbc_pygrb_plot_coh_ifosnr with vetoes (albeit with a mute apply_vetoes_to_found_injs for the time being) * Update f-string in pycbc_pygrb_plot_coh_ifosnr Co-authored-by: Tito Dal Canton <tito@dalcanton.it> * Update f-string in pycbc_pygrb_plot_chisq_veto Co-authored-by: Tito Dal Canton <tito@dalcanton.it> * Update f-string in pycbc_pygrb_plot_chisq_veto Co-authored-by: Tito Dal Canton <tito@dalcanton.it> * Update f-string in pycbc_pygrb_plot_chisq_veto Co-authored-by: Tito Dal Canton <tito@dalcanton.it> * Comment corrected * Removed dof_key variable * Update bin/pygrb/pycbc_pygrb_plot_chisq_veto Co-authored-by: Tito Dal Canton <tito@dalcanton.it> * numpy.where --> numpy.nonzero in pycbc_pygrb_plot_coh_ifosnr Co-authored-by: Tito Dal Canton <tito@dalcanton.it> * senstvty --> sensitivity * Comprehension for dictionary generation * Removed try-except * Improved comments --------- Co-authored-by: Tito Dal Canton <tito@dalcanton.it>
gwastro · Nov 23, 2024 · b7cb8bc · b7cb8bc
1 parent 47d4d8d
commit b7cb8bc
Show file tree

Hide file tree

Showing 2 changed files with 214 additions and 229 deletions.
diff --git a/bin/pygrb/pycbc_pygrb_plot_chisq_veto b/bin/pygrb/pycbc_pygrb_plot_chisq_veto
@@ -48,79 +48,6 @@ __program__ = "pycbc_pygrb_plot_chisq_veto"
 # =============================================================================
 # Functions
 # =============================================================================
-# Function to load trigger data: includes applying cut in reweighted SNR
-def load_data(input_file, ifos, vetoes, opts, injections=False, slide_id=None):
-    """Load data from a trigger/injection file"""
-
-    snr_type = opts.snr_type
-    veto_type = opts.y_variable
-
-    # Initialize the dictionary
-    data = {}
-    data[snr_type] = None
-    data[veto_type] = None
-    data['dof'] = None
-
-    # Ensure that newtwork power chi-square plots show all the data to see
-    # the impact of the reweighted SNR cut
-    rw_snr_threshold = 0. if veto_type=='network' else opts.newsnr_threshold
-
-    if input_file:
-        if injections:
-            logging.info("Loading injections...")
-            # This will eventually become load_injections
-            trigs_or_injs = \
-                ppu.load_triggers(input_file, ifos, vetoes,
-                                  rw_snr_threshold=rw_snr_threshold,
-                                  slide_id=slide_id)
-        else:
-            logging.info("Loading triggers...")
-            trigs_or_injs = \
-                ppu.load_triggers(input_file, ifos, vetoes,
-                                  rw_snr_threshold=rw_snr_threshold,
-                                  slide_id=slide_id)
-
-        # Count surviving points
-        num_trigs_or_injs = len(trigs_or_injs['network/reweighted_snr'])
-
-        if snr_type in ['coherent', 'null', 'reweighted']:
-            data[snr_type] = trigs_or_injs['network/%s_snr' % snr_type][:]
-        elif snr_type == 'single':
-            key = opts.ifo + '/snr'
-            data[snr_type] = trigs_or_injs[key][:]
-
-        # Calculate coincident SNR
-        elif snr_type == 'coincident':
-            data[snr_type] = ppu.get_coinc_snr(trigs_or_injs)
-
-        # Tags to find vetoes in HDF files
-        veto_tags = {'power': 'chisq',
-                     'bank': 'bank_chisq',
-                     'auto': 'auto_chisq',
-                     'network': 'my_network_chisq'}
-
-        # This chi-square is already normalized
-        if veto_type == 'network':
-            chisq_key = 'network/my_network_chisq'
-            data['dof'] = 1.
-        else:
-            chisq_key = opts.ifo + '/' + veto_tags[veto_type]
-            dof_key = '%s/%s_dof' % (opts.ifo, veto_tags[veto_type])
-            data['dof'] = trigs_or_injs[dof_key][:]
-
-        # Normalize
-        data[veto_type] = trigs_or_injs[chisq_key][:]/data['dof']
-
-        # Floor single IFO chi-square at 0.005
-        numpy.putmask(data[veto_type], data[veto_type] == 0, 0.005)
-
-        label = "injections" if injections else "triggers"
-
-        logging.info("{0} {1} found.".format(num_trigs_or_injs, label))
-
-    return data
-
-
 # Function to calculate chi-square weight for the reweighted SNR
 def new_snr_chisq(snr, new_snr, chisq_index=4.0, chisq_nhigh=3.0):
     """Returns the chi-square value needed to weight SNR into new SNR"""
@@ -133,7 +60,7 @@ def new_snr_chisq(snr, new_snr, chisq_index=4.0, chisq_nhigh=3.0):
 
 
 # Function that produces the contours to be plotted
-def calculate_contours(trig_data, opts, new_snrs=None):
+def calculate_contours(opts, new_snrs=None):
     """Generate the contours for the veto plots"""
 
     # Add the new SNR threshold contour to the list if necessary
@@ -219,13 +146,13 @@ veto_labels = {'network': "Network Power",
                'auto': "Auto",
                'power': "Power"}
 if opts.plot_title is None:
-    opts.plot_title = " %s Chi Square" % veto_labels[veto_type]
+    opts.plot_title = veto_labels[veto_type] + " Chi Square"
     if veto_type != 'network':
         opts.plot_title = ifo + opts.plot_title
     if snr_type == 'single':
-        opts.plot_title += " vs %s SNR" % (ifo)
+        opts.plot_title += f" vs {ifo} SNR"
     else:
-        opts.plot_title += " vs %s SNR" % snr_type.capitalize()
+        opts.plot_title += f" vs {snr_type.capitalize()} SNR"
 if opts.plot_caption is None:
     opts.plot_caption = ("Blue crosses: background triggers. ")
     if found_missed_file:
@@ -243,54 +170,116 @@ outdir = os.path.split(os.path.abspath(opts.output_file))[0]
 if not os.path.isdir(outdir):
     os.makedirs(outdir)
 
-# Extract IFOs and vetoes
-ifos, vetoes = ppu.extract_ifos_and_vetoes(trig_file, opts.veto_files,
-                                           opts.veto_category)
-
-# Exit gracefully if the requested IFO is not available
-if ifo and ifo not in ifos:
-    err_msg = "The IFO selected with --ifo is unavailable in the data."
-    raise RuntimeError(err_msg)
+# Extract IFOs
+ifos = ppu.extract_ifos(trig_file)
+
+# Generate time-slides dictionary
+slide_dict = ppu.load_time_slides(trig_file)
+
+# Generate segments dictionary
+segment_dict = ppu.load_segment_dict(trig_file)
+
+# Construct trials removing vetoed times
+trial_dict, total_trials = ppu.construct_trials(
+    opts.seg_files,
+    segment_dict,
+    ifos,
+    slide_dict,
+    opts.veto_file
+)
+
+# Load trigger and injections data: ensure that newtwork power chi-square plots
+# show all the data to see the impact of the reweighted SNR cut, otherwise remove
+# points with reweighted SNR below threshold
+rw_snr_threshold = None if veto_type == 'network' else opts.newsnr_threshold
+trig_data = ppu.load_data(trig_file, ifos, data_tag='trigs',
+                          rw_snr_threshold=rw_snr_threshold,
+                          slide_id=opts.slide_id)
+inj_data = ppu.load_data(found_missed_file, ifos, data_tag='injs',
+                         rw_snr_threshold=rw_snr_threshold,
+                         slide_id=0)
+
+# Dataset name for the horizontal direction
+if snr_type == 'single':
+    x_key = ifo + '/snr'
+else:
+    x_key = 'network/' + snr_type + '_snr'
+# Dataset name for the vertical direction and for normalization
+if veto_type == 'power':
+    y_key = opts.ifo + '/chisq'
+elif veto_type in ['bank', 'auto']:
+    y_key = opts.ifo + '/' + veto_type +'_chisq'
+else:
+    y_key = 'network/my_network_chisq'
+
+keys = [x_key, y_key]
+# The network chi-square is already normalized so it does not require a key
+# for the number of degrees of freedom
+if veto_type != 'network':
+    keys += [y_key + '_dof']
+
+# Extract needed trigger properties and store them as dictionaries
+# Based on trial_dict: if vetoes were applied, trig_* are the veto survivors
+found_trigs_slides = ppu.extract_trig_properties(
+    trial_dict,
+    trig_data,
+    slide_dict,
+    segment_dict,
+    keys
+)
+found_trigs = {}
+for key in keys:
+    found_trigs[key] = numpy.concatenate(
+       [found_trigs_slides[key][slide_id][:] for slide_id in slide_dict]
+    )
+
+# Gather injections found surviving vetoes
+found_injs, *_ = ppu.apply_vetoes_to_found_injs(
+    opts.found_missed_file,
+    inj_data,
+    ifos,
+    veto_file=opts.veto_file,
+    keys=keys
+)
 
-# Extract trigger data
-trig_data = load_data(trig_file, ifos, vetoes, opts,
-                      slide_id=opts.slide_id)
+# Sanity checks
+for test in zip(keys[0:2], ['x', 'y']):
+    if found_trigs[test[0]] is None and found_injs[test[0]] is None:
+        err_msg = "No data to be plotted on the " + test[1] + "-axis was found"
+        raise RuntimeError(err_msg)
 
-# Extract (or initialize) injection data
-inj_data = load_data(found_missed_file, ifos, vetoes, opts,
-                     injections=True, slide_id=0)
+# Normalize chi-squares with the number of degrees of freedom
+if len(keys) == 3:
+    found_trigs[keys[1]] /= found_trigs[keys[2]]
+    found_injs[keys[1]] /= found_injs[keys[2]]
 
-# Sanity checks
-if trig_data[snr_type] is None and inj_data[snr_type] is None:
-    err_msg = "No data to be plotted on the x-axis was found"
-    raise RuntimeError(err_msg)
-if trig_data[veto_type] is None and inj_data[veto_type] is None:
-    err_msg = "No data to be plotted on the y-axis was found"
-    raise RuntimeError(err_msg)
+# Single detector chi-squares are initialized to 0: we floor possible
+# remaining 0s to 0.005 to avoid asking for logarithms of 0 in the plot
+numpy.putmask(found_trigs[y_key], found_trigs[y_key] == 0, 0.005)
 
 # Generate plots
 logging.info("Plotting...")
 
 # Determine x-axis values of triggers and injections
 # Default is coherent SNR
 x_label = ifo if snr_type == 'single' else snr_type.capitalize()
-x_label = "%s SNR" % x_label
+x_label += " SNR"
 
 # Determine the minumum and maximum SNR value we are dealing with
-x_min = 0.9*plu.axis_min_value(trig_data[snr_type], inj_data[snr_type],
+x_min = 0.9*plu.axis_min_value(found_trigs[x_key], found_injs[x_key],
                                found_missed_file)
-x_max = 1.1*plu.axis_max_value(trig_data[snr_type], inj_data[snr_type],
+x_max = 1.1*plu.axis_max_value(found_trigs[x_key], found_injs[x_key],
                                found_missed_file)
 
 # Determine the minimum and maximum chi-square value we are dealing with
-y_min = 0.9*plu.axis_min_value(trig_data[veto_type], inj_data[veto_type],
+y_min = 0.9*plu.axis_min_value(found_trigs[y_key], found_injs[y_key],
                                found_missed_file)
-y_max = 1.1*plu.axis_max_value(trig_data[veto_type], inj_data[veto_type],
+y_max = 1.1*plu.axis_max_value(found_trigs[y_key], found_injs[y_key],
                                found_missed_file)
 
-# Determine y-axis minimum value and label
+# Determine y-axis label
 y_label = "Network power chi-square" if veto_type == 'network' \
-    else "%s Single %s chi-square" % (ifo, veto_labels[veto_type].lower())
+    else f"{ifo} Single {veto_labels[veto_type].lower()} chi-square"
 
 # Determine contours for plots
 conts = None
@@ -299,8 +288,7 @@ cont_value = None
 colors = None
 # Enable countours of constant reweighted SNR as a function of coherent SNR
 if snr_type == 'coherent':
-    conts, snr_vals, cont_value, colors = calculate_contours(trig_data,
-                                                             opts,
+    conts, snr_vals, cont_value, colors = calculate_contours(opts,
                                                              new_snrs=None)
 # The cut in reweighted SNR involves only the network power chi-square
 if veto_type != 'network':
@@ -314,9 +302,9 @@ if not opts.x_lims:
     else:
         opts.x_lims = str(x_min)+','+str(x_max)
         opts.y_lims = str(y_min)+','+str(10*y_max)
-trigs = [trig_data[snr_type], trig_data[veto_type]]
-injs = [inj_data[snr_type], inj_data[veto_type]]
-plu.pygrb_plotter(trigs, injs, x_label, y_label, opts,
+plu.pygrb_plotter([found_trigs[x_key], found_trigs[y_key]],
+                  [found_injs[x_key], found_injs[y_key]],
+                  x_label, y_label, opts,
                   snr_vals=snr_vals, conts=conts, colors=colors,
                   shade_cont_value=cont_value, vert_spike=True,
                   cmd=' '.join(sys.argv))